[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2a2-12-gd39a736

Service Account noreply at mpich.org
Sat Nov 22 19:29:51 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  d39a7363aada877e7bc43011b7d2226128cf9df6 (commit)
      from  e38618b0fd8cfb5453a7ad8149afc91d9fdd5e24 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/d39a7363aada877e7bc43011b7d2226128cf9df6

commit d39a7363aada877e7bc43011b7d2226128cf9df6
Author: Charles J Archer <charles.j.archer at intel.com>
Date:   Mon Nov 17 10:15:05 2014 -0800

    Open Fabrics Working Group (OFIWG) Netmod Support
    
     * Implements a tag matching interface netmod over the OFIWG Scalable Fabric Interfaces (SFI)
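
For orientation: the netmod added below maps MPI point-to-point traffic onto
SFI's tagged send/receive calls.  A minimal, hedged sketch of the pairing it
relies on follows; ep, mr, peer, sbuf, rbuf, and len are placeholder names,
and the argument order simply mirrors the fi_tsendto()/fi_trecvfrom() calls
that appear in this patch.

    /* Illustrative fragment only, not part of the patch. */
    struct fi_context sctx, rctx;
    uint64_t match_bits = 0x5000000000000000ULL;   /* e.g. an MPID_MSG_DATA-style tag */
    fi_tsendto(ep, sbuf, len, mr, peer, match_bits, &sctx);          /* tagged send   */
    fi_trecvfrom(ep, rbuf, len, mr, peer, match_bits, 0ULL, &rctx);  /* matching recv;
                                                                        ignore mask 0 */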

diff --git a/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk b/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
index 256ae99..648b7ee 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
+++ b/src/mpid/ch3/channels/nemesis/netmod/Makefile.mk
@@ -13,3 +13,4 @@ include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/scif/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/portals4/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/ib/Makefile.mk
 include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/mxm/Makefile.mk
+include $(top_srcdir)/src/mpid/ch3/channels/nemesis/netmod/sfi/Makefile.mk
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/Makefile.mk b/src/mpid/ch3/channels/nemesis/netmod/sfi/Makefile.mk
new file mode 100644
index 0000000..bc3d6ef
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/Makefile.mk
@@ -0,0 +1,19 @@
+## -*- Mode: Makefile; -*-
+## vim: set ft=automake :
+##
+## (C) 2011 by Argonne National Laboratory.
+##     See COPYRIGHT in top-level directory.
+##
+if BUILD_NEMESIS_NETMOD_SFI
+
+mpi_core_sources +=                                 		\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_init.c 	\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c	 	\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_tagged.c	\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c	 	\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_data.c	 	\
+    src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_progress.c
+
+errnames_txt_files += src/mpid/ch3/channels/nemesis/netmod/sfi/errnames.txt
+
+endif
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/errnames.txt b/src/mpid/ch3/channels/nemesis/netmod/sfi/errnames.txt
new file mode 100644
index 0000000..c1ae0e3
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/errnames.txt
@@ -0,0 +1,42 @@
+**sfi_avmap:SFI get address vector map failed
+**sfi_avmap %s %d %s %s:SFI address vector map failed (%s:%d:%s:%s)
+**sfi_tsendto:SFI tagged sendto failed
+**sfi_tsendto %s %d %s %s:SFI tagged sendto failed (%s:%d:%s:%s)
+**sfi_trecvfrom:SFI tagged recvfrom failed
+**sfi_trecvfrom %s %d %s %s:SFI tagged recvfrom failed (%s:%d:%s:%s)
+**sfi_getinfo:SFI getinfo() failed
+**sfi_getinfo %s %d %s %s:SFI getinfo() failed (%s:%d:%s:%s)
+**sfi_openep:SFI endpoint open failed
+**sfi_openep %s %d %s %s:SFI endpoint open failed (%s:%d:%s:%s)
+**sfi_openfabric:SFI fabric open failure
+**sfi_openfabric %s %d %s %s:SFI fabric open failed (%s:%d:%s:%s)
+**sfi_opendomain:SFI domain open failure
+**sfi_opendomain %s %d %s %s:SFI domain open failed (%s:%d:%s:%s)
+**sfi_opencq:SFI completion queue create failure
+**sfi_opencq %s %d %s %s:SFI completion queue create failed (%s:%d:%s:%s)
+**sfi_avopen:SFI address vector open failed
+**sfi_avopen %s %d %s %s:SFI address vector open failed (%s:%d:%s:%s)
+**sfi_bind:SFI resource bind failure
+**sfi_bind %s %d %s %s:SFI resource bind failed (%s:%d:%s:%s)
+**sfi_ep_enable:SFI endpoint enable failed
+**sfi_ep_enable %s %d %s %s:SFI endpoint enable failed (%s:%d:%s:%s)
+**sfi_getname:SFI get endpoint name failed
+**sfi_getname %s %d %s %s:SFI get endpoint name failed (%s:%d:%s:%s)
+**sfi_avclose:SFI av close failed
+**sfi_avclose %s %d %s %s:SFI av close failed (%s:%d:%s:%s)
+**sfi_epclose:SFI endpoint close failed
+**sfi_epclose %s %d %s %s:SFI endpoint close failed (%s:%d:%s:%s)
+**sfi_cqclose:SFI cq close failed
+**sfi_cqclose %s %d %s %s:SFI cq close failed (%s:%d:%s:%s)
+**sfi_mrclose:SFI mr close failed
+**sfi_mrclose %s %d %s %s:SFI mr close failed (%s:%d:%s:%s)
+**sfi_fabricclose:SFI fabric close failed
+**sfi_fabricclose %s %d %s %s:SFI fabric close failed (%s:%d:%s:%s)
+**sfi_domainclose:SFI domain close failed
+**sfi_domainclose %s %d %s %s:SFI domain close failed (%s:%d:%s:%s)
+**sfi_tsearch:SFI tsearch failed
+**sfi_tsearch %s %d %s %s:SFI tsearch failed (%s:%d:%s:%s)
+**sfi_poll:SFI poll failed
+**sfi_poll %s %d %s %s:SFI poll failed (%s:%d:%s:%s)
+**sfi_cancel:SFI cancel failed
+**sfi_cancel %s %d %s %s:SFI cancel failed (%s:%d:%s:%s)
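
(The long-form templates above are instantiated by the FI_RC/PMI_RC macros
defined later in sfi_impl.h, which substitute the source file, line number,
function name, and an fi_strerror() string into the trailing "(%s:%d:%s:%s)";
the sketch below shows a hypothetical rendering with placeholder fields.)

    /* Hypothetical rendering of the "**sfi_tsendto %s %d %s %s" template:
     *   SFI tagged sendto failed (<file>:<line>:<function>:<fi_strerror text>)
     */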
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c
new file mode 100644
index 0000000..b39517a
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c
@@ -0,0 +1,577 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+/* ------------------------------------------------------------------------ */
+/* sfi_tag_to_vc                                                            */
+/* This routine converts the tag information from an incoming preposted     */
+/* receive into the VC associated with the message's source.  A small list  */
+/* of temporary VC's may exist while dynamic task management creates VC's;  */
+/* that list is searched linearly, but it stays short and its entries are   */
+/* eventually destroyed by the upper layers.  Otherwise the tag is split    */
+/* into a PG "number", which is a hash of the data contained in the process */
+/* group, and a source rank; the source/pg number pair is enough to look    */
+/* up the VC.                                                               */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(sfi_tag_to_vc)
+static inline MPIDI_VC_t *sfi_tag_to_vc(uint64_t match_bits)
+{
+    int pgid = 0, port = 0;
+    MPIDI_VC_t *vc = NULL;
+    MPIDI_PG_t *pg = NULL;
+
+    BEGIN_FUNC(FCNAME);
+    if (NO_PGID == get_pgid(match_bits)) {
+        /* -------------------------------------------------------------------- */
+        /* Dynamic path -- this uses a linear search, but the number of cm      */
+        /* vc's is small and they should be ephemeral.  The lookup is fast      */
+        /* and is not normally on the critical path.                            */
+        /* -------------------------------------------------------------------- */
+        port = get_port(match_bits);
+        vc = gl_data.cm_vcs;
+        while (vc && vc->port_name_tag != port) {
+            vc = VC_SFI(vc)->next;
+        }
+        if (NULL == vc) {
+            MPIU_Assertp(0);
+        }
+    }
+    else {
+        /* -------------------------------------------------------------------- */
+        /* Normal path -- generate the PG number hash from each known process   */
+        /* group and compare it to the pg number in the tag.  The number of     */
+        /* PG's should be small.                                                */
+        /* -------------------------------------------------------------------- */
+        pg = gl_data.pg_p;
+        while (pg) {
+            MPIDI_PG_IdToNum(pg, &pgid);
+            if (get_pgid(match_bits) == pgid) {
+                break;
+            }
+            pg = pg->next;
+        }
+        if (pg) {
+            MPIDI_PG_Get_vc(pg, get_psource(match_bits), &vc);
+        }
+        else {
+            MPIU_Assert(0);
+        }
+    }
+    END_FUNC(FCNAME);
+    return vc;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_conn_req_callback                                           */
+/* A new process has been created and is connected to the current world     */
+/* The address of the new process is exchanged via the business card        */
+/* instead of being exchanged up front during the creation of the first     */
+/* world.  The new connection routine is usually invoked when two worlds    */
+/* are started via dynamic tasking.                                         */
+/* This routine:                                                            */
+/*     * reposts the persistent connection management receive request       */
+/*     * mallocs/creates/initializes the VC                                 */
+/*     * grabs the address name from the business card                      */
+/*     * uses fi_av_insert to insert the address into the address vector.   */
+/* This is marked as a "connection management" vc, and may be destroyed     */
+/* by the upper layers.  We handle the cm vc's slightly differently than    */
+/* other VC's because they may not be part of a process group.              */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_conn_req_callback)
+static inline int MPID_nem_sfi_conn_req_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
+{
+    int ret, len, mpi_errno = MPI_SUCCESS;
+    char bc[SFI_KVSAPPSTRLEN];
+
+    MPIDI_VC_t *vc = NULL;
+    char *addr = NULL;
+    fi_addr_t direct_addr;
+
+    BEGIN_FUNC(FCNAME);
+
+    MPIU_Memcpy(bc, rreq->dev.user_buf, wc->len);
+    bc[wc->len] = '\0';
+    MPIU_Assert(gl_data.conn_req == rreq);
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       gl_data.conn_req->dev.user_buf,
+                       SFI_KVSAPPSTRLEN,
+                       gl_data.mr,
+                       0,
+                       MPID_CONN_REQ,
+                       ~MPID_PROTOCOL_MASK,
+                       (void *) &(REQ_SFI(gl_data.conn_req)->sfi_context)), trecvfrom);
+
+    addr = MPIU_Malloc(gl_data.bound_addrlen);
+    MPIU_Assertp(addr);
+
+    vc = MPIU_Malloc(sizeof(MPIDI_VC_t));
+    MPIU_Assertp(vc);
+
+    MPIDI_VC_Init(vc, NULL, 0);
+    MPI_RC(MPIDI_GetTagFromPort(bc, &vc->port_name_tag));
+    ret = MPIU_Str_get_binary_arg(bc, "SFI", addr, gl_data.bound_addrlen, &len);
+    MPIU_ERR_CHKANDJUMP((ret != MPIU_STR_SUCCESS && ret != MPIU_STR_NOMEM) ||
+                        (size_t) len != gl_data.bound_addrlen,
+                        mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
+
+    FI_RC(fi_av_insert(gl_data.av, addr, 1, &direct_addr, 0ULL, NULL), avmap);
+    VC_SFI(vc)->direct_addr = direct_addr;
+    VC_SFI(vc)->ready = 1;
+    VC_SFI(vc)->is_cmvc = 1;
+    VC_SFI(vc)->next = gl_data.cm_vcs;
+    gl_data.cm_vcs = vc;
+
+    MPIDI_CH3I_Acceptq_enqueue(vc, vc->port_name_tag);
+    MPIDI_CH3I_INCR_PROGRESS_COMPLETION_COUNT;
+  fn_exit:
+    MPIU_Free(addr);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+  fn_fail:
+    if (vc)
+        MPIU_Free(vc);
+    goto fn_exit;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_handle_packet                                               */
+/* The "parent" request tracks the state of the entire rendezvous.          */
+/* As "child" requests complete, the cc counter is decremented.             */
+/* When cc hits 1, notify CH3 that we have an incoming packet; otherwise    */
+/* just decrement the reference counter via request completion.             */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_handle_packet)
+static inline int MPID_nem_sfi_handle_packet(cq_tagged_entry_t * wc ATTRIBUTE((unused)),
+                                             MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_VC_t *vc;
+
+    BEGIN_FUNC(FCNAME);
+    if (rreq->cc == 1) {
+        vc = REQ_SFI(rreq)->vc;
+        MPIU_Assert(vc);
+        MPI_RC(MPID_nem_handle_pkt(vc, REQ_SFI(rreq)->pack_buffer, REQ_SFI(rreq)->pack_buffer_size));
+        MPIU_Free(REQ_SFI(rreq)->pack_buffer);
+    }
+    MPIDI_CH3U_Request_complete(rreq);
+    END_FUNC_RC(FCNAME);
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_cts_send_callback                                           */
+/* A wrapper around MPID_nem_sfi_handle_packet that decrements              */
+/* the parent request's counter, and cleans up the CTS request              */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cts_send_callback)
+static inline int MPID_nem_sfi_cts_send_callback(cq_tagged_entry_t * wc, MPID_Request * sreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    MPI_RC(MPID_nem_sfi_handle_packet(wc, REQ_SFI(sreq)->parent));
+    MPIDI_CH3U_Request_complete(sreq);
+    END_FUNC_RC(FCNAME);
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_preposted_callback                                          */
+/* This callback handles incoming "SendContig" messages (see sfi_msg.c)     */
+/* for the send routines.  This implements the CTS response and the RTS     */
+/* handler.  The steps are as follows:                                      */
+/*   * Create a parent data request and post a receive into a pack buffer   */
+/*   * Create a child request and send the CTS packet                       */
+/*   * Re-Post the RTS receive and handler to handle the next message       */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_preposted_callback)
+static inline int MPID_nem_sfi_preposted_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
+{
+    int c, mpi_errno = MPI_SUCCESS;
+    size_t pkt_len;
+    char *pack_buffer = NULL;
+    MPIDI_VC_t *vc;
+    MPID_Request *new_rreq, *sreq;
+    BEGIN_FUNC(FCNAME);
+
+    vc = sfi_tag_to_vc(wc->tag);
+    MPIU_Assert(vc);
+    VC_READY_CHECK(vc);
+
+    pkt_len = rreq->dev.user_count;
+    pack_buffer = (char *) MPIU_Malloc(pkt_len);
+    MPIU_ERR_CHKANDJUMP1(pack_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                         "**nomem", "**nomem %s", "Pack Buffer alloc");
+    c = 1;
+    MPID_nem_sfi_create_req(&new_rreq, 1);
+    MPID_cc_incr(new_rreq->cc_ptr, &c);
+    new_rreq->dev.OnDataAvail = NULL;
+    new_rreq->dev.next = NULL;
+    REQ_SFI(new_rreq)->event_callback = MPID_nem_sfi_handle_packet;
+    REQ_SFI(new_rreq)->vc = vc;
+    REQ_SFI(new_rreq)->pack_buffer = pack_buffer;
+    REQ_SFI(new_rreq)->pack_buffer_size = pkt_len;
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       REQ_SFI(new_rreq)->pack_buffer,
+                       REQ_SFI(new_rreq)->pack_buffer_size,
+                       gl_data.mr,
+                       VC_SFI(vc)->direct_addr,
+                       wc->tag | MPID_MSG_DATA, 0, &(REQ_SFI(new_rreq)->sfi_context)), trecvfrom);
+
+    MPID_nem_sfi_create_req(&sreq, 1);
+    sreq->dev.OnDataAvail = NULL;
+    sreq->dev.next = NULL;
+    REQ_SFI(sreq)->event_callback = MPID_nem_sfi_cts_send_callback;
+    REQ_SFI(sreq)->parent = new_rreq;
+    FI_RC(fi_tsendto(gl_data.endpoint,
+                     NULL,
+                     0,
+                     gl_data.mr,
+                     VC_SFI(vc)->direct_addr,
+                     wc->tag | MPID_MSG_CTS, &(REQ_SFI(sreq)->sfi_context)), tsendto);
+    MPIU_Assert(gl_data.persistent_req == rreq);
+
+    rreq->dev.user_count = 0;
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       &rreq->dev.user_count,
+                       sizeof rreq->dev.user_count,
+                       gl_data.mr,
+                       0,
+                       MPID_MSG_RTS,
+                       ~MPID_PROTOCOL_MASK, &(REQ_SFI(rreq)->sfi_context)), trecvfrom);
+    END_FUNC_RC(FCNAME);
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_connect_to_root_callback                                    */
+/* Complete and clean up the request                                        */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_connect_to_root_callback)
+int MPID_nem_sfi_connect_to_root_callback(cq_tagged_entry_t * wc ATTRIBUTE((unused)),
+                                          MPID_Request * sreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+
+    if (REQ_SFI(sreq)->pack_buffer)
+        MPIU_Free(REQ_SFI(sreq)->pack_buffer);
+    MPIDI_CH3U_Request_complete(sreq);
+
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_cm_init                                                     */
+/* This is a utility routine that sets up persistent connection management  */
+/* requests and a persistent data request to handle rendezvous SendContig   */
+/* messages.                                                                */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cm_init)
+int MPID_nem_sfi_cm_init(MPIDI_PG_t * pg_p, int pg_rank ATTRIBUTE((unused)))
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request *persistent_req, *conn_req;
+    BEGIN_FUNC(FCNAME);
+
+    /* ------------------------------------- */
+    /* Set up CH3 and netmod data structures */
+    /* ------------------------------------- */
+    MPI_RC(MPIDI_CH3I_Register_anysource_notification(MPID_nem_sfi_anysource_posted,
+                                                      MPID_nem_sfi_anysource_matched));
+    MPIDI_Anysource_iprobe_fn = MPID_nem_sfi_anysource_iprobe;
+    MPIDI_Anysource_improbe_fn = MPID_nem_sfi_anysource_improbe;
+    gl_data.pg_p = pg_p;
+
+    /* ------------------------------------------------ */
+    /* Post a persistent request to handle RTS messages */
+    /* ------------------------------------------------ */
+    MPID_nem_sfi_create_req(&persistent_req, 1);
+    persistent_req->dev.OnDataAvail = NULL;
+    persistent_req->dev.next = NULL;
+    REQ_SFI(persistent_req)->vc = NULL;
+    REQ_SFI(persistent_req)->event_callback = MPID_nem_sfi_preposted_callback;
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       &persistent_req->dev.user_count,
+                       sizeof persistent_req->dev.user_count,
+                       gl_data.mr,
+                       0,
+                       MPID_MSG_RTS,
+                       ~MPID_PROTOCOL_MASK,
+                       (void *) &(REQ_SFI(persistent_req)->sfi_context)), trecvfrom);
+    gl_data.persistent_req = persistent_req;
+
+    /* --------------------------------- */
+    /* Post recv for connection requests */
+    /* --------------------------------- */
+    MPID_nem_sfi_create_req(&conn_req, 1);
+    conn_req->dev.user_buf = MPIU_Malloc(SFI_KVSAPPSTRLEN * sizeof(char));
+    conn_req->dev.OnDataAvail = NULL;
+    conn_req->dev.next = NULL;
+    REQ_SFI(conn_req)->vc = NULL;       /* We don't know the source yet */
+    REQ_SFI(conn_req)->event_callback = MPID_nem_sfi_conn_req_callback;
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       conn_req->dev.user_buf,
+                       SFI_KVSAPPSTRLEN,
+                       gl_data.mr,
+                       0,
+                       MPID_CONN_REQ,
+                       ~MPID_PROTOCOL_MASK, (void *) &(REQ_SFI(conn_req)->sfi_context)), trecvfrom);
+    gl_data.conn_req = conn_req;
+
+
+  fn_exit:
+    END_FUNC(FCNAME);
+    return mpi_errno;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_cm_finalize                                                 */
+/* Clean up and cancel the requests initiated by the cm_init routine        */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cm_finalize)
+int MPID_nem_sfi_cm_finalize(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    FI_RC(fi_cancel((fid_t) gl_data.endpoint,
+                    &(REQ_SFI(gl_data.persistent_req)->sfi_context)), cancel);
+    MPIR_STATUS_SET_CANCEL_BIT(gl_data.persistent_req->status, TRUE);
+    MPIR_STATUS_SET_COUNT(gl_data.persistent_req->status, 0);
+    MPIDI_CH3U_Request_complete(gl_data.persistent_req);
+
+    FI_RC(fi_cancel((fid_t) gl_data.endpoint, &(REQ_SFI(gl_data.conn_req)->sfi_context)), cancel);
+    MPIU_Free(gl_data.conn_req->dev.user_buf);
+    MPIR_STATUS_SET_CANCEL_BIT(gl_data.conn_req->status, TRUE);
+    MPIR_STATUS_SET_COUNT(gl_data.conn_req->status, 0);
+    MPIDI_CH3U_Request_complete(gl_data.conn_req);
+  fn_exit:
+    END_FUNC(FCNAME);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_vc_connect                                                  */
+/* Handle CH3/Nemesis VC connections                                        */
+/*   * Query the VC address information.  In particular we are looking for  */
+/*     the fabric address name.                                             */
+/*   * Use fi_av_insert to register the address name with SFI               */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_vc_connect)
+int MPID_nem_sfi_vc_connect(MPIDI_VC_t * vc)
+{
+    int len, ret, mpi_errno = MPI_SUCCESS;
+    char bc[SFI_KVSAPPSTRLEN], *addr = NULL;
+
+    BEGIN_FUNC(FCNAME);
+    addr = MPIU_Malloc(gl_data.bound_addrlen);
+    MPIU_Assert(addr);
+    MPIU_Assert(1 != VC_SFI(vc)->ready);
+
+    if (!vc->pg || !vc->pg->getConnInfo) {
+        goto fn_exit;
+    }
+
+    MPI_RC(vc->pg->getConnInfo(vc->pg_rank, bc, SFI_KVSAPPSTRLEN, vc->pg));
+    ret = MPIU_Str_get_binary_arg(bc, "SFI", addr, gl_data.bound_addrlen, &len);
+    MPIU_ERR_CHKANDJUMP((ret != MPIU_STR_SUCCESS && ret != MPIU_STR_NOMEM) ||
+                        (size_t) len != gl_data.bound_addrlen,
+                        mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
+    FI_RC(fi_av_insert(gl_data.av, addr, 1, &(VC_SFI(vc)->direct_addr), 0ULL, NULL), avmap);
+    VC_SFI(vc)->ready = 1;
+
+  fn_exit:
+    if (addr)
+        MPIU_Free(addr);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_vc_init)
+int MPID_nem_sfi_vc_init(MPIDI_VC_t * vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_VC *const vc_ch = &vc->ch;
+    MPID_nem_sfi_vc_t *const vc_sfi = VC_SFI(vc);
+
+    BEGIN_FUNC(FCNAME);
+    vc->sendNoncontig_fn = MPID_nem_sfi_SendNoncontig;
+    vc_ch->iStartContigMsg = MPID_nem_sfi_iStartContigMsg;
+    vc_ch->iSendContig = MPID_nem_sfi_iSendContig;
+    vc_ch->next = NULL;
+    vc_ch->prev = NULL;
+    vc_sfi->is_cmvc = 0;
+    vc->comm_ops = &_g_comm_ops;
+
+    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
+
+    if (NULL == vc->pg) {
+        vc_sfi->is_cmvc = 1;
+    }
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_vc_destroy                                                  */
+/* MPID_nem_sfi_vc_terminate                                                */
+/* TODO:  Verify this code has no leaks                                     */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_vc_destroy)
+int MPID_nem_sfi_vc_destroy(MPIDI_VC_t * vc)
+{
+    BEGIN_FUNC(FCNAME);
+    if (vc && (VC_SFI(vc)->is_cmvc == 1) && (VC_SFI(vc)->ready == 1)) {
+        if (vc->pg != NULL) {
+            printf("ERROR: VC Destroy (%p) pg = %s\n", vc, (char *) vc->pg->id);
+        }
+        MPIDI_VC_t *prev = gl_data.cm_vcs;
+        while (prev && prev != vc && VC_SFI(prev)->next != vc) {
+            prev = VC_SFI(prev)->next;
+        }
+        if (VC_SFI(prev)->next == vc) {
+            VC_SFI(prev)->next = VC_SFI(vc)->next;
+        }
+        else if (vc == gl_data.cm_vcs) {
+            gl_data.cm_vcs = VC_SFI(vc)->next;
+        }
+        else {
+            MPIU_Assert(0);
+        }
+    }
+    VC_SFI(vc)->ready = 0;
+    END_FUNC(FCNAME);
+    return MPI_SUCCESS;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_vc_terminate)
+int MPID_nem_sfi_vc_terminate(MPIDI_VC_t * vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    MPI_RC(MPIDI_CH3U_Handle_connection(vc, MPIDI_VC_EVENT_TERMINATED));
+    VC_SFI(vc)->ready = 0;
+    END_FUNC_RC(FCNAME);
+}
+
+
+
+/* ------------------------------------------------------------------------ */
+/* MPID_nem_sfi_connect_to_root                                             */
+/*  * A new unconnected VC (cm/ephemeral VC) has been created.  This code   */
+/*    connects the new VC to a rank in another process group.  The parent   */
+/*    address is obtained by an out-of-band method and given to this        */
+/*    routine as a business card.                                           */
+/*  * Read the business card address and insert the address                 */
+/*  * Send a connection request to the parent.  The parent has posted a     */
+/*    persistent request to handle incoming connection requests.  The       */
+/*    connect message carries the child's business card.                    */
+/*  * Add the new VC to the list of ephemeral VC's (cm_vcs).  These VC's    */
+/*    are not part of the process group, so they require special handling   */
+/*    during the SendContig family of routines.                             */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(nm_connect_to_root)
+int MPID_nem_sfi_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
+{
+    int len, ret, mpi_errno = MPI_SUCCESS, str_errno = MPI_SUCCESS;
+    int my_bc_len = SFI_KVSAPPSTRLEN;
+    char *addr = NULL, *bc = NULL, *my_bc = NULL;
+    MPID_Request *sreq;
+    uint64_t conn_req_send_bits;
+
+    BEGIN_FUNC(FCNAME);
+    addr = MPIU_Malloc(gl_data.bound_addrlen);
+    bc = MPIU_Malloc(SFI_KVSAPPSTRLEN);
+    MPIU_Assertp(addr);
+    MPIU_Assertp(bc);
+    my_bc = bc;
+    if (!business_card || business_card[0] != 't') {
+        mpi_errno = MPI_ERR_OTHER;
+        goto fn_fail;
+    }
+    MPI_RC(MPIDI_GetTagFromPort(business_card, &new_vc->port_name_tag));
+    ret = MPIU_Str_get_binary_arg(business_card, "SFI", addr, gl_data.bound_addrlen, &len);
+    MPIU_ERR_CHKANDJUMP((ret != MPIU_STR_SUCCESS && ret != MPIU_STR_NOMEM) ||
+                        (size_t) len != gl_data.bound_addrlen,
+                        mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
+    FI_RC(fi_av_insert(gl_data.av, addr, 1, &(VC_SFI(new_vc)->direct_addr), 0ULL, NULL), avmap);
+
+    VC_SFI(new_vc)->ready = 1;
+    str_errno = MPIU_Str_add_int_arg(&bc, &my_bc_len, "tag", new_vc->port_name_tag);
+    MPIU_ERR_CHKANDJUMP(str_errno, mpi_errno, MPI_ERR_OTHER, "**argstr_port_name_tag");
+    MPI_RC(MPID_nem_sfi_get_business_card(MPIR_Process.comm_world->rank, &bc, &my_bc_len));
+    my_bc_len = SFI_KVSAPPSTRLEN - my_bc_len;
+
+    MPID_nem_sfi_create_req(&sreq, 1);
+    sreq->kind = MPID_REQUEST_SEND;
+    sreq->dev.OnDataAvail = NULL;
+    sreq->dev.next = NULL;
+    REQ_SFI(sreq)->event_callback = MPID_nem_sfi_connect_to_root_callback;
+    REQ_SFI(sreq)->pack_buffer = my_bc;
+    conn_req_send_bits = init_sendtag(0, MPIR_Process.comm_world->rank, 0, MPID_CONN_REQ);
+    FI_RC(fi_tsendto(gl_data.endpoint,
+                     REQ_SFI(sreq)->pack_buffer,
+                     my_bc_len,
+                     gl_data.mr,
+                     VC_SFI(new_vc)->direct_addr,
+                     conn_req_send_bits, &(REQ_SFI(sreq)->sfi_context)), tsendto);
+    MPID_nem_sfi_poll(MPID_NONBLOCKING_POLL);
+    VC_SFI(new_vc)->is_cmvc = 1;
+    VC_SFI(new_vc)->next = gl_data.cm_vcs;
+    gl_data.cm_vcs = new_vc;
+  fn_exit:
+    if (addr)
+        MPIU_Free(addr);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+  fn_fail:
+    if (my_bc)
+        MPIU_Free(my_bc);
+    goto fn_exit;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_get_business_card)
+int MPID_nem_sfi_get_business_card(int my_rank ATTRIBUTE((unused)),
+                                   char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS, str_errno = MPIU_STR_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    str_errno = MPIU_Str_add_binary_arg(bc_val_p,
+                                        val_max_sz_p,
+                                        "SFI",
+                                        (char *) &gl_data.bound_addr, sizeof(gl_data.bound_addr));
+    if (str_errno) {
+        MPIU_ERR_CHKANDJUMP(str_errno == MPIU_STR_NOMEM, mpi_errno, MPI_ERR_OTHER, "**buscard_len");
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
+    }
+    END_FUNC_RC(FCNAME);
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_data.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_data.c
new file mode 100644
index 0000000..1e39684
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_data.c
@@ -0,0 +1,58 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+
+MPID_nem_sfi_global_t gl_data;
+
+/* ************************************************************************** */
+/* Netmod Function Table                                                      */
+/* ************************************************************************** */
+MPIDI_Comm_ops_t _g_comm_ops = {
+    MPID_nem_sfi_recv_posted,   /* recv_posted */
+
+    MPID_nem_sfi_send,  /* send */
+    MPID_nem_sfi_send,  /* rsend */
+    MPID_nem_sfi_ssend, /* ssend */
+    MPID_nem_sfi_isend, /* isend */
+    MPID_nem_sfi_isend, /* irsend */
+    MPID_nem_sfi_issend,        /* issend */
+
+    NULL,       /* send_init */
+    NULL,       /* bsend_init */
+    NULL,       /* rsend_init */
+    NULL,       /* ssend_init */
+    NULL,       /* startall */
+
+    MPID_nem_sfi_cancel_send,   /* cancel_send */
+    MPID_nem_sfi_cancel_recv,   /* cancel_recv */
+
+    NULL,       /* probe */
+    MPID_nem_sfi_iprobe,        /* iprobe */
+    MPID_nem_sfi_improbe        /* improbe */
+};
+
+MPID_nem_netmod_funcs_t MPIDI_nem_sfi_funcs = {
+    MPID_nem_sfi_init,
+    MPID_nem_sfi_finalize,
+#ifdef ENABLE_CHECKPOINTING
+    NULL,
+    NULL,
+    NULL,
+#endif
+    MPID_nem_sfi_poll,
+    MPID_nem_sfi_get_business_card,
+    MPID_nem_sfi_connect_to_root,
+    MPID_nem_sfi_vc_init,
+    MPID_nem_sfi_vc_destroy,
+    MPID_nem_sfi_vc_terminate,
+    MPID_nem_sfi_anysource_iprobe,
+    MPID_nem_sfi_anysource_improbe,
+};
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_impl.h
new file mode 100644
index 0000000..9e8b93f
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_impl.h
@@ -0,0 +1,342 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#ifndef SFI_IMPL_H
+#define SFI_IMPL_H
+
+#include "mpid_nem_impl.h"
+#include "mpihandlemem.h"
+#include "pmi.h"
+#include <rdma/fabric.h>
+#include <rdma/fi_errno.h>
+#include <rdma/fi_endpoint.h>
+#include <rdma/fi_domain.h>
+#include <rdma/fi_tagged.h>
+#include <rdma/fi_cm.h>
+#include <netdb.h>
+
+/* ************************************************************************** */
+/* Type Definitions                                                           */
+/* ************************************************************************** */
+typedef struct iovec iovec_t;
+typedef struct fi_info info_t;
+typedef struct fi_cq_attr cq_attr_t;
+typedef struct fi_av_attr av_attr_t;
+typedef struct fi_domain_attr domain_attr_t;
+typedef struct fi_tx_ctx_attr tx_ctx_attr_t;
+typedef struct fi_cq_tagged_entry cq_tagged_entry_t;
+typedef struct fi_cq_err_entry cq_err_entry_t;
+typedef struct fi_context context_t;
+typedef int (*event_callback_fn) (cq_tagged_entry_t * wc, MPID_Request *);
+typedef int (*req_fn) (MPIDI_VC_t *, MPID_Request *, int *);
+
+/* ******************************** */
+/* Global Object for state tracking */
+/* ******************************** */
+typedef struct {
+    fi_addr_t bound_addr;       /* This rank's bound address   */
+    fi_addr_t any_addr;         /* Specifies any source        */
+    size_t bound_addrlen;       /* length of the bound address */
+    struct fid_fabric *fabric;  /* fabric object               */
+    struct fid_domain *domain;  /* domain object               */
+    struct fid_ep *endpoint;    /* endpoint object             */
+    struct fid_cq *cq;          /* completion queue            */
+    struct fid_av *av;          /* address vector              */
+    struct fid_mr *mr;          /* memory region               */
+    MPIDI_PG_t *pg_p;           /* MPI Process group           */
+    MPIDI_VC_t *cm_vcs;         /* temporary VC's              */
+    MPID_Request *persistent_req;       /* Unexpected request queue    */
+    MPID_Request *conn_req;     /* Connection request          */
+    MPIDI_Comm_ops_t comm_ops;
+} MPID_nem_sfi_global_t;
+
+/* ******************************** */
+/* Device channel specific data     */
+/* This is per destination          */
+/* ******************************** */
+typedef struct {
+    fi_addr_t direct_addr;      /* Remote SFI address */
+    int ready;                  /* VC ready state     */
+    int is_cmvc;                /* Cleanup VC         */
+    MPIDI_VC_t *next;           /* VC queue           */
+} MPID_nem_sfi_vc_t;
+#define VC_SFI(vc) ((MPID_nem_sfi_vc_t *)vc->ch.netmod_area.padding)
+
+/* ******************************** */
+/* Per request object data          */
+/* SFI/Netmod specific              */
+/* ******************************** */
+typedef struct {
+    context_t sfi_context;      /* Context Object              */
+    void *addr;                 /* SFI Address                 */
+    event_callback_fn event_callback;   /* Callback Event              */
+    char *pack_buffer;          /* MPI Pack Buffer             */
+    int pack_buffer_size;       /* Pack buffer size            */
+    int match_state;            /* State of the match          */
+    int req_started;            /* Request state               */
+    MPIDI_VC_t *vc;             /* VC paired with this request */
+    uint64_t tag;               /* 64 bit tag request          */
+    MPID_Request *parent;       /* Parent request              */
+} MPID_nem_sfi_req_t;
+#define REQ_SFI(req) ((MPID_nem_sfi_req_t *)((req)->ch.netmod_area.padding))
+
+/* ******************************** */
+/* Logging and function macros      */
+/* ******************************** */
+#undef FUNCNAME
+#define FUNCNAME nothing
+#define BEGIN_FUNC(FUNCNAME)                    \
+  MPIDI_STATE_DECL(FUNCNAME);                   \
+  MPIDI_FUNC_ENTER(FUNCNAME);
+#define END_FUNC(FUNCNAME)                      \
+  MPIDI_FUNC_EXIT(FUNCNAME);
+#define END_FUNC_RC(FUNCNAME) \
+  fn_exit:                    \
+  MPIDI_FUNC_EXIT(FUNCNAME);  \
+  return mpi_errno;           \
+fn_fail:                      \
+  goto fn_exit;
+
+#define __SHORT_FILE__                          \
+  (strrchr(__FILE__,'/')                        \
+   ? strrchr(__FILE__,'/')+1                    \
+   : __FILE__                                   \
+)
+#define DECL_FUNC(FUNCNAME)  MPIU_QUOTE(FUNCNAME)
+#define SFI_COMPILE_TIME_ASSERT(expr_)                                  \
+  do { switch(0) { case 0: case (expr_): default: break; } } while (0)
+
+#define FI_RC(FUNC,STR)                                         \
+  do                                                            \
+    {                                                           \
+      ssize_t _ret = FUNC;                                      \
+      MPIU_ERR_##CHKANDJUMP4(_ret<0,                            \
+                           mpi_errno,                           \
+                           MPI_ERR_OTHER,                       \
+                           "**sfi_"#STR,                        \
+                           "**sfi_"#STR" %s %d %s %s",          \
+                           __SHORT_FILE__,                      \
+                           __LINE__,                            \
+                           FCNAME,                              \
+                           fi_strerror(-_ret));                 \
+    } while (0)
+
+#define PMI_RC(FUNC,STR)                                        \
+  do                                                            \
+    {                                                           \
+      pmi_errno  = FUNC;                                        \
+      MPIU_ERR_##CHKANDJUMP4(pmi_errno!=PMI_SUCCESS,            \
+                           mpi_errno,                           \
+                           MPI_ERR_OTHER,                       \
+                           "**sfi_"#STR,                        \
+                           "**sfi_"#STR" %s %d %s %s",          \
+                           __SHORT_FILE__,                      \
+                           __LINE__,                            \
+                           FCNAME,                              \
+                           #STR);                               \
+    } while (0)
+
+#define MPI_RC(FUNC)                                        \
+  do                                                        \
+    {                                                       \
+      mpi_errno  = FUNC;                                    \
+      if (mpi_errno) MPIU_ERR_POP(mpi_errno);               \
+    } while (0);
+
+#define VC_READY_CHECK(vc)                      \
+({                                              \
+  if (1 != VC_SFI(vc)->ready) {                 \
+    MPI_RC(MPID_nem_sfi_vc_connect(vc));        \
+  }                                             \
+})
+
+#define SFI_ADDR_INIT(src, vc, remote_proc) \
+({                                          \
+  if (MPI_ANY_SOURCE != src) {              \
+    MPIU_Assert(vc != NULL);                \
+    VC_READY_CHECK(vc);                     \
+    remote_proc = VC_SFI(vc)->direct_addr;  \
+  } else {                                  \
+    MPIU_Assert(vc == NULL);                \
+    remote_proc = gl_data.any_addr;         \
+  }                                         \
+})
+
+
+#define NO_PGID 0
+
+/* **************************************************************************
+ *  match/ignore bit manipulation
+ * **************************************************************************
+ * 0123 4567 01234567 0123 4567 01234567 0123 4567 01234567 01234567 01234567
+ *     |                  |                  |
+ * ^   |    context id    |       source     |       message tag
+ * |   |                  |                  |
+ * +---- protocol
+ * ************************************************************************** */
+#define MPID_PROTOCOL_MASK       (0xF000000000000000ULL)
+#define MPID_CONTEXT_MASK        (0x0FFFF00000000000ULL)
+#define MPID_SOURCE_MASK         (0x00000FFFF0000000ULL)
+#define MPID_TAG_MASK            (0x000000000FFFFFFFULL)
+#define MPID_PGID_MASK           (0x00000000FFFFFFFFULL)
+#define MPID_PSOURCE_MASK        (0x0000FFFF00000000ULL)
+#define MPID_PORT_NAME_MASK      (0x0FFF000000000000ULL)
+#define MPID_SYNC_SEND           (0x1000000000000000ULL)
+#define MPID_SYNC_SEND_ACK       (0x2000000000000000ULL)
+#define MPID_MSG_RTS             (0x3000000000000000ULL)
+#define MPID_MSG_CTS             (0x4000000000000000ULL)
+#define MPID_MSG_DATA            (0x5000000000000000ULL)
+#define MPID_CONN_REQ            (0x6000000000000000ULL)
+#define MPID_SOURCE_SHIFT        (16)
+#define MPID_TAG_SHIFT           (28)
+#define MPID_PSOURCE_SHIFT       (16)
+#define MPID_PORT_SHIFT          (32)
+#define SFI_KVSAPPSTRLEN         1024
+
+/* ******************************** */
+/* Request manipulation inlines     */
+/* ******************************** */
+static inline void MPID_nem_sfi_init_req(MPID_Request * req)
+{
+    memset(REQ_SFI(req), 0, sizeof(MPID_nem_sfi_req_t));
+}
+
+static inline int MPID_nem_sfi_create_req(MPID_Request ** request, int refcnt)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request *req;
+    req = MPID_Request_create();
+    MPIU_Assert(req);
+    MPIU_Object_set_ref(req, refcnt);
+    MPID_nem_sfi_init_req(req);
+    *request = req;
+    return mpi_errno;
+}
+
+/* ******************************** */
+/* Tag Manipulation inlines         */
+/* ******************************** */
+static inline uint64_t init_sendtag(MPIR_Context_id_t contextid, int source, int tag, uint64_t type)
+{
+    uint64_t match_bits;
+    match_bits = contextid;
+    match_bits = (match_bits << MPID_SOURCE_SHIFT);
+    match_bits |= source;
+    match_bits = (match_bits << MPID_TAG_SHIFT);
+    match_bits |= (MPID_TAG_MASK & tag) | type;
+    return match_bits;
+}
+
+/* receive posting */
+static inline uint64_t init_recvtag(uint64_t * mask_bits,
+                                    MPIR_Context_id_t contextid, int source, int tag)
+{
+    uint64_t match_bits = 0;
+    *mask_bits = MPID_SYNC_SEND;
+    match_bits = contextid;
+    match_bits = (match_bits << MPID_SOURCE_SHIFT);
+    if (MPI_ANY_SOURCE == source) {
+        match_bits = (match_bits << MPID_TAG_SHIFT);
+        *mask_bits |= MPID_SOURCE_MASK;
+    }
+    else {
+        match_bits |= source;
+        match_bits = (match_bits << MPID_TAG_SHIFT);
+    }
+    if (MPI_ANY_TAG == tag)
+        *mask_bits |= MPID_TAG_MASK;
+    else
+        match_bits |= (MPID_TAG_MASK & tag);
+
+    return match_bits;
+}
+
+static inline int get_tag(uint64_t match_bits)
+{
+    return ((int) (match_bits & MPID_TAG_MASK));
+}
+
+static inline int get_source(uint64_t match_bits)
+{
+    return ((int) ((match_bits & MPID_SOURCE_MASK) >> (MPID_TAG_SHIFT)));
+}
+
+static inline int get_psource(uint64_t match_bits)
+{
+    return ((int) ((match_bits & MPID_PSOURCE_MASK) >> (MPID_PORT_SHIFT)));
+}
+
+static inline int get_pgid(uint64_t match_bits)
+{
+    return ((int) (match_bits & MPID_PGID_MASK));
+}
+
+static inline int get_port(uint64_t match_bits)
+{
+    return ((int) ((match_bits & MPID_PORT_NAME_MASK) >> MPID_TAG_SHIFT));
+}
+
+/* ************************************************************************** */
+/* MPICH Comm Override and Netmod functions                                   */
+/* ************************************************************************** */
+int MPID_nem_sfi_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *req);
+int MPID_nem_sfi_send(struct MPIDI_VC *vc, const void *buf, int count,
+                      MPI_Datatype datatype, int dest, int tag, MPID_Comm * comm,
+                      int context_offset, struct MPID_Request **request);
+int MPID_nem_sfi_isend(struct MPIDI_VC *vc, const void *buf, int count,
+                       MPI_Datatype datatype, int dest, int tag, MPID_Comm * comm,
+                       int context_offset, struct MPID_Request **request);
+int MPID_nem_sfi_ssend(struct MPIDI_VC *vc, const void *buf, int count,
+                       MPI_Datatype datatype, int dest, int tag, MPID_Comm * comm,
+                       int context_offset, struct MPID_Request **request);
+int MPID_nem_sfi_issend(struct MPIDI_VC *vc, const void *buf, int count,
+                        MPI_Datatype datatype, int dest, int tag, MPID_Comm * comm,
+                        int context_offset, struct MPID_Request **request);
+int MPID_nem_sfi_cancel_send(struct MPIDI_VC *vc, struct MPID_Request *sreq);
+int MPID_nem_sfi_cancel_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq);
+int MPID_nem_sfi_iprobe(struct MPIDI_VC *vc, int source, int tag, MPID_Comm * comm,
+                        int context_offset, int *flag, MPI_Status * status);
+int MPID_nem_sfi_improbe(struct MPIDI_VC *vc, int source, int tag, MPID_Comm * comm,
+                         int context_offset, int *flag, MPID_Request ** message,
+                         MPI_Status * status);
+int MPID_nem_sfi_anysource_iprobe(int tag, MPID_Comm * comm, int context_offset,
+                                  int *flag, MPI_Status * status);
+int MPID_nem_sfi_anysource_improbe(int tag, MPID_Comm * comm, int context_offset,
+                                   int *flag, MPID_Request ** message, MPI_Status * status);
+void MPID_nem_sfi_anysource_posted(MPID_Request * rreq);
+int MPID_nem_sfi_anysource_matched(MPID_Request * rreq);
+int MPID_nem_sfi_send_data(cq_tagged_entry_t * wc, MPID_Request * sreq);
+int MPID_nem_sfi_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq,
+                               void *hdr, MPIDI_msg_sz_t hdr_sz);
+int MPID_nem_sfi_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz,
+                                 void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr);
+int MPID_nem_sfi_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
+                             MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
+
+/* ************************************************************************** */
+/* SFI utility functions : not exposed as a netmod public API                 */
+/* ************************************************************************** */
+#define MPID_NONBLOCKING_POLL 0
+#define MPID_BLOCKING_POLL 1
+int MPID_nem_sfi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_sfi_finalize(void);
+int MPID_nem_sfi_vc_init(MPIDI_VC_t * vc);
+int MPID_nem_sfi_get_business_card(int my_rank, char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_sfi_poll(int in_blocking_poll);
+int MPID_nem_sfi_vc_terminate(MPIDI_VC_t * vc);
+int MPID_nem_sfi_vc_connect(MPIDI_VC_t * vc);
+int MPID_nem_sfi_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc);
+int MPID_nem_sfi_vc_destroy(MPIDI_VC_t * vc);
+int MPID_nem_sfi_cm_init(MPIDI_PG_t * pg_p, int pg_rank);
+int MPID_nem_sfi_cm_finalize(void);
+
+extern MPID_nem_sfi_global_t gl_data;
+extern MPIDI_Comm_ops_t _g_comm_ops;
+
+#endif
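
As a worked example of the match-bits layout and the tag-manipulation inlines
above (init_sendtag, get_tag, get_source), here is a small editorial sketch
that is not part of the patch; the context id, source rank, and MPI tag values
are arbitrary.

    /* Round-trip a send tag through the inlines in sfi_impl.h above.
     * Context id 7, source rank 3, MPI tag 42, protocol bits = sync send. */
    static inline void sfi_matchbits_roundtrip_example(void)
    {
        uint64_t bits = init_sendtag((MPIR_Context_id_t) 7, 3, 42, MPID_SYNC_SEND);
        MPIU_Assert(get_tag(bits) == 42);      /* low 28 bits hold the MPI tag  */
        MPIU_Assert(get_source(bits) == 3);    /* next 16 bits hold the source  */
        MPIU_Assert((bits & MPID_PROTOCOL_MASK) == MPID_SYNC_SEND);
    }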
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_init.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_init.c
new file mode 100644
index 0000000..88a6496
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_init.c
@@ -0,0 +1,461 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+static inline int dump_and_choose_providers(info_t * prov, info_t ** prov_use);
+static inline int compile_time_checking();
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_init)
+int MPID_nem_sfi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int ret, fi_version, i, len, pmi_errno;
+    int mpi_errno = MPI_SUCCESS;
+    info_t hints, *prov_tagged, *prov_use;
+    cq_attr_t cq_attr;
+    av_attr_t av_attr;
+    char kvsname[SFI_KVSAPPSTRLEN], key[SFI_KVSAPPSTRLEN], bc[SFI_KVSAPPSTRLEN];
+    char *my_bc, *addrs, *null_addr;
+    fi_addr_t *fi_addrs = NULL;
+    MPIDI_VC_t *vc;
+
+    BEGIN_FUNC(FCNAME);
+    MPIU_CHKLMEM_DECL(2);
+
+    compile_time_checking();
+    /* ------------------------------------------------------------------------ */
+    /* Hints to filter providers                                                */
+    /* See man fi_getinfo for a list                                            */
+    /* of all filters                                                           */
+    /* mode:  Select capabilities the netmod is prepared to support.            */
+    /*        In this case, the netmod will pass a context into                 */
+    /*        communication calls.                                              */
+    /*        Note that we do not set the FI_LOCAL_MR mode bit, which means     */
+    /*        this netmod does not exchange memory regions on communication     */
+    /*        calls.  SFI requires that all communication calls use a           */
+    /*        registered mr, but this netmod only supports transfers on a       */
+    /*        dynamic memory region that spans all of memory.  We therefore     */
+    /*        set the FI_DYNAMIC_MR bit instead, to tell SFI our requirement    */
+    /*        and filter providers appropriately.                               */
+    /* ep_type:  reliable datagram operation                                    */
+    /* caps:     Capabilities required from the provider.  The buffered         */
+    /*           receive, cancel, and remote complete bits implement MPI        */
+    /*           semantics; tagged is used to support tag matching.             */
+    /*           We expect to register all memory up front for use with this    */
+    /*           endpoint, so the netmod requires dynamic memory regions.       */
+    /* ------------------------------------------------------------------------ */
+    memset(&hints, 0, sizeof(hints));
+    hints.mode = FI_CONTEXT;
+    hints.ep_type = FI_EP_RDM;  /* Reliable datagram         */
+    hints.caps = FI_TAGGED;     /* Tag matching interface    */
+    hints.caps |= FI_BUFFERED_RECV;     /* Buffered receives         */
+    hints.caps |= FI_REMOTE_COMPLETE;   /* Remote completion         */
+    hints.caps |= FI_CANCEL;    /* Support cancel            */
+    hints.caps |= FI_DYNAMIC_MR;        /* Global dynamic mem region */
+
+    /* ------------------------------------------------------------------------ */
+    /* FI_VERSION provides binary backward and forward compatibility support    */
+    /* Specify the version of SFI this netmod is coded to; the provider will    */
+    /* select struct layouts that are compatible with this version.             */
+    /* ------------------------------------------------------------------------ */
+    fi_version = FI_VERSION(1, 0);
+
+    /* ------------------------------------------------------------------------ */
+    /* fi_getinfo:  returns information about fabric services for reaching a    */
+    /* remote node or service.  This does not necessarily allocate resources.   */
+    /* Pass NULL for name/service because we want a list of supported providers */
+    /* ------------------------------------------------------------------------ */
+    domain_attr_t domain_attr;
+    memset(&domain_attr, 0, sizeof(domain_attr));
+
+    tx_ctx_attr_t tx_attr;
+    memset(&tx_attr, 0, sizeof(tx_attr));
+
+    domain_attr.threading = FI_THREAD_PROGRESS;
+    domain_attr.control_progress = FI_PROGRESS_AUTO;
+    tx_attr.op_flags = FI_REMOTE_COMPLETE;
+    hints.domain_attr = &domain_attr;
+    hints.tx_attr = &tx_attr;
+
+    FI_RC(fi_getinfo(fi_version,        /* Interface version requested               */
+                     NULL,      /* Optional name or fabric to resolve        */
+                     NULL,      /* Service name or port number to request    */
+                     0ULL,      /* Flag:  node/service specify local address */
+                     &hints,    /* In:  Hints to filter available providers  */
+                     &prov_tagged),     /* Out: List of providers that match hints   */
+          getinfo);
+    MPIU_ERR_CHKANDJUMP4(prov_tagged == NULL, mpi_errno, MPI_ERR_OTHER,
+                         "**sfi_getinfo", "**sfi_getinfo %s %d %s %s",
+                         __SHORT_FILE__, __LINE__, FCNAME, "No tag matching provider found");
+    /* ------------------------------------------------------------------------ */
+    /* Open fabric                                                              */
+    /* The getinfo struct returns a fabric attribute struct that can be used to */
+    /* instantiate the virtual or physical network.  This opens a "fabric       */
+    /* provider".  We choose the first available fabric, but getinfo            */
+    /* returns a list.  See man fi_fabric for details.                          */
+    /* ------------------------------------------------------------------------ */
+    dump_and_choose_providers(prov_tagged, &prov_use);
+    FI_RC(fi_fabric(prov_use->fabric_attr,      /* In:   Fabric attributes */
+                    &gl_data.fabric,    /* Out:  Fabric descriptor */
+                    NULL), openfabric); /* Context: fabric events  */
+
+    /* ------------------------------------------------------------------------ */
+    /* Create the access domain, which is the physical or virtual network or    */
+    /* hardware port/collection of ports.  Returns a domain object that can be  */
+    /* used to create endpoints.  See man fi_domain for details.                */
+    /* Refine get_info filter for additional capabilities                       */
+    /* threading:  Disable locking; MPICH handles the locking model             */
+    /* control_progress:  enable async progress                                 */
+    /* op_flags:  Specifies default flags to set on all communication.          */
+    /*            In this case, we want remote completion to be set by default. */
+    /* ------------------------------------------------------------------------ */
+    FI_RC(fi_domain(gl_data.fabric,     /* In:  Fabric object             */
+                    prov_use,   /* In:  default domain attributes */
+                    &gl_data.domain,    /* Out: domain object             */
+                    NULL), opendomain); /* Context: Domain events         */
+
+    /* ------------------------------------------------------------------------ */
+    /* Create a transport level communication endpoint.  To use the endpoint,   */
+    /* it must be bound to the resources it consumes, such as address vectors,  */
+    /* counters, and completion queues, and then enabled.                       */
+    /* See man fi_endpoint for more details.                                    */
+    /* ------------------------------------------------------------------------ */
+    FI_RC(fi_endpoint(gl_data.domain,   /* In: Domain Object        */
+                      prov_use, /* In: Configuration object */
+                      &gl_data.endpoint,        /* Out: Endpoint Object     */
+                      NULL), openep);   /* Context: endpoint events */
+
+    /* ------------------------------------------------------------------------ */
+    /* Create the objects that will be bound to the endpoint.                   */
+    /* The objects include:                                                     */
+    /*     * completion queue for events                                        */
+    /*     * address vector of other endpoint addresses                         */
+    /*     * dynamic memory-spanning memory region                              */
+    /* Other objects could also be created but are unused by this netmod:       */
+    /*     * counters for incoming writes                                       */
+    /*     * completion counters for put and get                                */
+    /* ------------------------------------------------------------------------ */
+    FI_RC(fi_mr_reg(gl_data.domain,     /* In:  Domain Object              */
+                    0,  /* In:  Lower memory address       */
+                    UINTPTR_MAX,        /* In:  Upper memory address       */
+                    FI_SEND | FI_RECV,  /* In:  Expose MR for read/write   */
+                    0ULL,       /* In:  base MR offset             */
+                    0ULL,       /* In:  requested key              */
+                    0ULL,       /* In:  No flags                   */
+                    &gl_data.mr,        /* Out: memregion object           */
+                    NULL), mr_reg);     /* Context: memregion events       */
+
+    memset(&cq_attr, 0, sizeof(cq_attr));
+    cq_attr.format = FI_CQ_FORMAT_TAGGED;
+    FI_RC(fi_cq_open(gl_data.domain,    /* In:  Domain Object         */
+                     &cq_attr,  /* In:  Configuration object  */
+                     &gl_data.cq,       /* Out: CQ Object             */
+                     NULL), opencq);    /* Context: CQ events         */
+
+    memset(&av_attr, 0, sizeof(av_attr));
+    av_attr.type = FI_AV_MAP;   /* Mapped addressing mode     */
+    FI_RC(fi_av_open(gl_data.domain,    /* In:  Domain Object         */
+                     &av_attr,  /* In:  Configuration object  */
+                     &gl_data.av,       /* Out: AV Object             */
+                     NULL), avopen);    /* Context: AV events         */
+
+    /* --------------------------------------------- */
+    /* Bind the MR, CQ and AV to the endpoint object */
+    /* --------------------------------------------- */
+    FI_RC(fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.mr, 0), bind);
+    FI_RC(fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.cq, FI_SEND | FI_RECV), bind);
+    FI_RC(fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.av, 0), bind);
+
+    /* ------------------------------------- */
+    /* Enable the endpoint for communication */
+    /* This commits the bind operations      */
+    /* ------------------------------------- */
+    FI_RC(fi_enable(gl_data.endpoint), ep_enable);
+
+    /* --------------------------- */
+    /* Free providers info         */
+    /* --------------------------- */
+    fi_freeinfo(prov_use);
+
+    /* ---------------------------------------------------- */
+    /* Exchange endpoint addresses using scalable database  */
+    /* or job launcher; in this case, the PMI interfaces    */
+    /* ---------------------------------------------------- */
+    gl_data.bound_addrlen = sizeof(gl_data.bound_addr);
+    FI_RC(fi_getname((fid_t) gl_data.endpoint, &gl_data.bound_addr,
+                     &gl_data.bound_addrlen), getname);
+
+    /* -------------------------------- */
+    /* Get our business card            */
+    /* -------------------------------- */
+    my_bc = *bc_val_p;
+    MPI_RC(MPID_nem_sfi_get_business_card(pg_rank, bc_val_p, val_max_sz_p));
+
+    /* -------------------------------- */
+    /* Publish the business card        */
+    /* to the KVS                       */
+    /* -------------------------------- */
+    PMI_RC(PMI_KVS_Get_my_name(kvsname, SFI_KVSAPPSTRLEN), pmi);
+    sprintf(key, "SFI-%d", pg_rank);
+
+    PMI_RC(PMI_KVS_Put(kvsname, key, my_bc), pmi);
+    PMI_RC(PMI_KVS_Commit(kvsname), pmi);
+
+    /* -------------------------------- */
+    /* Set the MPI maximum tag value    */
+    /* -------------------------------- */
+    MPIR_Process.attrs.tag_ub = (1 << MPID_TAG_SHIFT) - 1;
+
+    /* --------------------------------- */
+    /* Wait for all the ranks to publish */
+    /* their business card               */
+    /* --------------------------------- */
+    PMI_Barrier();
+
+    /* --------------------------------- */
+    /* Retrieve every rank's address    */
+    /* from the KVS and store it in a   */
+    /* local table                      */
+    /* --------------------------------- */
+    MPIU_CHKLMEM_MALLOC(addrs, char *, pg_p->size * gl_data.bound_addrlen, mpi_errno, "addrs");
+
+    for (i = 0; i < pg_p->size; ++i) {
+        sprintf(key, "SFI-%d", i);
+
+        PMI_RC(PMI_KVS_Get(kvsname, key, bc, SFI_KVSAPPSTRLEN), pmi);
+        ret = MPIU_Str_get_binary_arg(bc, "SFI",
+                                      (char *) &addrs[i * gl_data.bound_addrlen],
+                                      gl_data.bound_addrlen, &len);
+        MPIU_ERR_CHKANDJUMP((ret != MPIU_STR_SUCCESS && ret != MPIU_STR_NOMEM) ||
+                            (size_t) len != gl_data.bound_addrlen,
+                            mpi_errno, MPI_ERR_OTHER, "**badbusinesscard");
+    }
+
+    /* ---------------------------------------------------- */
+    /* Map the addresses into an address vector             */
+    /* The addressing mode is "map", so we must provide     */
+    /* storage to store the per destination addresses       */
+    /* ---------------------------------------------------- */
+    fi_addrs = MPIU_Malloc(pg_p->size * sizeof(fi_addr_t));
+    FI_RC(fi_av_insert(gl_data.av, addrs, pg_p->size, fi_addrs, 0ULL, NULL), avmap);
+
+    /* ---------------------------------------------------- */
+    /* Insert the ANY_SRC address                           */
+    /* ---------------------------------------------------- */
+    MPIU_CHKLMEM_MALLOC(null_addr, char *, 1 * gl_data.bound_addrlen, mpi_errno, "null_addr");
+    memset(null_addr, 0, gl_data.bound_addrlen);
+
+    FI_RC(fi_av_insert(gl_data.av, null_addr, 1, &gl_data.any_addr, 0ULL, NULL), avmap);
+
+    /* --------------------------------- */
+    /* Store the direct addresses in     */
+    /* the ranks' respective VCs         */
+    /* --------------------------------- */
+    for (i = 0; i < pg_p->size; ++i) {
+        MPIDI_PG_Get_vc(pg_p, i, &vc);
+        VC_SFI(vc)->direct_addr = fi_addrs[i];
+        VC_SFI(vc)->ready = 1;
+    }
+
+    /* --------------------------------------------- */
+    /* Initialize the connection management routines */
+    /* This sets up the function handlers and        */
+    /* global data structures, and posts any         */
+    /* persistent communication requests that are    */
+    /* required, such as connection management and   */
+    /* startcontig messages                          */
+    /* --------------------------------------------- */
+    MPI_RC(MPID_nem_sfi_cm_init(pg_p, pg_rank));
+  fn_exit:
+    if (fi_addrs)
+        MPIU_Free(fi_addrs);
+    MPIU_CHKLMEM_FREEALL();
+    END_FUNC(FCNAME);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_finalize)
+int MPID_nem_sfi_finalize(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ret = 0;
+    BEGIN_FUNC(FCNAME);
+
+    /* --------------------------------------------- */
+    /* Synchronization                               */
+    /* Barrier across all ranks in this world        */
+    /* --------------------------------------------- */
+    MPIR_Barrier_impl(MPIR_Process.comm_world, &ret);
+
+    /* --------------------------------------------- */
+    /* Finalize connection management routines       */
+    /* Cancels any persistent/global requests and    */
+    /* frees any resources from cm_init()            */
+    /* --------------------------------------------- */
+    MPI_RC(MPID_nem_sfi_cm_finalize());
+
+    FI_RC(fi_close((fid_t) gl_data.mr), mrclose);
+    FI_RC(fi_close((fid_t) gl_data.av), avclose);
+    FI_RC(fi_close((fid_t) gl_data.endpoint), epclose);
+    FI_RC(fi_close((fid_t) gl_data.cq), cqclose);
+    FI_RC(fi_close((fid_t) gl_data.domain), domainclose);
+    FI_RC(fi_close((fid_t) gl_data.fabric), fabricclose);
+    END_FUNC_RC(FCNAME);
+}
+
+static inline int compile_time_checking()
+{
+    SFI_COMPILE_TIME_ASSERT(sizeof(MPID_nem_sfi_vc_t) <= MPID_NEM_VC_NETMOD_AREA_LEN);
+    SFI_COMPILE_TIME_ASSERT(sizeof(MPID_nem_sfi_req_t) <= MPID_NEM_REQ_NETMOD_AREA_LEN);
+    SFI_COMPILE_TIME_ASSERT(sizeof(iovec_t) == sizeof(MPID_IOV));
+    MPIU_Assert(((void *) &(((iovec_t *) 0)->iov_base)) ==
+                ((void *) &(((MPID_IOV *) 0)->MPID_IOV_BUF)));
+    MPIU_Assert(((void *) &(((iovec_t *) 0)->iov_len)) ==
+                ((void *) &(((MPID_IOV *) 0)->MPID_IOV_LEN)));
+    MPIU_Assert(sizeof(((iovec_t *) 0)->iov_len) == sizeof(((MPID_IOV *) 0)->MPID_IOV_LEN));
+
+    /* ------------------------------------------------------------------------ */
+    /* Generate the MPICH catalog files                                         */
+/* The high level MPICH build scripts inspect MPIU_ERR_ macros to generate  */
+/* the message catalog.  However, this netmod buries the messages under the */
+/* FI_RC macros, so the catalog doesn't get generated.  The build system    */
+/* likely needs an MPIU_ERR_REGISTER macro                                  */
+    /* ------------------------------------------------------------------------ */
+#if 0
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_avmap", "**sfi_avmap %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_tsendto", "**sfi_tsendto %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_trecvfrom", "**sfi_trecvfrom %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_getinfo", "**sfi_getinfo %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_openep", "**sfi_openep %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_openfabric", "**sfi_openfabric %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_opendomain", "**sfi_opendomain %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_opencq", "**sfi_opencq %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_avopen", "**sfi_avopen %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_bind", "**sfi_bind %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_ep_enable", "**sfi_ep_enable %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_getname", "**sfi_getname %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_avclose", "**sfi_avclose %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_epclose", "**sfi_epclose %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_cqclose", "**sfi_cqclose %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_fabricclose", "**sfi_fabricclose %s %d %s %s", a, b, a,
+                  a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_domainclose", "**sfi_domainclose %s %d %s %s", a, b, a,
+                  a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_tsearch", "**sfi_tsearch %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_poll", "**sfi_poll %s %d %s %s", a, b, a, a);
+    MPIU_ERR_SET2(e, MPI_ERR_OTHER, "**sfi_cancel", "**sfi_cancel %s %d %s %s", a, b, a, a);
+#endif
+    return 0;
+}
+
+
+/*
+=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
+
+cvars:
+    - name        : MPIR_CVAR_DUMP_PROVIDERS
+      category    : DEVELOPER
+      type        : boolean
+      default     : false
+      class       : device
+      verbosity   : MPI_T_VERBOSITY_MPIDEV_DETAIL
+      scope       : MPI_T_SCOPE_LOCAL
+      description : >-
+        If true, dump provider information at init
+
+=== END_MPI_T_CVAR_INFO_BLOCK ===
+*/
+static inline int dump_and_choose_providers(info_t * prov, info_t ** prov_use)
+{
+    info_t *p = prov;
+    int i = 0;
+    *prov_use = prov;
+    if (MPIR_CVAR_DUMP_PROVIDERS) {
+        fprintf(stdout, "Dumping Providers(first=%p):\n", prov);
+        while (p) {
+            fprintf(stdout, " ********** Provider %d (%p) *********\n", i++, p);
+            fprintf(stdout, "%-18s: %-#20" PRIx64 "\n", "caps", p->caps);
+            fprintf(stdout, "%-18s: %-#20" PRIx64 "\n", "mode", p->mode);
+            fprintf(stdout, "%-18s: %-#20" PRIx32 "\n", "ep_type", p->ep_type);
+            fprintf(stdout, "%-18s: %-#20" PRIx32 "\n", "addr_format", p->addr_format);
+            fprintf(stdout, "%-18s: %-20lu\n", "src_addrlen", p->src_addrlen);
+            fprintf(stdout, "%-18s: %-20lu\n", "dest_addrlen", p->dest_addrlen);
+            fprintf(stdout, "%-18s: %-20p\n", "src_addr", p->src_addr);
+            fprintf(stdout, "%-18s: %-20p\n", "dest_addr", p->dest_addr);
+            fprintf(stdout, "%-18s: %-20p\n", "connreq", p->connreq);
+            fprintf(stdout, "%-18s: %-20p\n", "tx_attr", p->tx_attr);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".caps", p->tx_attr->caps);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".mode", p->tx_attr->mode);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".op_flags", p->tx_attr->op_flags);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".msg_order", p->tx_attr->msg_order);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".inject_size", p->tx_attr->inject_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".size", p->tx_attr->size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".iov_limit", p->tx_attr->iov_limit);
+            fprintf(stdout, "%-18s: %-20p\n", "rx_attr", p->rx_attr);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".caps", p->rx_attr->caps);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".mode", p->rx_attr->mode);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".op_flags", p->rx_attr->op_flags);
+            fprintf(stdout, "       %-18s: %-#20" PRIx64 "\n", ".msg_order", p->rx_attr->msg_order);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".total_buffered_recv",
+                    p->rx_attr->total_buffered_recv);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".size", p->rx_attr->size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".iov_limit", p->rx_attr->iov_limit);
+            fprintf(stdout, "%-18s: %-20p\n", "ep_attr", p->ep_attr);
+            fprintf(stdout, "       %-18s: %-#20" PRIx32 "\n", ".protocol", p->ep_attr->protocol);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".max_msg_size", p->ep_attr->max_msg_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".inject_size", p->ep_attr->inject_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".total_buffered_recv",
+                    p->ep_attr->total_buffered_recv);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".max_order_raw_size",
+                    p->ep_attr->max_order_raw_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".max_order_war_size",
+                    p->ep_attr->max_order_war_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".max_order_waw_size",
+                    p->ep_attr->max_order_waw_size);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".mem_tag_format",
+                    p->ep_attr->mem_tag_format);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".msg_order", p->ep_attr->msg_order);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".tx_ctx_cnt", p->ep_attr->tx_ctx_cnt);
+            fprintf(stdout, "       %-18s: %-20lu\n", ".rx_ctx_cnt", p->ep_attr->rx_ctx_cnt);
+            fprintf(stdout, "%-18s: %-20p\n", "domain_attr", p->domain_attr);
+            fprintf(stdout, "           %-18s: %-20s\n", ".name", p->domain_attr->name);
+            fprintf(stdout, "           %-18s: %-#20" PRIx32 "\n", ".threading",
+                    p->domain_attr->threading);
+            fprintf(stdout, "           %-18s: %-#20" PRIx32 "\n", ".control_progress",
+                    p->domain_attr->control_progress);
+            fprintf(stdout, "           %-18s: %-#20" PRIx32 "\n", ".data_progress",
+                    p->domain_attr->data_progress);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".mr_key_size",
+                    p->domain_attr->mr_key_size);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".cq_data_size",
+                    p->domain_attr->cq_data_size);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".ep_cnt", p->domain_attr->ep_cnt);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".tx_ctx_cnt",
+                    p->domain_attr->tx_ctx_cnt);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".rx_ctx_cnt",
+                    p->domain_attr->rx_ctx_cnt);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".max_ep_tx_ctx",
+                    p->domain_attr->max_ep_tx_ctx);
+            fprintf(stdout, "           %-18s: %-20lu\n", ".max_ep_rx_ctx",
+                    p->domain_attr->max_ep_rx_ctx);
+            fprintf(stdout, "%-18s: %-20p\n", "fabric_attr", p->fabric_attr);
+            fprintf(stdout, "           %-18s: %-20s\n", ".name", p->fabric_attr->name);
+            fprintf(stdout, "           %-18s: %-20s\n", ".prov_name", p->fabric_attr->prov_name);
+            fprintf(stdout, "           %-18s: %-#20" PRIx32 "\n", ".prov_version",
+                    p->fabric_attr->prov_version);
+            p = p->next;
+        }
+    }
+    return i;
+}
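
The initialization path above reduces to a fixed bring-up order for the SFI objects.  The following is a condensed, illustrative sketch only (not part of the patch): it reuses the calls and the gl_data / prov_tagged / prov_use / cq_attr / av_attr names from MPID_nem_sfi_init() above, and omits FI_RC error checking, provider filtering, and the PMI address exchange.

    dump_and_choose_providers(prov_tagged, &prov_use);   /* pick a tagged provider */
    fi_fabric(prov_use->fabric_attr, &gl_data.fabric, NULL);
    fi_domain(gl_data.fabric, prov_use, &gl_data.domain, NULL);
    fi_endpoint(gl_data.domain, prov_use, &gl_data.endpoint, NULL);
    fi_mr_reg(gl_data.domain, 0, UINTPTR_MAX, FI_SEND | FI_RECV,
              0ULL, 0ULL, 0ULL, &gl_data.mr, NULL);      /* whole address space MR */
    cq_attr.format = FI_CQ_FORMAT_TAGGED;
    fi_cq_open(gl_data.domain, &cq_attr, &gl_data.cq, NULL);
    av_attr.type = FI_AV_MAP;
    fi_av_open(gl_data.domain, &av_attr, &gl_data.av, NULL);
    fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.mr, 0); /* bind resources ...     */
    fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.cq, FI_SEND | FI_RECV);
    fi_ep_bind(gl_data.endpoint, (fid_t) gl_data.av, 0);
    fi_enable(gl_data.endpoint);                         /* ... commit the binds   */
    fi_freeinfo(prov_use);

After fi_enable(), the endpoint address is retrieved with fi_getname() and published through PMI so that every rank can fi_av_insert() its peers, exactly as in the loop above.
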
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c
new file mode 100644
index 0000000..3797f92
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c
@@ -0,0 +1,237 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+/* ------------------------------------------------------------------------ */
+/* GET_PGID_AND_SET_MATCH macro looks up the process group to find the      */
+/* correct rank in multiple process groups.  The "contigmsg" family of APIs */
+/* works on a global scope, not on a communicator scope (like tagged MPI).  */
+/* The pgid matching is used for uniquely scoping the tag, usually in       */
+/* intercomms and dynamic process management where there are multiple       */
+/* global world spaces with similar ranks in the global space               */
+/* ------------------------------------------------------------------------ */
+#define GET_PGID_AND_SET_MATCH()                                        \
+({                                                                      \
+  if (vc->pg) {                                                         \
+    MPIDI_PG_IdToNum(gl_data.pg_p, &pgid);                              \
+  } else {                                                              \
+    pgid = NO_PGID;                                                     \
+  }                                                                     \
+  match_bits = (uint64_t)MPIR_Process.comm_world->rank <<               \
+    (MPID_PORT_SHIFT);                                                  \
+  if (0 == pgid) {                                                      \
+    match_bits |= (uint64_t)vc->port_name_tag<<                         \
+      (MPID_PORT_SHIFT+MPID_PSOURCE_SHIFT);                             \
+  }                                                                     \
+  match_bits |= pgid;                                                   \
+  match_bits |= MPID_MSG_RTS;                                           \
+})
+
+/* ------------------------------------------------------------------------ */
+/* START_COMM is common code used by the nemesis netmod functions:          */
+/* iSendContig                                                              */
+/* SendNoncontig                                                            */
+/* iStartContigMsg                                                          */
+/* These routines differ slightly in their behaviors, but can share common  */
+/* code to perform the send.  START_COMM provides that common code, which   */
+/* is based on a tagged rendezvous message.                                 */
+/* The rendezvous is implemented with an RTS-CTS-Data send protocol:        */
+/* CTS_POST()   |                                  |                        */
+/* RTS_SEND()   | -------------------------------> | ue_callback()(sfi_cm.c)*/
+/*              |                                  |   pack_buffer()        */
+/*              |                                  |   DATA_POST()          */
+/*              |                                  |   RTS_POST()           */
+/*              |                                  |   CTS_SEND()           */
+/* CTS_MATCH()  | <------------------------------- |                        */
+/* DATA_SEND()  | ===============================> | handle_packet()        */
+/*              |                                  |   notify_ch3_pkt()     */
+/*              v                                  v                        */
+/* ------------------------------------------------------------------------ */
+#define START_COMM()                                                    \
+  ({                                                                    \
+    GET_PGID_AND_SET_MATCH();                                           \
+    VC_READY_CHECK(vc);                                                 \
+    c = 1;                                                              \
+    MPID_cc_incr(sreq->cc_ptr, &c);                                     \
+    MPID_cc_incr(sreq->cc_ptr, &c);                                     \
+    REQ_SFI(sreq)->event_callback   = MPID_nem_sfi_data_callback;       \
+    REQ_SFI(sreq)->pack_buffer      = pack_buffer;                      \
+    REQ_SFI(sreq)->pack_buffer_size = pkt_len;                          \
+    REQ_SFI(sreq)->vc               = vc;                               \
+    REQ_SFI(sreq)->tag              = match_bits;                       \
+                                                                        \
+    MPID_nem_sfi_create_req(&cts_req, 1);                               \
+    cts_req->dev.OnDataAvail         = NULL;                            \
+    cts_req->dev.next                = NULL;                            \
+    REQ_SFI(cts_req)->event_callback = MPID_nem_sfi_cts_recv_callback;  \
+    REQ_SFI(cts_req)->parent         = sreq;                            \
+                                                                        \
+    FI_RC(fi_trecvfrom(gl_data.endpoint,                                \
+                       NULL,                                            \
+                       0,                                               \
+                       gl_data.mr,                                      \
+                       VC_SFI(vc)->direct_addr,                         \
+                       match_bits | MPID_MSG_CTS,                       \
+                       0, /* Exact tag match, no ignore bits */         \
+                       &(REQ_SFI(cts_req)->sfi_context)),trecvfrom);    \
+    FI_RC(fi_tsendto(gl_data.endpoint,                                  \
+                     &REQ_SFI(sreq)->pack_buffer_size,                  \
+                     sizeof(REQ_SFI(sreq)->pack_buffer_size),           \
+                     gl_data.mr,                                        \
+                     VC_SFI(vc)->direct_addr,                           \
+                     match_bits,                                        \
+                     &(REQ_SFI(sreq)->sfi_context)),tsendto);           \
+  })
+
+
+/* ------------------------------------------------------------------------ */
+/* General handler for the RTS-CTS-Data protocol.  Waits for the completion */
+/* counter to reach two (one decrement each for the RTS send and the CTS    */
+/* receive) before kicking off the bulk data transfer.  On data send        */
+/* completion, the request can be freed                                     */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_data_callback)
+static int MPID_nem_sfi_data_callback(cq_tagged_entry_t * wc, MPID_Request * sreq)
+{
+    int complete = 0, mpi_errno = MPI_SUCCESS;
+    MPIDI_VC_t *vc;
+    req_fn reqFn;
+    uint64_t tag = 0;
+    BEGIN_FUNC(FCNAME);
+    if (sreq->cc == 2) {
+        vc = REQ_SFI(sreq)->vc;
+        REQ_SFI(sreq)->tag = tag | MPID_MSG_DATA;
+        FI_RC(fi_tsendto(gl_data.endpoint,
+                         REQ_SFI(sreq)->pack_buffer,
+                         REQ_SFI(sreq)->pack_buffer_size,
+                         gl_data.mr,
+                         VC_SFI(vc)->direct_addr,
+                         wc->tag | MPID_MSG_DATA, (void *) &(REQ_SFI(sreq)->sfi_context)), tsendto);
+    }
+    if (sreq->cc == 1) {
+        if (REQ_SFI(sreq)->pack_buffer)
+            MPIU_Free(REQ_SFI(sreq)->pack_buffer);
+
+        reqFn = sreq->dev.OnDataAvail;
+        if (!reqFn) {
+            MPIDI_CH3U_Request_complete(sreq);
+        }
+        else {
+            vc = REQ_SFI(sreq)->vc;
+            MPI_RC(reqFn(vc, sreq, &complete));
+        }
+    }
+    else {
+        MPIDI_CH3U_Request_complete(sreq);
+    }
+    END_FUNC_RC(FCNAME);
+}
+
+/* ------------------------------------------------------------------------ */
+/* Signals the CTS has been received.  Call MPID_nem_sfi_data_callback on   */
+/* the parent send request to kick off the bulk data transfer               */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cts_recv_callback)
+static int MPID_nem_sfi_cts_recv_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    MPI_RC(MPID_nem_sfi_data_callback(wc, REQ_SFI(rreq)->parent));
+    MPIDI_CH3U_Request_complete(rreq);
+    END_FUNC_RC(FCNAME);
+}
+
+/* ------------------------------------------------------------------------ */
+/* The nemesis API implementations:                                         */
+/* These functions currently memory copy into a pack buffer before sending  */
+/* To improve performance, we can replace the memory copy with a non-contig */
+/* send (using tsendmsg)                                                    */
+/* For now, the memory copy is the simplest implementation of these         */
+/* functions over a tagged msg interface                                    */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_iSendContig)
+int MPID_nem_sfi_iSendContig(MPIDI_VC_t * vc,
+                             MPID_Request * sreq,
+                             void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
+{
+    int pgid, c, pkt_len, mpi_errno = MPI_SUCCESS;
+    char *pack_buffer;
+    uint64_t match_bits;
+    MPID_Request *cts_req;
+
+    BEGIN_FUNC(FCNAME);
+    MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
+    MPID_nem_sfi_init_req(sreq);
+    pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
+    pack_buffer = MPIU_Malloc(pkt_len);
+    MPIU_Assert(pack_buffer);
+    MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
+    MPIU_Memcpy(pack_buffer + sizeof(MPIDI_CH3_Pkt_t), data, data_sz);
+    START_COMM();
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_SendNoncontig)
+int MPID_nem_sfi_SendNoncontig(MPIDI_VC_t * vc,
+                               MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz)
+{
+    int c, pgid, pkt_len, mpi_errno = MPI_SUCCESS;
+    char *pack_buffer;
+    MPI_Aint data_sz;
+    uint64_t match_bits;
+    MPID_Request *cts_req;
+
+    BEGIN_FUNC(FCNAME);
+    MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_Assert(sreq->dev.segment_first == 0);
+
+    data_sz = sreq->dev.segment_size;
+    pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
+    pack_buffer = MPIU_Malloc(pkt_len);
+    MPIU_Assert(pack_buffer);
+    MPIU_Memcpy(pack_buffer, hdr, hdr_sz);
+    MPID_Segment_pack(sreq->dev.segment_ptr, 0, &data_sz, pack_buffer + sizeof(MPIDI_CH3_Pkt_t));
+    START_COMM();
+    MPID_nem_sfi_poll(MPID_NONBLOCKING_POLL);
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_iStartContigMsg)
+int MPID_nem_sfi_iStartContigMsg(MPIDI_VC_t * vc,
+                                 void *hdr,
+                                 MPIDI_msg_sz_t hdr_sz,
+                                 void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
+{
+    int pkt_len, c, pgid, mpi_errno = MPI_SUCCESS;
+    MPID_Request *sreq;
+    MPID_Request *cts_req;
+    char *pack_buffer;
+    uint64_t match_bits;
+    BEGIN_FUNC(FCNAME);
+    MPIU_Assert(hdr_sz <= (MPIDI_msg_sz_t) sizeof(MPIDI_CH3_Pkt_t));
+
+    MPID_nem_sfi_create_req(&sreq, 2);
+    sreq->kind = MPID_REQUEST_SEND;
+    sreq->dev.OnDataAvail = NULL;
+    sreq->dev.next = NULL;
+    pkt_len = sizeof(MPIDI_CH3_Pkt_t) + data_sz;
+    pack_buffer = MPIU_Malloc(pkt_len);
+    MPIU_Assert(pack_buffer);
+    MPIU_Memcpy((void *) pack_buffer, hdr, hdr_sz);
+    if (data_sz)
+        MPIU_Memcpy((void *) (pack_buffer + sizeof(MPIDI_CH3_Pkt_t)), data, data_sz);
+    START_COMM();
+    *sreq_ptr = sreq;
+    END_FUNC_RC(FCNAME);
+}
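
START_COMM() and the two callbacks above implement the sender side of the RTS-CTS-Data rendezvous shown in the diagram.  Stripped of request bookkeeping, and reusing the names from the macros and callbacks above, the three fabric operations are (illustrative sketch, not part of the patch):

    /* 1. Pre-post the CTS receive so the peer's CTS cannot be missed */
    fi_trecvfrom(gl_data.endpoint, NULL, 0, gl_data.mr,
                 VC_SFI(vc)->direct_addr,
                 match_bits | MPID_MSG_CTS, 0,      /* exact tag match */
                 &(REQ_SFI(cts_req)->sfi_context));

    /* 2. Send the RTS, which carries only the packed payload size */
    fi_tsendto(gl_data.endpoint,
               &REQ_SFI(sreq)->pack_buffer_size,
               sizeof(REQ_SFI(sreq)->pack_buffer_size),
               gl_data.mr, VC_SFI(vc)->direct_addr,
               match_bits, &(REQ_SFI(sreq)->sfi_context));

    /* 3. When the CTS completes (cts_recv_callback -> data_callback),
     *    push the bulk data with the DATA protocol bit set */
    fi_tsendto(gl_data.endpoint,
               REQ_SFI(sreq)->pack_buffer,
               REQ_SFI(sreq)->pack_buffer_size,
               gl_data.mr, VC_SFI(vc)->direct_addr,
               wc->tag | MPID_MSG_DATA,
               (void *) &(REQ_SFI(sreq)->sfi_context));
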
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_progress.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_progress.c
new file mode 100644
index 0000000..8f40aeb
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_progress.c
@@ -0,0 +1,291 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+#define TSEARCH_INIT      0
+#define TSEARCH_NOT_FOUND 1
+#define TSEARCH_FOUND     2
+
+/* ------------------------------------------------------------------------ */
+/* This routine looks up the request that contains a context object         */
+/* ------------------------------------------------------------------------ */
+static inline MPID_Request *context_to_req(void *sfi_context)
+{
+    return (MPID_Request *) container_of(sfi_context, MPID_Request, ch.netmod_area.padding);
+}
+
+/* ------------------------------------------------------------------------ */
+/* Populate the status object from the return of the tsearch                */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(search_complete)
+static int search_complete(uint64_t tag, size_t msglen, MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    rreq->status.MPI_SOURCE = get_source(tag);
+    rreq->status.MPI_TAG = get_tag(tag);
+    rreq->status.MPI_ERROR = MPI_SUCCESS;
+    MPIR_STATUS_SET_COUNT(rreq->status, msglen);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Check whether wc->data is filled.  If wc->data is set, a message was     */
+/* found and we fill out the status.  Otherwise no match was found, and we  */
+/* set the state of the search request to TSEARCH_NOT_FOUND                 */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(tsearch_callback)
+static int tsearch_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    if (wc->data) {
+        REQ_SFI(rreq)->match_state = TSEARCH_FOUND;
+        rreq->status.MPI_SOURCE = get_source(wc->tag);
+        rreq->status.MPI_TAG = get_tag(wc->tag);
+        MPIR_STATUS_SET_COUNT(rreq->status, wc->len);
+        rreq->status.MPI_ERROR = MPI_SUCCESS;
+    }
+    else {
+        REQ_SFI(rreq)->match_state = TSEARCH_NOT_FOUND;
+    }
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_iprobe_impl)
+int MPID_nem_sfi_iprobe_impl(struct MPIDI_VC *vc,
+                             int source,
+                             int tag,
+                             MPID_Comm * comm,
+                             int context_offset,
+                             int *flag, MPI_Status * status, MPID_Request ** rreq_ptr)
+{
+    int ret, mpi_errno = MPI_SUCCESS;
+    fi_addr_t remote_proc = 0;
+    uint64_t match_bits, mask_bits;
+    size_t len;
+    MPID_Request rreq_s, *rreq;
+
+    BEGIN_FUNC(FCNAME);
+    if (rreq_ptr) {
+        MPIDI_Request_create_rreq(rreq, mpi_errno, goto fn_exit);
+        *rreq_ptr = rreq;
+        rreq->comm = comm;
+        rreq->dev.match.parts.rank = source;
+        rreq->dev.match.parts.tag = tag;
+        rreq->dev.match.parts.context_id = comm->context_id;
+        MPIR_Comm_add_ref(comm);
+    }
+    else {
+        rreq = &rreq_s;
+        rreq->dev.OnDataAvail = NULL;
+    }
+    REQ_SFI(rreq)->event_callback = tsearch_callback;
+    REQ_SFI(rreq)->match_state = TSEARCH_INIT;
+    SFI_ADDR_INIT(source, vc, remote_proc);
+    match_bits = init_recvtag(&mask_bits, comm->context_id + context_offset, source, tag);
+
+    /* ------------------------------------------------------------------------ */
+    /* fi_tsearch:                                                              */
+    /* Initiate a search for a match in the hardware or software queue.         */
+    /* The search can complete immediately with a match found (or not, ENOMSG). */
+    /* It can also enqueue a context entry into the completion queue to make the */
+    /* search nonblocking.  This code will poll until the entry is complete.    */
+    /* ------------------------------------------------------------------------ */
+    ret = fi_tsearch(gl_data.endpoint,  /* Tagged Endpoint      */
+                     &match_bits,       /* Match bits           */
+                     mask_bits, /* Bits to ignore       */
+                     0, /* Flags                */
+                     &remote_proc,      /* Remote Address       */
+                     &len,      /* Out:  incoming msglen */
+                     &(REQ_SFI(rreq)->sfi_context));    /* Nonblocking context  */
+    if (ret == -FI_ENOMSG) {
+        *flag = 0;
+        goto fn_exit;
+    }
+    else if (ret == 1) {
+        *flag = 1;
+        search_complete(match_bits, len, rreq);
+        *status = rreq->status;
+        goto fn_exit;
+    }
+    else {
+        MPIU_ERR_CHKANDJUMP4((ret < 0), mpi_errno, MPI_ERR_OTHER,
+                             "**sfi_tsearch", "**sfi_tsearch %s %d %s %s",
+                             __SHORT_FILE__, __LINE__, FCNAME, fi_strerror(-ret));
+    }
+    while (TSEARCH_INIT == REQ_SFI(rreq)->match_state)
+        MPID_nem_sfi_poll(MPID_BLOCKING_POLL);
+
+    if (REQ_SFI(rreq)->match_state == TSEARCH_NOT_FOUND) {
+        if (rreq_ptr) {
+            MPIDI_CH3_Request_destroy(rreq);
+            *rreq_ptr = NULL;
+        }
+        *flag = 0;
+    }
+    else {
+        *status = rreq->status;
+        *flag = 1;
+    }
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_iprobe)
+int MPID_nem_sfi_iprobe(struct MPIDI_VC *vc,
+                        int source,
+                        int tag,
+                        MPID_Comm * comm, int context_offset, int *flag, MPI_Status * status)
+{
+    int rc;
+    BEGIN_FUNC(FCNAME);
+    rc = MPID_nem_sfi_iprobe_impl(vc, source, tag, comm, context_offset, flag, status, NULL);
+    END_FUNC(FCNAME);
+    return rc;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_improbe)
+int MPID_nem_sfi_improbe(struct MPIDI_VC *vc,
+                         int source,
+                         int tag,
+                         MPID_Comm * comm,
+                         int context_offset,
+                         int *flag, MPID_Request ** message, MPI_Status * status)
+{
+    int old_error = status->MPI_ERROR;
+    int s;
+    BEGIN_FUNC(FCNAME);
+    s = MPID_nem_sfi_iprobe_impl(vc, source, tag, comm, context_offset, flag, status, message);
+    if (flag && *flag) {
+        status->MPI_ERROR = old_error;
+        (*message)->kind = MPID_REQUEST_MPROBE;
+    }
+    END_FUNC(FCNAME);
+    return s;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_anysource_iprobe)
+int MPID_nem_sfi_anysource_iprobe(int tag,
+                                  MPID_Comm * comm,
+                                  int context_offset, int *flag, MPI_Status * status)
+{
+    int rc;
+    BEGIN_FUNC(FCNAME);
+    rc = MPID_nem_sfi_iprobe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, status);
+    END_FUNC(FCNAME);
+    return rc;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_anysource_improbe)
+int MPID_nem_sfi_anysource_improbe(int tag,
+                                   MPID_Comm * comm,
+                                   int context_offset,
+                                   int *flag, MPID_Request ** message, MPI_Status * status)
+{
+    int rc;
+    BEGIN_FUNC(FCNAME);
+    rc = MPID_nem_sfi_improbe(NULL, MPI_ANY_SOURCE, tag, comm,
+                              context_offset, flag, message, status);
+    END_FUNC(FCNAME);
+    return rc;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_poll)
+int MPID_nem_sfi_poll(int in_blocking_poll)
+{
+    int complete = 0, mpi_errno = MPI_SUCCESS;
+    ssize_t ret;
+    cq_tagged_entry_t wc;
+    cq_err_entry_t error;
+    MPIDI_VC_t *vc;
+    MPID_Request *req;
+    req_fn reqFn;
+    BEGIN_FUNC(FCNAME);
+    do {
+        /* ----------------------------------------------------- */
+        /* Poll the completion queue                             */
+        /* The strategy here is                                  */
+        /* ret>0 successful poll, events returned                */
+        /* ret==0 empty poll, no events/no error                 */
+        /* ret<0, error, but some error instances should not     */
+        /* cause MPI to terminate                                */
+        /* ----------------------------------------------------- */
+        ret = fi_cq_read(gl_data.cq,    /* Tagged completion queue       */
+                         (void *) &wc,  /* OUT:  Tagged completion entry */
+                         1);    /* Number of entries to poll     */
+        if (ret > 0) {
+            if (NULL != wc.op_context) {
+                req = context_to_req(wc.op_context);
+                if (REQ_SFI(req)->event_callback) {
+                    MPI_RC(REQ_SFI(req)->event_callback(&wc, req));
+                    continue;
+                }
+                reqFn = req->dev.OnDataAvail;
+                if (reqFn) {
+                    if (REQ_SFI(req)->pack_buffer) {
+                        MPIU_Free(REQ_SFI(req)->pack_buffer);
+                    }
+                    vc = REQ_SFI(req)->vc;
+
+                    complete = 0;
+                    MPI_RC(reqFn(vc, req, &complete));
+                    continue;
+                }
+                else {
+                    MPIU_Assert(0);
+                }
+            }
+            else {
+                MPIU_Assert(0);
+            }
+        }
+        else if (ret < 0) {
+            if (ret == -FI_EAVAIL) {
+                ret = fi_cq_readerr(gl_data.cq, (void *) &error, sizeof(error), 0);
+                if (error.err == FI_EMSGSIZE) {
+                    /* ----------------------------------------------------- */
+                    /* Truncation errors are tolerated here: on the send     */
+                    /* side the request is completed anyway; on the receive  */
+                    /* side the request is completed with MPI_ERR_TRUNCATE   */
+                    /* set in the status.  For any other kind of request,    */
+                    /* this error is fatal.                                  */
+                    /* ----------------------------------------------------- */
+                    req = context_to_req(error.op_context);
+                    if (req->kind == MPID_REQUEST_SEND) {
+                        mpi_errno = REQ_SFI(req)->event_callback(NULL, req);
+                    }
+                    else if (req->kind == MPID_REQUEST_RECV) {
+                        mpi_errno = REQ_SFI(req)->event_callback(&wc, req);
+                        req->status.MPI_ERROR = MPI_ERR_TRUNCATE;
+                        req->status.MPI_TAG = error.tag;
+                    }
+                    else {
+                        mpi_errno = MPI_ERR_OTHER;
+                    }
+                }
+            }
+            else {
+                MPIU_ERR_CHKANDJUMP4(1, mpi_errno, MPI_ERR_OTHER, "**sfi_poll",
+                                     "**sfi_poll %s %d %s %s", __SHORT_FILE__,
+                                     __LINE__, FCNAME, fi_strerror(-ret));
+            }
+        }
+    } while (in_blocking_poll && (ret > 0));
+    END_FUNC_RC(FCNAME);
+}
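
The progress engine above is a standard completion-queue drain: read one tagged entry, map its op_context back to the owning request via container_of(), and dispatch to that request's callback.  A stripped-down sketch of the loop in MPID_nem_sfi_poll(), with truncation and fatal-error handling omitted (illustrative only, reusing the names above):

    cq_tagged_entry_t wc;
    ssize_t ret;
    do {
        ret = fi_cq_read(gl_data.cq, (void *) &wc, 1);  /* poll one entry */
        if (ret > 0) {
            /* The context we posted lives inside the request's netmod area,
             * so context_to_req()/container_of() recovers the MPID_Request */
            MPID_Request *req = context_to_req(wc.op_context);
            REQ_SFI(req)->event_callback(&wc, req);     /* per-request hook */
        }
        else if (ret == -FI_EAVAIL) {
            cq_err_entry_t error;
            fi_cq_readerr(gl_data.cq, (void *) &error, sizeof(error), 0);
            /* inspect error.err as done above (FI_EMSGSIZE handling, etc.) */
        }
    } while (in_blocking_poll && ret > 0);
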
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_tagged.c b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_tagged.c
new file mode 100644
index 0000000..2d88c10
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_tagged.c
@@ -0,0 +1,399 @@
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ *
+ *  Portions of this code were written by Intel Corporation.
+ *  Copyright (C) 2011-2012 Intel Corporation.  Intel provides this material
+ *  to Argonne National Laboratory subject to Software Grant and Corporate
+ *  Contributor License Agreement dated February 8, 2012.
+ */
+#include "sfi_impl.h"
+
+#define MPID_NORMAL_SEND 0
+
+/* ------------------------------------------------------------------------ */
+/* Receive callback called after sending a synchronous send acknowledgement.*/
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_sync_recv_callback)
+static inline int MPID_nem_sfi_sync_recv_callback(cq_tagged_entry_t * wc ATTRIBUTE((unused)),
+                                                  MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    BEGIN_FUNC(FCNAME);
+
+    MPIDI_CH3U_Recvq_DP(REQ_SFI(rreq)->parent);
+    MPIDI_CH3U_Request_complete(REQ_SFI(rreq)->parent);
+    MPIDI_CH3U_Request_complete(rreq);
+
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Send done callback                                                       */
+/* Free any temporary/pack buffers and complete the send request            */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_send_callback)
+static inline int MPID_nem_sfi_send_callback(cq_tagged_entry_t * wc ATTRIBUTE((unused)),
+                                             MPID_Request * sreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    if (REQ_SFI(sreq)->pack_buffer)
+        MPIU_Free(REQ_SFI(sreq)->pack_buffer);
+    MPIDI_CH3U_Request_complete(sreq);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+/* ------------------------------------------------------------------------ */
+/* Receive done callback                                                    */
+/* Handle an incoming receive completion event                              */
+/* ------------------------------------------------------------------------ */
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_recv_callback)
+static inline int MPID_nem_sfi_recv_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
+{
+    int err0, err1, src, mpi_errno = MPI_SUCCESS;
+    uint64_t ssend_bits;
+    MPIDI_msg_sz_t sz;
+    MPIDI_VC_t *vc;
+    MPID_Request *sync_req;
+    BEGIN_FUNC(FCNAME);
+    /* ---------------------------------------------------- */
+    /* Populate the MPI Status and unpack noncontig buffer  */
+    /* ---------------------------------------------------- */
+    rreq->status.MPI_ERROR = MPI_SUCCESS;
+    rreq->status.MPI_SOURCE = get_source(wc->tag);
+    rreq->status.MPI_TAG = get_tag(wc->tag);
+    REQ_SFI(rreq)->req_started = 1;
+    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);
+
+    if (REQ_SFI(rreq)->pack_buffer) {
+        MPIDI_CH3U_Buffer_copy(REQ_SFI(rreq)->pack_buffer,
+                               MPIR_STATUS_GET_COUNT(rreq->status),
+                               MPI_BYTE, &err0, rreq->dev.user_buf,
+                               rreq->dev.user_count, rreq->dev.datatype, &sz, &err1);
+        MPIR_STATUS_SET_COUNT(rreq->status, sz);
+        MPIU_Free(REQ_SFI(rreq)->pack_buffer);
+        if (err0 || err1) {
+            rreq->status.MPI_ERROR = MPI_ERR_TYPE;
+        }
+    }
+
+    if ((wc->tag & MPID_PROTOCOL_MASK) == MPID_SYNC_SEND) {
+        /* ---------------------------------------------------- */
+        /* Ack the sync send; the request completes when the    */
+        /* ack send's callback executes.  A protocol bit,       */
+        /* MPID_SYNC_SEND_ACK, is set in the tag bits to keep   */
+        /* protocol messages separate from MPI messages         */
+        /* ---------------------------------------------------- */
+        vc = REQ_SFI(rreq)->vc;
+        if (!vc) {      /* MPI_ANY_SOURCE -- Post message from status, complete the VC */
+            src = get_source(wc->tag);
+            vc = rreq->comm->vcr[src];
+            MPIU_Assert(vc);
+        }
+        ssend_bits = init_sendtag(rreq->dev.match.parts.context_id,
+                                  rreq->comm->rank, rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
+        MPID_nem_sfi_create_req(&sync_req, 1);
+        sync_req->dev.OnDataAvail = NULL;
+        sync_req->dev.next = NULL;
+        REQ_SFI(sync_req)->event_callback = MPID_nem_sfi_sync_recv_callback;
+        REQ_SFI(sync_req)->parent = rreq;
+        FI_RC(fi_tsendto(gl_data.endpoint,
+                         NULL,
+                         0,
+                         gl_data.mr,
+                         VC_SFI(vc)->direct_addr,
+                         ssend_bits, &(REQ_SFI(sync_req)->sfi_context)), tsendto);
+    }
+    else {
+        /* ---------------------------------------------------- */
+        /* Non-synchronous send, complete normally              */
+        /* by removing from the CH3 queue and completing the    */
+        /* request object                                       */
+        /* ---------------------------------------------------- */
+        MPIDI_CH3U_Recvq_DP(rreq);
+        MPIDI_CH3U_Request_complete(rreq);
+    }
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(do_isend)
+static inline int do_isend(struct MPIDI_VC *vc,
+                           const void *buf,
+                           int count,
+                           MPI_Datatype datatype,
+                           int dest,
+                           int tag,
+                           MPID_Comm * comm,
+                           int context_offset, struct MPID_Request **request, uint64_t type)
+{
+    int err0, err1, dt_contig, mpi_errno = MPI_SUCCESS;
+    char *send_buffer;
+    uint64_t match_bits, ssend_match, ssend_mask;
+    MPI_Aint dt_true_lb;
+    MPID_Request *sreq = NULL, *sync_req = NULL;
+    MPIDI_msg_sz_t data_sz;
+    MPID_Datatype *dt_ptr;
+    BEGIN_FUNC(FCNAME);
+    VC_READY_CHECK(vc);
+
+    /* ---------------------------------------------------- */
+    /* Create the MPI request                               */
+    /* ---------------------------------------------------- */
+    MPID_nem_sfi_create_req(&sreq, 2);
+    sreq->kind = MPID_REQUEST_SEND;
+    sreq->dev.OnDataAvail = NULL;
+    REQ_SFI(sreq)->event_callback = MPID_nem_sfi_send_callback;
+    REQ_SFI(sreq)->vc = vc;
+
+    /* ---------------------------------------------------- */
+    /* Create the pack buffer (if required), and allocate   */
+    /* a send request                                       */
+    /* ---------------------------------------------------- */
+    match_bits = init_sendtag(comm->context_id + context_offset, comm->rank, tag, type);
+    sreq->dev.match.parts.tag = match_bits;
+    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+    send_buffer = (char *) buf + dt_true_lb;
+    if (!dt_contig) {
+        send_buffer = (char *) MPIU_Malloc(data_sz);
+        MPIU_ERR_CHKANDJUMP1(send_buffer == NULL, mpi_errno,
+                             MPI_ERR_OTHER, "**nomem", "**nomem %s", "Send buffer alloc");
+        MPIDI_CH3U_Buffer_copy(buf, count, datatype, &err0,
+                               send_buffer, data_sz, MPI_BYTE, &data_sz, &err1);
+        REQ_SFI(sreq)->pack_buffer = send_buffer;
+    }
+
+    if (type == MPID_SYNC_SEND) {
+        /* ---------------------------------------------------- */
+        /* For synchronous send, we post a receive to catch the */
+        /* match ack, but use the tag protocol bits to avoid    */
+        /* matching with MPI level messages.                    */
+        /* ---------------------------------------------------- */
+        int c = 1;
+        MPID_cc_incr(sreq->cc_ptr, &c);
+        MPID_nem_sfi_create_req(&sync_req, 1);
+        sync_req->dev.OnDataAvail = NULL;
+        sync_req->dev.next = NULL;
+        REQ_SFI(sync_req)->event_callback = MPID_nem_sfi_sync_recv_callback;
+        REQ_SFI(sync_req)->parent = sreq;
+        ssend_match = init_recvtag(&ssend_mask, comm->context_id + context_offset, dest, tag);
+        ssend_match |= MPID_SYNC_SEND_ACK;
+        FI_RC(fi_trecvfrom(gl_data.endpoint,    /* endpoint    */
+                           NULL,        /* recvbuf     */
+                           0,   /* data sz     */
+                           gl_data.mr,  /* dynamic mr  */
+                           VC_SFI(vc)->direct_addr,     /* remote proc */
+                           ssend_match, /* match bits  */
+                           0ULL,        /* mask bits   */
+                           &(REQ_SFI(sync_req)->sfi_context)), trecvfrom);
+    }
+    FI_RC(fi_tsendto(gl_data.endpoint,  /* Endpoint                       */
+                     send_buffer,       /* Send buffer(packed or user)    */
+                     data_sz,   /* Size of the send               */
+                     gl_data.mr,        /* Dynamic memory region          */
+                     VC_SFI(vc)->direct_addr,   /* Use the address of this VC     */
+                     match_bits,        /* Match bits                     */
+                     &(REQ_SFI(sreq)->sfi_context)), tsendto);
+    *request = sreq;
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_recv_posted)
+int MPID_nem_sfi_recv_posted(struct MPIDI_VC *vc, struct MPID_Request *rreq)
+{
+    int mpi_errno = MPI_SUCCESS, dt_contig, src, tag;
+    uint64_t match_bits = 0, mask_bits = 0;
+    fi_addr_t remote_proc = 0;
+    MPIDI_msg_sz_t data_sz;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype *dt_ptr;
+    MPIR_Context_id_t context_id;
+    char *recv_buffer;
+    BEGIN_FUNC(FCNAME);
+
+    /* ------------------------ */
+    /* Initialize the request   */
+    /* ------------------------ */
+    MPID_nem_sfi_init_req(rreq);
+    REQ_SFI(rreq)->event_callback = MPID_nem_sfi_recv_callback;
+    REQ_SFI(rreq)->vc = vc;
+
+    /* ---------------------------------------------------- */
+    /* Fill out the match info, and allocate a pack        */
+    /* buffer if the receive datatype is noncontiguous     */
+    /* ---------------------------------------------------- */
+    src = rreq->dev.match.parts.rank;
+    tag = rreq->dev.match.parts.tag;
+    context_id = rreq->dev.match.parts.context_id;
+    match_bits = init_recvtag(&mask_bits, context_id, src, tag);
+    SFI_ADDR_INIT(src, vc, remote_proc);
+    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
+                            dt_contig, data_sz, dt_ptr, dt_true_lb);
+    if (dt_contig) {
+        recv_buffer = (char *) rreq->dev.user_buf + dt_true_lb;
+    }
+    else {
+        recv_buffer = (char *) MPIU_Malloc(data_sz);
+        MPIU_ERR_CHKANDJUMP1(recv_buffer == NULL, mpi_errno, MPI_ERR_OTHER,
+                             "**nomem", "**nomem %s", "Recv Pack Buffer alloc");
+        REQ_SFI(rreq)->pack_buffer = recv_buffer;
+    }
+
+    /* ---------------- */
+    /* Post the receive */
+    /* ---------------- */
+    FI_RC(fi_trecvfrom(gl_data.endpoint,
+                       recv_buffer,
+                       data_sz,
+                       gl_data.mr,
+                       remote_proc,
+                       match_bits, mask_bits, &(REQ_SFI(rreq)->sfi_context)), trecvfrom);
+    MPID_nem_sfi_poll(MPID_NONBLOCKING_POLL);
+    END_FUNC_RC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_send)
+int MPID_nem_sfi_send(struct MPIDI_VC *vc,
+                      const void *buf,
+                      int count,
+                      MPI_Datatype datatype,
+                      int dest,
+                      int tag, MPID_Comm * comm, int context_offset, struct MPID_Request **request)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    BEGIN_FUNC(FCNAME);
+    mpi_errno = do_isend(vc, buf, count, datatype, dest, tag,
+                         comm, context_offset, request, MPID_NORMAL_SEND);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_isend)
+int MPID_nem_sfi_isend(struct MPIDI_VC *vc,
+                       const void *buf,
+                       int count,
+                       MPI_Datatype datatype,
+                       int dest,
+                       int tag, MPID_Comm * comm, int context_offset, struct MPID_Request **request)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    mpi_errno = do_isend(vc, buf, count, datatype, dest,
+                         tag, comm, context_offset, request, MPID_NORMAL_SEND);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_ssend)
+int MPID_nem_sfi_ssend(struct MPIDI_VC *vc,
+                       const void *buf,
+                       int count,
+                       MPI_Datatype datatype,
+                       int dest,
+                       int tag, MPID_Comm * comm, int context_offset, struct MPID_Request **request)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    mpi_errno = do_isend(vc, buf, count, datatype, dest,
+                         tag, comm, context_offset, request, MPID_SYNC_SEND);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_issend)
+int MPID_nem_sfi_issend(struct MPIDI_VC *vc,
+                        const void *buf,
+                        int count,
+                        MPI_Datatype datatype,
+                        int dest,
+                        int tag,
+                        MPID_Comm * comm, int context_offset, struct MPID_Request **request)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    mpi_errno = do_isend(vc, buf, count, datatype, dest,
+                         tag, comm, context_offset, request, MPID_SYNC_SEND);
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
+
+#define DO_CANCEL(req)                                  \
+({                                                      \
+  int mpi_errno = MPI_SUCCESS;                          \
+  int ret;                                              \
+  BEGIN_FUNC(FCNAME);                                   \
+  MPID_nem_sfi_poll(MPID_NONBLOCKING_POLL);             \
+  ret = fi_cancel((fid_t)gl_data.endpoint,              \
+                  &(REQ_SFI(req)->sfi_context));        \
+  if (ret == 0) {                                        \
+    MPIR_STATUS_SET_CANCEL_BIT(req->status, TRUE);      \
+  } else {                                              \
+    MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);     \
+  }                                                     \
+  END_FUNC(FCNAME);                                     \
+  return mpi_errno;                                     \
+})
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cancel_send)
+int MPID_nem_sfi_cancel_send(struct MPIDI_VC *vc ATTRIBUTE((unused)), struct MPID_Request *sreq)
+{
+    DO_CANCEL(sreq);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_cancel_recv)
+int MPID_nem_sfi_cancel_recv(struct MPIDI_VC *vc ATTRIBUTE((unused)), struct MPID_Request *rreq)
+{
+    DO_CANCEL(rreq);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_anysource_posted)
+void MPID_nem_sfi_anysource_posted(MPID_Request * rreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+    BEGIN_FUNC(FCNAME);
+    mpi_errno = MPID_nem_sfi_recv_posted(NULL, rreq);
+    MPIU_Assert(mpi_errno == MPI_SUCCESS);
+    END_FUNC(FCNAME);
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_sfi_anysource_matched)
+int MPID_nem_sfi_anysource_matched(MPID_Request * rreq)
+{
+    int mpi_errno = FALSE;
+    int ret;
+    BEGIN_FUNC(FCNAME);
+    /* ----------------------------------------------------- */
+    /* We have been notified that this ANY_SOURCE request was */
+    /* matched on another device (e.g. shared memory); we get */
+    /* the chance to cancel the copy posted to this netmod    */
+    /* ----------------------------------------------------- */
+    ret = fi_cancel((fid_t) gl_data.endpoint, &(REQ_SFI(rreq)->sfi_context));
+    if (ret == 0) {
+        /* --------------------------------------------------- */
+        /* Cancel succeeded: mark cancelled and complete it    */
+        /* --------------------------------------------------- */
+        mpi_errno = TRUE;
+        MPIR_STATUS_SET_CANCEL_BIT(rreq->status, TRUE);
+        MPIR_STATUS_SET_COUNT(rreq->status, 0);
+        MPIDI_CH3U_Request_complete(rreq);
+    }
+    END_FUNC(FCNAME);
+    return mpi_errno;
+}
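Together with MPID_nem_sfi_anysource_posted() above, this implements the two-sided handling of MPI_ANY_SOURCE receives: the request is posted to the netmod as well as to the other device, and whichever side loses the match race must withdraw its copy. A hypothetical caller-side sketch (the variable name and surrounding logic are illustrative, not part of this commit) of how the TRUE/FALSE result would be consumed:

    /* The other device (e.g. shared memory) has matched rreq, an
     * MPI_ANY_SOURCE receive that was also posted to the sfi netmod. */
    int netmod_cancelled = MPID_nem_sfi_anysource_matched(rreq);
    if (netmod_cancelled) {
        /* TRUE: fi_cancel() succeeded; the netmod marked its copy
         * cancelled and completed it, so the other match stands. */
    } else {
        /* FALSE: fi_cancel() failed because the netmod has already
         * matched (or is receiving) the message, so its match wins. */
    }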
diff --git a/src/mpid/ch3/channels/nemesis/netmod/sfi/subconfigure.m4 b/src/mpid/ch3/channels/nemesis/netmod/sfi/subconfigure.m4
new file mode 100644
index 0000000..361f7d0
--- /dev/null
+++ b/src/mpid/ch3/channels/nemesis/netmod/sfi/subconfigure.m4
@@ -0,0 +1,24 @@
+[#] start of __file__
+dnl MPICH_SUBCFG_AFTER=src/mpid/ch3/channels/nemesis
+
+AC_DEFUN([PAC_SUBCFG_PREREQ_]PAC_SUBCFG_AUTO_SUFFIX,[
+    AM_COND_IF([BUILD_CH3_NEMESIS],[
+        for net in $nemesis_networks ; do
+            AS_CASE([$net],[sfi],[build_nemesis_netmod_sfi=yes])
+        done
+    ])
+    AM_CONDITIONAL([BUILD_NEMESIS_NETMOD_SFI],[test "X$build_nemesis_netmod_sfi" = "Xyes"])
+])dnl
+
+AC_DEFUN([PAC_SUBCFG_BODY_]PAC_SUBCFG_AUTO_SUFFIX,[
+AM_COND_IF([BUILD_NEMESIS_NETMOD_SFI],[
+    AC_MSG_NOTICE([RUNNING CONFIGURE FOR ch3:nemesis:sfi])
+
+    PAC_SET_HEADER_LIB_PATH(sfi)
+    PAC_CHECK_HEADER_LIB_FATAL(sfi, rdma/fabric.h, fabric, fi_getinfo)
+
+    AC_DEFINE([ENABLE_COMM_OVERRIDES], 1, [define to add per-vc function pointers to override send and recv functions])
+])dnl end AM_COND_IF(BUILD_NEMESIS_NETMOD_SFI,...)
+])dnl end _BODY
+
+[#] end of __file__
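Assuming the usual MPICH conventions for selecting nemesis netmods and for the PAC_SET_HEADER_LIB_PATH / PAC_CHECK_HEADER_LIB_FATAL macros used above (the exact option names are inferred, not spelled out in this commit), the new netmod would be enabled at configure time along these lines:

    # "sfi" in the netmod list populates $nemesis_networks, which the
    # PREREQ block above turns into build_nemesis_netmod_sfi=yes.
    ./configure --with-device=ch3:nemesis:sfi \
                --with-sfi=/path/to/sfi/install   # must provide rdma/fabric.h and fi_getinfo
    make
    make install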

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/Makefile.mk   |    1 +
 .../ch3/channels/nemesis/netmod/sfi/Makefile.mk    |   19 +
 .../ch3/channels/nemesis/netmod/sfi/errnames.txt   |   42 ++
 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c  |  577 ++++++++++++++++++++
 .../ch3/channels/nemesis/netmod/sfi/sfi_data.c     |   58 ++
 .../ch3/channels/nemesis/netmod/sfi/sfi_impl.h     |  342 ++++++++++++
 .../ch3/channels/nemesis/netmod/sfi/sfi_init.c     |  461 ++++++++++++++++
 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c |  237 ++++++++
 .../ch3/channels/nemesis/netmod/sfi/sfi_progress.c |  291 ++++++++++
 .../ch3/channels/nemesis/netmod/sfi/sfi_tagged.c   |  399 ++++++++++++++
 .../channels/nemesis/netmod/sfi/subconfigure.m4    |   24 +
 11 files changed, 2451 insertions(+), 0 deletions(-)
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/Makefile.mk
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/errnames.txt
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_cm.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_data.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_impl.h
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_init.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_msg.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_progress.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/sfi_tagged.c
 create mode 100644 src/mpid/ch3/channels/nemesis/netmod/sfi/subconfigure.m4


hooks/post-receive
-- 
MPICH primary repository

