[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-232-gffe22da

Service Account noreply at mpich.org
Tue Mar 1 16:15:11 CST 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  ffe22da48f81a07cc9b262dbab3928860051233d (commit)
       via  3adf88e0dc2a0276d639b7adadb90f138fb1c17b (commit)
       via  3efdc578124b8b3b3004b8da376b8710bd05b399 (commit)
      from  5799e199620d5fcd3173a62cf5ac80bd75db7192 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/ffe22da48f81a07cc9b262dbab3928860051233d

commit ffe22da48f81a07cc9b262dbab3928860051233d
Author: Mikhail Shiryaev <mikhail.shiryaev at intel.com>
Date:   Tue Feb 2 20:17:05 2016 +0300

    OFI-netmod: formatting changes.
    
    (intel) Change-Id: Icf96eaae1753365c4932e3190c39003ca2ac6859

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index def7b68..0cb92dc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -69,23 +69,25 @@ typedef int (*req_fn) (MPIDI_VC_t *, MPID_Request *, int *);
 /* ******************************** */
 /* Global Object for state tracking */
 /* ******************************** */
-typedef struct MPID_nem_ofi_global_t {
-    char bound_addr[OFI_MAX_ADDR_LEN];       /* This ranks bound address    */
-    size_t bound_addrlen;       /* length of the bound address */
-    struct fid_fabric *fabric;  /* fabric object               */
-    struct fid_domain *domain;  /* domain object               */
-    struct fid_ep *endpoint;    /* endpoint object             */
-    struct fid_cq *cq;          /* completion queue            */
-    struct fid_av *av;          /* address vector              */
-    struct fid_mr *mr;          /* memory region               */
-    size_t iov_limit;           /* Max send iovec limit        */
-    size_t max_buffered_send;   /* Buffered send threshold     */
-    int rts_cts_in_flight;
-    int api_set;
-    MPID_Request *persistent_req;       /* Unexpected request queue    */
-    MPID_Request *conn_req;      /* Connection request          */
-    MPIDI_PG_t *pg_p;            /* MPI Process group           */
-    MPIDI_VC_t *cm_vcs;          /* temporary VC's              */
+typedef struct {
+    char bound_addr[OFI_MAX_ADDR_LEN]; /* This ranks bound address    */
+    size_t bound_addrlen;              /* length of the bound address */
+    struct fid_fabric *fabric;         /* fabric object               */
+    struct fid_domain *domain;         /* domain object               */
+    struct fid_ep *endpoint;           /* endpoint object             */
+    struct fid_cq *cq;                 /* completion queue            */
+    struct fid_av *av;                 /* address vector              */
+    struct fid_mr *mr;                 /* memory region               */
+    size_t iov_limit;                  /* Max send iovec limit        */
+    size_t max_buffered_send;          /* Buffered send threshold     */
+    int rts_cts_in_flight;             /* Count of incompleted        */
+                                       /*   RTS-CTS-DATA exchanges    */
+    int api_set;                       /* Used OFI API for send       */
+                                       /*   operations                */
+    MPID_Request *persistent_req;      /* Unexpected request queue    */
+    MPID_Request *conn_req;            /* Connection request          */
+    MPIDI_PG_t *pg_p;                  /* MPI Process group           */
+    MPIDI_VC_t *cm_vcs;                /* temporary VC's              */
 } MPID_nem_ofi_global_t __attribute__ ((aligned (MPID_NEM_CACHE_LINE_LEN)));
 
 /* ******************************** */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index ec8248d..26c5b5b 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -124,9 +124,9 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     /* returns a list.  see man fi_fabric for details                           */
     /* ------------------------------------------------------------------------ */
     dump_and_choose_providers(prov_tagged, &prov_use);
-    FI_RC(fi_fabric(prov_use->fabric_attr,      /* In:   Fabric attributes */
-                    &gl_data.fabric,    /* Out:  Fabric descriptor */
-                    NULL), openfabric); /* Context: fabric events  */
+    FI_RC(fi_fabric(prov_use->fabric_attr, /* In:   Fabric attributes */
+                    &gl_data.fabric,       /* Out:  Fabric descriptor */
+                    NULL), openfabric);    /* Context: fabric events  */
 
     gl_data.iov_limit = prov_use->tx_attr->iov_limit;
     gl_data.max_buffered_send = prov_use->tx_attr->inject_size;
@@ -142,7 +142,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     /*            In this case, we want remote completion to be set by default  */
     /* ------------------------------------------------------------------------ */
     FI_RC(fi_domain(gl_data.fabric,     /* In:  Fabric object             */
-                    prov_use,   /* In:  default domain attributes */
+                    prov_use,           /* In:  default domain attributes */
                     &gl_data.domain,    /* Out: domain object             */
                     NULL), opendomain); /* Context: Domain events         */
 
@@ -153,10 +153,10 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     /* completion queues, etc.                                                  */
     /* see man fi_endpoint for more details                                     */
     /* ------------------------------------------------------------------------ */
-    FI_RC(fi_endpoint(gl_data.domain,   /* In: Domain Object        */
-                      prov_use, /* In: Configuration object */
-                      &gl_data.endpoint,        /* Out: Endpoint Object     */
-                      NULL), openep);   /* Context: endpoint events */
+    FI_RC(fi_endpoint(gl_data.domain,    /* In: Domain Object        */
+                      prov_use,          /* In: Configuration object */
+                      &gl_data.endpoint, /* Out: Endpoint Object     */
+                      NULL), openep);    /* Context: endpoint events */
 
     /* ------------------------------------------------------------------------ */
     /* Create the objects that will be bound to the endpoint.                   */
@@ -171,14 +171,14 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     memset(&cq_attr, 0, sizeof(cq_attr));
     cq_attr.format = FI_CQ_FORMAT_TAGGED;
     FI_RC(fi_cq_open(gl_data.domain,    /* In:  Domain Object         */
-                     &cq_attr,  /* In:  Configuration object  */
+                     &cq_attr,          /* In:  Configuration object  */
                      &gl_data.cq,       /* Out: CQ Object             */
                      NULL), opencq);    /* Context: CQ events         */
 
     memset(&av_attr, 0, sizeof(av_attr));
-    av_attr.type = FI_AV_MAP;   /* Mapped addressing mode     */
+    av_attr.type = FI_AV_MAP;           /* Mapped addressing mode     */
     FI_RC(fi_av_open(gl_data.domain,    /* In:  Domain Object         */
-                     &av_attr,  /* In:  Configuration object  */
+                     &av_attr,          /* In:  Configuration object  */
                      &gl_data.av,       /* Out: AV Object             */
                      NULL), avopen);    /* Context: AV events         */
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
index 3405722..c66191e 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c
@@ -79,7 +79,7 @@
         REQ_OFI(cts_req)->event_callback = MPID_nem_ofi_cts_recv_callback; \
         REQ_OFI(cts_req)->parent         = sreq;                        \
                                                                         \
-        FI_RC_RETRY(fi_trecv(gl_data.endpoint,                                \
+        FI_RC_RETRY(fi_trecv(gl_data.endpoint,                          \
                        NULL,                                            \
                        0,                                               \
                        gl_data.mr,                                      \
@@ -88,7 +88,7 @@
                        0, /* Exact tag match, no ignore bits */         \
                        &(REQ_OFI(cts_req)->ofi_context)),trecv);        \
         if (gl_data.api_set == API_SET_1){                              \
-            FI_RC_RETRY(fi_tsend(gl_data.endpoint,                            \
+            FI_RC_RETRY(fi_tsend(gl_data.endpoint,                      \
                            &REQ_OFI(sreq)->pack_buffer_size,            \
                            sizeof(REQ_OFI(sreq)->pack_buffer_size),     \
                            gl_data.mr,                                  \
@@ -96,7 +96,7 @@
                            match_bits,                                  \
                            &(REQ_OFI(sreq)->ofi_context)),tsend);       \
         }else{                                                          \
-            FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,                        \
+            FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,                  \
                                &REQ_OFI(sreq)->pack_buffer_size,        \
                                sizeof(REQ_OFI(sreq)->pack_buffer_size), \
                                gl_data.mr,                              \
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
index 76260c2..f57ceee 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
@@ -169,13 +169,13 @@ static inline int ADD_SUFFIX(send_normal)(struct MPIDI_VC *vc,
         ssend_match = init_recvtag_2(&ssend_mask, comm->context_id + context_offset, tag);
 #endif
         ssend_match |= MPID_SYNC_SEND_ACK;
-        FI_RC_RETRY(fi_trecv(gl_data.endpoint,    /* endpoint    */
-                           NULL,        /* recvbuf     */
-                           0,   /* data sz     */
-                           gl_data.mr,  /* dynamic mr  */
-                           VC_OFI(vc)->direct_addr,     /* remote proc */
-                           ssend_match, /* match bits  */
-                           0ULL,        /* mask bits   */
+        FI_RC_RETRY(fi_trecv(gl_data.endpoint,      /* endpoint    */
+                           NULL,                    /* recvbuf     */
+                           0,                       /* data sz     */
+                           gl_data.mr,              /* dynamic mr  */
+                           VC_OFI(vc)->direct_addr, /* remote proc */
+                           ssend_match,             /* match bits  */
+                           0ULL,                    /* mask bits   */
                            &(REQ_OFI(sync_req)->ofi_context)), trecv);
     }
 
@@ -196,19 +196,19 @@ static inline int ADD_SUFFIX(send_normal)(struct MPIDI_VC *vc,
     }
     else
 #if API_SET == API_SET_1
-    FI_RC_RETRY(fi_tsend(gl_data.endpoint,  /* Endpoint                       */
+        FI_RC_RETRY(fi_tsend(gl_data.endpoint,          /* Endpoint                       */
 #elif API_SET == API_SET_2
-    FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,  /* Endpoint                       */
+        FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,      /* Endpoint                       */
 #endif
-        send_buffer,       /* Send buffer(packed or user)    */
-        data_sz,   /* Size of the send               */
-        gl_data.mr,        /* Dynamic memory region          */
+                               send_buffer,             /* Send buffer(packed or user)    */
+                               data_sz,                 /* Size of the send               */
+                               gl_data.mr,              /* Dynamic memory region          */
 #if API_SET == API_SET_2
-        comm->rank,
+                               comm->rank,
 #endif
-        VC_OFI(vc)->direct_addr,   /* Use the address of this VC     */
-        match_bits,        /* Match bits                     */
-        &(REQ_OFI(sreq)->ofi_context)), tsend);
+                               VC_OFI(vc)->direct_addr, /* Use the address of this VC     */
+                               match_bits,              /* Match bits                     */
+                               &(REQ_OFI(sreq)->ofi_context)), tsend);
 
     *request = sreq;
 

http://git.mpich.org/mpich.git/commitdiff/3adf88e0dc2a0276d639b7adadb90f138fb1c17b

commit 3adf88e0dc2a0276d639b7adadb90f138fb1c17b
Author: Mikhail Shiryaev <mikhail.shiryaev at intel.com>
Date:   Tue Feb 2 15:54:26 2016 +0300

    OFI-netmod: reorganize global structure.
    
    Removed unused fields (any_addr, comm_ops).
    Aligned structure by cache line.
    Placed max_buffered_send and endpoint in one cache line (they are both used on the critical send path).
    
    (intel) Change-Id: Ia082bcadcab3305f849080bb128b6129667cc137

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index ccf17d5..def7b68 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -69,9 +69,8 @@ typedef int (*req_fn) (MPIDI_VC_t *, MPID_Request *, int *);
 /* ******************************** */
 /* Global Object for state tracking */
 /* ******************************** */
-typedef struct {
+typedef struct MPID_nem_ofi_global_t {
     char bound_addr[OFI_MAX_ADDR_LEN];       /* This ranks bound address    */
-    fi_addr_t any_addr;         /* Specifies any source        */
     size_t bound_addrlen;       /* length of the bound address */
     struct fid_fabric *fabric;  /* fabric object               */
     struct fid_domain *domain;  /* domain object               */
@@ -79,16 +78,15 @@ typedef struct {
     struct fid_cq *cq;          /* completion queue            */
     struct fid_av *av;          /* address vector              */
     struct fid_mr *mr;          /* memory region               */
-    MPIDI_PG_t *pg_p;           /* MPI Process group           */
-    MPIDI_VC_t *cm_vcs;         /* temporary VC's              */
-    MPID_Request *persistent_req;       /* Unexpected request queue    */
-    MPID_Request *conn_req;     /* Connection request          */
-    MPIDI_Comm_ops_t comm_ops;
     size_t iov_limit;           /* Max send iovec limit        */
     size_t max_buffered_send;   /* Buffered send threshold     */
     int rts_cts_in_flight;
     int api_set;
-} MPID_nem_ofi_global_t;
+    MPID_Request *persistent_req;       /* Unexpected request queue    */
+    MPID_Request *conn_req;      /* Connection request          */
+    MPIDI_PG_t *pg_p;            /* MPI Process group           */
+    MPIDI_VC_t *cm_vcs;          /* temporary VC's              */
+} MPID_nem_ofi_global_t __attribute__ ((aligned (MPID_NEM_CACHE_LINE_LEN)));
 
 /* ******************************** */
 /* Device channel specific data     */
@@ -224,7 +222,7 @@ fn_fail:                      \
     remote_proc = VC_OFI(vc)->direct_addr;  \
   } else {                                  \
     MPIU_Assert(vc == NULL);                \
-    remote_proc = gl_data.any_addr;         \
+    remote_proc = FI_ADDR_UNSPEC;           \
   }                                         \
 })
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index ac9061c..ec8248d 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -268,12 +268,6 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
     fi_addrs = MPL_malloc(pg_p->size * sizeof(fi_addr_t));
     FI_RC(fi_av_insert(gl_data.av, addrs, pg_p->size, fi_addrs, 0ULL, NULL), avmap);
 
-    /* ---------------------------------------------------- */
-    /* Insert the ANY_SRC address                           */
-    /* ---------------------------------------------------- */
-
-    gl_data.any_addr = FI_ADDR_UNSPEC;
-
     /* --------------------------------- */
     /* Store the direct addresses in     */
     /* the ranks' respective VCs         */

http://git.mpich.org/mpich.git/commitdiff/3efdc578124b8b3b3004b8da376b8710bd05b399

commit 3efdc578124b8b3b3004b8da376b8710bd05b399
Author: Mikhail Shiryaev <mikhail.shiryaev at intel.com>
Date:   Tue Feb 2 15:03:44 2016 +0300

    OFI-netmod: added fi_tinject on send path.
    
    Ported fi_tinject approach for send path from CH4/OFI-netmod.
    Added creation of lightweight request for quick path.
    
    (intel) Change-Id: Ic6f6dd118cf7203bf8c80b31bd52e0e76b354d10

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index b66bfae..ccf17d5 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -85,6 +85,7 @@ typedef struct {
     MPID_Request *conn_req;     /* Connection request          */
     MPIDI_Comm_ops_t comm_ops;
     size_t iov_limit;           /* Max send iovec limit        */
+    size_t max_buffered_send;   /* Buffered send threshold     */
     int rts_cts_in_flight;
     int api_set;
 } MPID_nem_ofi_global_t;
@@ -249,12 +250,48 @@ static inline int MPID_nem_ofi_create_req(MPID_Request ** request, int refcnt)
     MPID_Request *req;
     req = MPID_Request_create();
     MPIU_Assert(req);
+    MPIDI_Request_clear_dbg(req);
     MPIU_Object_set_ref(req, refcnt);
     MPID_nem_ofi_init_req(req);
     *request = req;
     return mpi_errno;
 }
 
+static inline int MPID_nem_ofi_create_req_lw(MPID_Request ** request, int refcnt)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request *req;
+
+    req = (MPID_Request *) MPIU_Handle_obj_alloc(&MPID_Request_mem);
+    if (req == NULL)
+        MPID_Abort(NULL, MPI_ERR_NO_SPACE, -1, "Cannot allocate Request");
+
+    MPIU_Assert(req != NULL);
+    MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);
+
+    MPIU_Object_set_ref(req, refcnt);
+    req->kind = MPID_REQUEST_SEND;
+    MPIR_cc_set(&req->cc, 0); // request is already completed
+    req->cc_ptr  = &req->cc;
+    req->status.MPI_ERROR  = MPI_SUCCESS;
+    MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
+    req->comm = NULL;
+    req->greq_fns = NULL;
+    req->errflag = MPIR_ERR_NONE;
+    req->request_completed_cb = NULL;
+    req->dev.state = 0;
+    req->dev.datatype_ptr = NULL;
+    req->dev.segment_ptr = NULL;
+    req->dev.flags = MPIDI_CH3_PKT_FLAG_NONE;
+    req->dev.OnDataAvail = NULL;
+    req->dev.ext_hdr_ptr = NULL;
+    MPIDI_Request_clear_dbg(req);
+
+    MPID_nem_ofi_init_req(req);
+
+    *request = req;
+    return mpi_errno;
+}
 
 /* ************************************************************************** */
 /* MPICH Comm Override and Netmod functions                                   */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index c778af8..ac9061c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -129,6 +129,7 @@ int MPID_nem_ofi_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_
                     NULL), openfabric); /* Context: fabric events  */
 
     gl_data.iov_limit = prov_use->tx_attr->iov_limit;
+    gl_data.max_buffered_send = prov_use->tx_attr->inject_size;
     gl_data.api_set = API_SET_1;
     /* ------------------------------------------------------------------------ */
     /* Create the access domain, which is the physical or virtual network or    */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged.c
index cd05d03..e616586 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged.c
@@ -10,6 +10,10 @@
 #include "ofi_impl.h"
 
 #define MPID_NORMAL_SEND 0
+
+#define MPID_CREATE_REQ      0
+#define MPID_DONT_CREATE_REQ 1
+
 static inline int
 MPID_nem_ofi_sync_recv_callback(cq_tagged_entry_t * wc ATTRIBUTE((unused)),
                                 MPID_Request * rreq);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
index 458b455..76260c2 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_tagged_template.c
@@ -104,29 +104,22 @@ int ADD_SUFFIX(MPID_nem_ofi_recv_callback)(cq_tagged_entry_t * wc, MPID_Request
     END_FUNC_RC(FCNAME);
 }
 
-
 #undef FCNAME
-#define FCNAME DECL_FUNC(do_isend)
-static inline int
-ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
-         const void *buf,
-         MPI_Aint count,
-         MPI_Datatype datatype,
-         int dest,
-         int tag,
-         MPID_Comm * comm,
-         int context_offset, struct MPID_Request **request, uint64_t type)
+#define FCNAME DECL_FUNC(send_normal)
+static inline int ADD_SUFFIX(send_normal)(struct MPIDI_VC *vc,
+                              const void *buf, int count, MPI_Datatype datatype,
+                              int dest, int tag, MPID_Comm *comm,
+                              int context_offset, MPID_Request **request,
+                              int dt_contig,
+                              intptr_t data_sz,
+                              MPID_Datatype *dt_ptr,
+                              MPI_Aint dt_true_lb,
+                              uint64_t send_type)
 {
-    int err0, err1, dt_contig, mpi_errno = MPI_SUCCESS;
+    int err0, err1, mpi_errno = MPI_SUCCESS;
     char *send_buffer;
     uint64_t match_bits, ssend_match, ssend_mask;
-    MPI_Aint dt_true_lb;
     MPID_Request *sreq = NULL, *sync_req = NULL;
-    intptr_t data_sz;
-    MPID_Datatype *dt_ptr;
-    BEGIN_FUNC(FCNAME);
-    VC_READY_CHECK(vc);
-
     /* ---------------------------------------------------- */
     /* Create the MPI request                               */
     /* ---------------------------------------------------- */
@@ -141,13 +134,12 @@ ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
     /* a send request                                       */
     /* ---------------------------------------------------- */
 #if API_SET == API_SET_1
-    match_bits = init_sendtag(comm->context_id + context_offset, comm->rank, tag, type);
+    match_bits = init_sendtag(comm->context_id + context_offset, comm->rank, tag, send_type);
 #elif API_SET == API_SET_2
-    match_bits = init_sendtag_2(comm->context_id + context_offset, tag, type);
+    match_bits = init_sendtag_2(comm->context_id + context_offset, tag, send_type);
 #endif
 
     sreq->dev.match.parts.tag = match_bits;
-    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
     send_buffer = (char *) buf + dt_true_lb;
     if (!dt_contig) {
         send_buffer = (char *) MPL_malloc(data_sz);
@@ -158,7 +150,7 @@ ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
         REQ_OFI(sreq)->pack_buffer = send_buffer;
     }
 
-    if (type == MPID_SYNC_SEND) {
+    if (send_type == MPID_SYNC_SEND) {
         /* ---------------------------------------------------- */
         /* For syncronous send, we post a receive to catch the  */
         /* match ack, but use the tag protocol bits to avoid    */
@@ -186,6 +178,23 @@ ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
                            0ULL,        /* mask bits   */
                            &(REQ_OFI(sync_req)->ofi_context)), trecv);
     }
+
+    if (data_sz <= gl_data.max_buffered_send) {
+#if API_SET == API_SET_1
+        FI_RC_RETRY(fi_tinject(gl_data.endpoint,
+#elif API_SET == API_SET_2
+        FI_RC_RETRY(fi_tinjectdata(gl_data.endpoint,
+#endif
+                               send_buffer,
+                               data_sz,
+#if API_SET == API_SET_2
+                               comm->rank,
+#endif
+                               VC_OFI(vc)->direct_addr,
+                               match_bits), tinject);
+        MPID_nem_ofi_send_callback(NULL, sreq);
+    }
+    else
 #if API_SET == API_SET_1
     FI_RC_RETRY(fi_tsend(gl_data.endpoint,  /* Endpoint                       */
 #elif API_SET == API_SET_2
@@ -200,8 +209,97 @@ ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
         VC_OFI(vc)->direct_addr,   /* Use the address of this VC     */
         match_bits,        /* Match bits                     */
         &(REQ_OFI(sreq)->ofi_context)), tsend);
+
     *request = sreq;
-    END_FUNC_RC(FCNAME);
+
+fn_exit:
+    return mpi_errno;
+fn_fail:
+    goto fn_exit;
+}
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(send_lightweight)
+static inline int
+ADD_SUFFIX(send_lightweight)(struct MPIDI_VC *vc,
+                             const void *buf,
+                             intptr_t data_sz,
+                             int rank,
+                             int tag,
+                             MPID_Comm *comm,
+                             int context_offset)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+#if API_SET == API_SET_1
+    uint64_t match_bits = init_sendtag(comm->context_id + context_offset, comm->rank, tag, MPID_NORMAL_SEND);
+#elif API_SET == API_SET_2
+    uint64_t match_bits = init_sendtag_2(comm->context_id + context_offset, tag, MPID_NORMAL_SEND);
+#endif
+
+    MPIU_Assert(data_sz <= gl_data.max_buffered_send);
+
+#if API_SET == API_SET_1
+    FI_RC_RETRY(fi_tinject(gl_data.endpoint,
+#elif API_SET == API_SET_2
+    FI_RC_RETRY(fi_tinjectdata(gl_data.endpoint,
+#endif
+                           buf,
+                           data_sz,
+#if API_SET == API_SET_2
+                           comm->rank,
+#endif
+                           VC_OFI(vc)->direct_addr,
+                           match_bits), tinject);
+  fn_exit:
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+
+#undef FCNAME
+#define FCNAME DECL_FUNC(do_isend)
+static inline int
+ADD_SUFFIX(do_isend)(struct MPIDI_VC *vc,
+         const void *buf,
+         MPI_Aint count,
+         MPI_Datatype datatype,
+         int dest,
+         int tag,
+         MPID_Comm * comm,
+         int context_offset,
+         struct MPID_Request **request,
+         int should_create_req,
+         uint64_t send_type)
+{
+    int dt_contig, mpi_errno = MPI_SUCCESS;
+    MPI_Aint dt_true_lb;
+    intptr_t data_sz;
+    MPID_Datatype *dt_ptr;
+    BEGIN_FUNC(FCNAME);
+
+    VC_READY_CHECK(vc);
+    *request = NULL;
+
+    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    if (likely((send_type != MPID_SYNC_SEND) &&
+                dt_contig &&
+                (data_sz <= gl_data.max_buffered_send)))
+    {
+        if (should_create_req == MPID_CREATE_REQ)
+            MPID_nem_ofi_create_req_lw(request, 1);
+
+        mpi_errno = ADD_SUFFIX(send_lightweight)(vc, (char *) buf + dt_true_lb, data_sz,
+                                                 dest, tag, comm, context_offset);
+    }
+    else
+        mpi_errno = ADD_SUFFIX(send_normal)(vc, buf, count, datatype, dest, tag, comm,
+                                context_offset, request, dt_contig,
+                                data_sz, dt_ptr, dt_true_lb, send_type);
+
+    END_FUNC_RC(MPID_STATE_DO_ISEND);
 }
 
 #undef FCNAME
@@ -217,7 +315,7 @@ int ADD_SUFFIX(MPID_nem_ofi_send)(struct MPIDI_VC *vc,
 
     BEGIN_FUNC(FCNAME);
     mpi_errno = ADD_SUFFIX(do_isend)(vc, buf, count, datatype, dest, tag,
-                         comm, context_offset, request, MPID_NORMAL_SEND);
+                         comm, context_offset, request, MPID_DONT_CREATE_REQ, MPID_NORMAL_SEND);
     END_FUNC(FCNAME);
     return mpi_errno;
 }
@@ -234,7 +332,7 @@ int ADD_SUFFIX(MPID_nem_ofi_isend)(struct MPIDI_VC *vc,
     int mpi_errno = MPI_SUCCESS;
     BEGIN_FUNC(FCNAME);
     mpi_errno = ADD_SUFFIX(do_isend)(vc, buf, count, datatype, dest,
-                         tag, comm, context_offset, request, MPID_NORMAL_SEND);
+                         tag, comm, context_offset, request, MPID_CREATE_REQ, MPID_NORMAL_SEND);
     END_FUNC(FCNAME);
     return mpi_errno;
 }
@@ -251,7 +349,7 @@ int ADD_SUFFIX(MPID_nem_ofi_ssend)(struct MPIDI_VC *vc,
     int mpi_errno = MPI_SUCCESS;
     BEGIN_FUNC(FCNAME);
     mpi_errno = ADD_SUFFIX(do_isend)(vc, buf, count, datatype, dest,
-                         tag, comm, context_offset, request, MPID_SYNC_SEND);
+                         tag, comm, context_offset, request, MPID_CREATE_REQ, MPID_SYNC_SEND);
     END_FUNC(FCNAME);
     return mpi_errno;
 }
@@ -268,8 +366,8 @@ int ADD_SUFFIX(MPID_nem_ofi_issend)(struct MPIDI_VC *vc,
 {
     int mpi_errno = MPI_SUCCESS;
     BEGIN_FUNC(FCNAME);
-    mpi_errno = do_isend(vc, buf, count, datatype, dest,
-                         tag, comm, context_offset, request, MPID_SYNC_SEND);
+    mpi_errno = ADD_SUFFIX(do_isend)(vc, buf, count, datatype, dest,
+                         tag, comm, context_offset, request, MPID_CREATE_REQ, MPID_SYNC_SEND);
     END_FUNC(FCNAME);
     return mpi_errno;
 }

-----------------------------------------------------------------------

Summary of changes:
 .../ch3/channels/nemesis/netmod/ofi/ofi_impl.h     |   75 ++++++--
 .../ch3/channels/nemesis/netmod/ofi/ofi_init.c     |   29 ++--
 src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_msg.c |    6 +-
 .../ch3/channels/nemesis/netmod/ofi/ofi_tagged.c   |    4 +
 .../nemesis/netmod/ofi/ofi_tagged_template.c       |  186 +++++++++++++++-----
 5 files changed, 217 insertions(+), 83 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list