[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b3-93-g5324a41

Service Account noreply at mpich.org
Sun Jun 14 21:13:11 CDT 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  5324a41f661085fdcd6c6365635accf9fa65839a (commit)
       via  c83b6b2dbb34238ab371a08b2233bd84d2b463ce (commit)
       via  eef0c70ac80ae06d88314c7f0174d18a25f97c16 (commit)
      from  82c2d6523528b56309beada7f76d28f866cd6ab1 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/5324a41f661085fdcd6c6365635accf9fa65839a

commit 5324a41f661085fdcd6c6365635accf9fa65839a
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Wed Jun 10 12:00:03 2015 -0500

    Expose AM flush ordering and issue per OP flush if unordered.
    
    This patch includes three changes:
    (1) Added the netmod API get_ordering to allow a netmod to expose its
    network ordering. A netmod may issue some packets over multiple
    connections in parallel if those packets (such as RMA) do not require
    ordering, in which case the packets may be unordered. This patch sets
    the network ordering in every existing netmod (tcp|mxm|ofi|portals|llc)
    to true, since each of them sends all packets in order over a single
    connection.
    (2) Nemesis exposes the window packet orderings, such as AM flush
    ordering, at init time. It reports packets as ordered only when the
    netmod provides an ordered network.
    (3) If AM flush is ordered (i.e., a flush is guaranteed to finish only
    after all previous operations), then CH3 RMA requests a FLUSH ACK only
    on the last operation. Otherwise, CH3 must request a per-OP FLUSH ACK to
    ensure that all operations are remotely completed.
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h
index 350a887..91975fd 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h
@@ -27,6 +27,7 @@ typedef int (* MPID_nem_net_module_anysource_improbe_t)(int tag, MPID_Comm *comm
                                                         MPID_Request **message, MPI_Status *status);
 
 typedef void (* MPID_nem_net_module_vc_dbg_print_sendq_t)(FILE *stream, MPIDI_VC_t *vc);
+typedef int (* MPID_nem_net_module_get_ordering_t)(int *ordering);
 
 typedef struct MPID_nem_netmod_funcs
 {
@@ -45,6 +46,7 @@ typedef struct MPID_nem_netmod_funcs
     MPID_nem_net_module_vc_terminate_t vc_terminate;
     MPID_nem_net_module_anysource_iprobe_t anysource_iprobe;
     MPID_nem_net_module_anysource_improbe_t anysource_improbe;
+    MPID_nem_net_module_get_ordering_t get_ordering;
 } MPID_nem_netmod_funcs_t;
 
 extern MPID_nem_net_module_vc_dbg_print_sendq_t  MPID_nem_net_module_vc_dbg_print_sendq;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_impl.h b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_impl.h
index 421fc0c..9eb64be 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_impl.h
@@ -81,6 +81,7 @@ int MPID_nem_llc_anysource_iprobe(int tag, MPID_Comm * comm, int context_offset,
                                   MPI_Status * status);
 int MPID_nem_llc_anysource_improbe(int tag, MPID_Comm * comm, int context_offset, int *flag,
                                    MPID_Request ** message, MPI_Status * status);
+int MPID_nem_llc_get_ordering(int *ordering);
 
 int MPID_nem_llc_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
                              void *data, MPIDI_msg_sz_t data_sz);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
index 366940a..11acfbd 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/llc/llc_init.c
@@ -36,6 +36,7 @@ MPID_nem_netmod_funcs_t MPIDI_nem_llc_funcs = {
     .vc_terminate = MPID_nem_llc_vc_terminate,
     .anysource_iprobe = MPID_nem_llc_anysource_iprobe,
     .anysource_improbe = MPID_nem_llc_anysource_improbe,
+    .get_ordering = MPID_nem_llc_get_ordering,
 };
 
 int MPID_nem_llc_my_llc_rank;
@@ -221,3 +222,13 @@ int MPID_nem_llc_anysource_improbe(int tag, MPID_Comm * comm, int context_offset
     return MPID_nem_llc_improbe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, message,
                                 status);
 }
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_llc_get_ordering
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_llc_get_ordering(int *ordering)
+{
+    (*ordering) = 1;
+    return MPI_SUCCESS;
+}
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
index e44a0a3..0c4be8b 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_impl.h
@@ -26,6 +26,7 @@ int MPID_nem_mxm_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
 int MPID_nem_mxm_vc_init(MPIDI_VC_t * vc);
 int MPID_nem_mxm_vc_destroy(MPIDI_VC_t * vc);
 int MPID_nem_mxm_vc_terminate(MPIDI_VC_t * vc);
+int MPID_nem_mxm_get_ordering(int *ordering);
 
 /* alternate interface */
 int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
diff --git a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
index d7704ca..84bbe06 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/mxm/mxm_init.c
@@ -71,7 +71,8 @@ MPID_nem_netmod_funcs_t MPIDI_nem_mxm_funcs = {
     MPID_nem_mxm_vc_destroy,
     MPID_nem_mxm_vc_terminate,
     MPID_nem_mxm_anysource_iprobe,
-    MPID_nem_mxm_anysource_improbe
+    MPID_nem_mxm_anysource_improbe,
+    MPID_nem_mxm_get_ordering
 };
 
 static MPIDI_Comm_ops_t comm_ops = {
@@ -388,6 +389,16 @@ int MPID_nem_mxm_vc_terminate(MPIDI_VC_t * vc)
     goto fn_exit;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_mxm_get_ordering
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_mxm_get_ordering(int *ordering)
+{
+    (*ordering) = 1;
+    return MPI_SUCCESS;
+}
+
 static int _mxm_conf(void)
 {
     int mpi_errno = MPI_SUCCESS;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_data.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_data.c
index d89270a..9bdc325 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_data.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_data.c
@@ -55,4 +55,5 @@ MPID_nem_netmod_funcs_t MPIDI_nem_ofi_funcs = {
     MPID_nem_ofi_vc_terminate,
     MPID_nem_ofi_anysource_iprobe,
     MPID_nem_ofi_anysource_improbe,
+    MPID_nem_ofi_get_ordering,
 };
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
index 22560da..25d5570 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_impl.h
@@ -289,6 +289,7 @@ int MPID_nem_ofi_connect_to_root(const char *business_card, MPIDI_VC_t * new_vc)
 int MPID_nem_ofi_vc_destroy(MPIDI_VC_t * vc);
 int MPID_nem_ofi_cm_init(MPIDI_PG_t * pg_p, int pg_rank);
 int MPID_nem_ofi_cm_finalize();
+int MPID_nem_ofi_get_ordering(int *ordering);
 
 extern MPID_nem_ofi_global_t gl_data;
 extern MPIDI_Comm_ops_t _g_comm_ops;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
index cc8eec8..7e221f7 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ofi/ofi_init.c
@@ -326,6 +326,14 @@ int MPID_nem_ofi_finalize(void)
     END_FUNC_RC(FCNAME);
 }
 
+#undef FCNAME
+#define FCNAME DECL_FUNC(MPID_nem_ofi_get_ordering)
+int MPID_nem_ofi_get_ordering(int *ordering)
+{
+    (*ordering) = 1;
+    return MPI_SUCCESS;
+}
+
 static inline int compile_time_checking()
 {
     OFI_COMPILE_TIME_ASSERT(sizeof(MPID_nem_ofi_vc_t) <= MPID_NEM_VC_NETMOD_AREA_LEN);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
index d924954..8647a8c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
@@ -203,6 +203,7 @@ int MPID_nem_ptl_anysource_improbe(int tag, MPID_Comm * comm, int context_offset
 void MPID_nem_ptl_anysource_posted(MPID_Request *rreq);
 int MPID_nem_ptl_anysource_matched(MPID_Request *rreq);
 int MPID_nem_ptl_init_id(MPIDI_VC_t *vc);
+int MPID_nem_ptl_get_ordering(int *ordering);
 
 int MPID_nem_ptl_lmt_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *rts_pkt, MPID_Request *req);
 int MPID_nem_ptl_lmt_start_recv(MPIDI_VC_t *vc,  MPID_Request *rreq, MPID_IOV s_cookie);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
index fd972bb..b29475c 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
@@ -60,7 +60,8 @@ MPID_nem_netmod_funcs_t MPIDI_nem_portals4_funcs = {
     vc_destroy,
     vc_terminate,
     MPID_nem_ptl_anysource_iprobe,
-    MPID_nem_ptl_anysource_improbe
+    MPID_nem_ptl_anysource_improbe,
+    MPID_nem_ptl_get_ordering
 };
 
 static MPIDI_Comm_ops_t comm_ops = {
@@ -647,6 +648,16 @@ int MPID_nem_ptl_init_id(MPIDI_VC_t *vc)
     goto fn_exit;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_ptl_get_ordering
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_ptl_get_ordering(int *ordering)
+{
+    (*ordering) = 1;
+    return MPI_SUCCESS;
+}
+
 
 #define CASE_STR(x) case x: return #x
 
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
index aab5019..8c8b6b9 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_impl.h
@@ -108,6 +108,7 @@ void MPID_nem_tcp_vc_dbg_print_sendq(FILE *stream, MPIDI_VC_t *vc);
 int MPID_nem_tcp_socksm_finalize(void);
 int MPID_nem_tcp_socksm_init(void);
 int MPID_nem_tcp_vc_terminated(MPIDI_VC_t *vc);
+int MPID_nem_tcp_get_ordering(int *ordering);
 
 
 int MPID_nem_tcp_pkt_unpause_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
index 14ba19a..94600a3 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_init.c
@@ -104,7 +104,9 @@ MPID_nem_netmod_funcs_t MPIDI_nem_tcp_funcs = {
     MPID_nem_tcp_vc_init,
     MPID_nem_tcp_vc_destroy,
     MPID_nem_tcp_vc_terminate,
-    NULL /* anysource iprobe */
+    NULL, /* anysource iprobe */
+    NULL, /* anysource_improbe */
+    MPID_nem_tcp_get_ordering
 };
 
 /* in case there are no packet types defined (e.g., they're ifdef'ed out) make sure the array is not zero length */
@@ -726,3 +728,12 @@ int MPID_nem_tcp_vc_terminated(MPIDI_VC_t *vc)
     goto fn_exit;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_get_ordering
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_tcp_get_ordering(int *ordering)
+{
+    (*ordering) = 1;
+    return MPI_SUCCESS;
+}
diff --git a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
index 8a8e40d..998aa95 100644
--- a/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c
@@ -74,6 +74,41 @@ int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t * win_hooks)
 
 
 #undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_Win_pkt_orderings_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_orderings)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int netmod_ordering = 0;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+
+    win_pkt_orderings->am_flush_ordered = 0;
+
+    if (MPID_nem_netmod_func && MPID_nem_netmod_func->get_ordering) {
+        mpi_errno = MPID_nem_netmod_func->get_ordering(&netmod_ordering);
+        if (mpi_errno)
+            MPIU_ERR_POP(mpi_errno);
+    }
+
+    if (netmod_ordering > 0) {
+        /* Guarantees ordered AM flush only on ordered network.
+         * In other words, it is ordered only when both intra-node and inter-node
+         * connections are ordered. Otherwise we have to maintain the ordering per
+         * connection, which causes expensive O(P) structure or per-OP function calls.*/
+        win_pkt_orderings->am_flush_ordered = 1;
+    }
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
 #define FUNCNAME MPIDI_CH3_Win_init
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
diff --git a/src/mpid/ch3/channels/sock/src/ch3_win_fns.c b/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
index 92b34b3..8942099 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_win_fns.c
@@ -43,3 +43,25 @@ int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t *win_hooks)
 
     return mpi_errno;
 }
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_Win_pkt_orderings_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_orderings)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int netmod_ordering = 0;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+
+    /* Guarantees ordered AM flush. */
+    win_pkt_orderings->am_flush_ordered = 1;
+
+  fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_WIN_PKT_ORDERINGS_INIT);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 70c6ac5..7692704 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1149,6 +1149,18 @@ typedef struct {
 
 extern MPIDI_CH3U_Win_hooks_t MPIDI_CH3U_Win_hooks;
 
+typedef struct MPIDI_CH3U_Win_pkt_ordering {
+
+    /* Ordered AM flush.
+     * It means whether AM flush is guaranteed to be finished after all previous
+     * RMA operations. It is initialized by Nemesis and used by CH3.
+     * Note that we use single global flag for all targets including both
+     * intra-node and inter-node processes.*/
+    int am_flush_ordered;
+} MPIDI_CH3U_Win_pkt_ordering_t;
+
+extern MPIDI_CH3U_Win_pkt_ordering_t MPIDI_CH3U_Win_pkt_orderings;
+
 /* CH3 and Channel window functions initializers */
 int MPIDI_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns);
 int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns);
@@ -1156,6 +1168,8 @@ int MPIDI_CH3_Win_fns_init(MPIDI_CH3U_Win_fns_t *win_fns);
 /* Channel window hooks initializer */
 int MPIDI_CH3_Win_hooks_init(MPIDI_CH3U_Win_hooks_t *win_hooks);
 
+int MPIDI_CH3_Win_pkt_orderings_init(MPIDI_CH3U_Win_pkt_ordering_t * win_pkt_orderings);
+
 /* Default window creation functions provided by CH3 */
 int MPIDI_CH3U_Win_create(void *, MPI_Aint, int, MPID_Info *, MPID_Comm *,
                          MPID_Win **);
diff --git a/src/mpid/ch3/include/mpidpre.h b/src/mpid/ch3/include/mpidpre.h
index 255b9d3..390c5ea 100644
--- a/src/mpid/ch3/include/mpidpre.h
+++ b/src/mpid/ch3/include/mpidpre.h
@@ -298,14 +298,6 @@ typedef struct MPIDI_Win_basic_info {
     MPI_Win win_handle;
 } MPIDI_Win_basic_info_t;
 
-typedef struct MPIDI_RMA_Pkt_orderings {
-    int flush_remote; /* ordered FLUSH, for remote completion */
-    /* FIXME: in future we should also add local completin
-       ordering: WAW, WAR, RAW, RAR. */
-} MPIDI_RMA_Pkt_orderings_t;
-
-extern MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings;
-
 #define MPIDI_DEV_WIN_DECL                                               \
     volatile int at_completion_counter;  /* completion counter for operations \
                                  targeting this window */                \
diff --git a/src/mpid/ch3/src/ch3u_rma_progress.c b/src/mpid/ch3/src/ch3u_rma_progress.c
index fdff895..f6a7ae3 100644
--- a/src/mpid/ch3/src/ch3u_rma_progress.c
+++ b/src/mpid/ch3/src/ch3u_rma_progress.c
@@ -337,20 +337,32 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t * targ
             first_op = 0;
         }
 
+        /* piggyback FLUSH on every OP if ordered flush is not guaranteed. */
+        if (!MPIDI_CH3U_Win_pkt_orderings.am_flush_ordered)
+            flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
+
         if (curr_op->next == NULL) {
             /* piggyback on last OP. */
             if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
-                target->sync.outstanding_acks++;
-
                 if (target->win_complete_flag)
                     flags |= MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER;
             }
             else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_UNLOCK;
+
+                /* if piggyback UNLOCK then unset FLUSH (set for every
+                 * operation on out-of-order network). */
+                flags &= ~MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
             }
         }
 
+        /* only increase ack counter when FLUSH or UNLOCK flag is set,
+         * but without LOCK piggyback. */
+        if (((flags & MPIDI_CH3_PKT_FLAG_RMA_FLUSH)
+             || (flags & MPIDI_CH3_PKT_FLAG_RMA_UNLOCK)))
+            target->sync.outstanding_acks++;
+
         mpi_errno = issue_rma_op(curr_op, win_ptr, target, flags);
         if (mpi_errno != MPI_SUCCESS)
             MPIU_ERR_POP(mpi_errno);
diff --git a/src/mpid/ch3/src/mpid_init.c b/src/mpid/ch3/src/mpid_init.c
index e541f71..6373ae2 100644
--- a/src/mpid/ch3/src/mpid_init.c
+++ b/src/mpid/ch3/src/mpid_init.c
@@ -41,6 +41,7 @@ MPIDI_Process_t MPIDI_Process = { NULL };
 MPIDI_CH3U_SRBuf_element_t * MPIDI_CH3U_SRBuf_pool = NULL;
 MPIDI_CH3U_Win_fns_t MPIDI_CH3U_Win_fns = { NULL };
 MPIDI_CH3U_Win_hooks_t MPIDI_CH3U_Win_hooks = { NULL };
+MPIDI_CH3U_Win_pkt_ordering_t MPIDI_CH3U_Win_pkt_orderings = { 0 };
 
 
 #undef FUNCNAME
@@ -193,6 +194,9 @@ int MPID_Init(int *argc, char ***argv, int requested, int *provided,
     mpi_errno = MPIDI_CH3U_Recvq_init();
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
+    /* Ask channel to expose Window packet ordering. */
+    MPIDI_CH3_Win_pkt_orderings_init(&MPIDI_CH3U_Win_pkt_orderings);
+
     /*
      * Initialize the MPI_COMM_WORLD object
      */
diff --git a/src/mpid/ch3/src/mpidi_rma.c b/src/mpid/ch3/src/mpidi_rma.c
index 4cbec01..28c9063 100644
--- a/src/mpid/ch3/src/mpidi_rma.c
+++ b/src/mpid/ch3/src/mpidi_rma.c
@@ -95,7 +95,6 @@ MPIDI_RMA_Op_t *global_rma_op_pool = NULL, *global_rma_op_pool_tail =
     NULL, *global_rma_op_pool_start = NULL;
 MPIDI_RMA_Target_t *global_rma_target_pool = NULL, *global_rma_target_pool_tail =
     NULL, *global_rma_target_pool_start = NULL;
-MPIDI_RMA_Pkt_orderings_t *MPIDI_RMA_Pkt_orderings = NULL;
 
 #undef FUNCNAME
 #define FUNCNAME MPIDI_RMA_init
@@ -128,12 +127,6 @@ int MPIDI_RMA_init(void)
                       &(global_rma_target_pool_start[i]));
     }
 
-    MPIU_CHKPMEM_MALLOC(MPIDI_RMA_Pkt_orderings, struct MPIDI_RMA_Pkt_orderings *,
-                        sizeof(struct MPIDI_RMA_Pkt_orderings), mpi_errno, "RMA packet orderings");
-    /* FIXME: here we should let channel to set ordering flags. For now we just set them
-     * in CH3 layer. */
-    MPIDI_RMA_Pkt_orderings->flush_remote = 1;
-
   fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_INIT);
     return mpi_errno;
@@ -156,7 +149,6 @@ void MPIDI_RMA_finalize(void)
 
     MPIU_Free(global_rma_op_pool_start);
     MPIU_Free(global_rma_target_pool_start);
-    MPIU_Free(MPIDI_RMA_Pkt_orderings);
 
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_RMA_FINALIZE);
 }

http://git.mpich.org/mpich.git/commitdiff/c83b6b2dbb34238ab371a08b2233bd84d2b463ce

commit c83b6b2dbb34238ab371a08b2233bd84d2b463ce
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Wed Jun 10 12:29:43 2015 -0500

    Always free issued OPs when window resource is used up.
    
    When the window resources are used up, the current code frees OPs
    before completion only if flush_remote is ordered. However, we can
    always free them, even on an out-of-order network, because remote
    completion is awaited via the ack counter, and local completion
    (flush_local) is translated to remote completion (flush).
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/include/mpid_rma_oplist.h b/src/mpid/ch3/include/mpid_rma_oplist.h
index dd91a12..4cfebf2 100644
--- a/src/mpid/ch3/include/mpid_rma_oplist.h
+++ b/src/mpid/ch3/include/mpid_rma_oplist.h
@@ -554,11 +554,9 @@ static inline int MPIDI_CH3I_Win_get_op(MPID_Win * win_ptr, MPIDI_RMA_Op_t ** e)
         if (new_ptr != NULL)
             break;
 
-        if (MPIDI_RMA_Pkt_orderings->flush_remote) {
-            mpi_errno = MPIDI_CH3I_RMA_Free_ops_before_completion(win_ptr);
-            if (mpi_errno != MPI_SUCCESS)
-                MPIU_ERR_POP(mpi_errno);
-        }
+        mpi_errno = MPIDI_CH3I_RMA_Free_ops_before_completion(win_ptr);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIU_ERR_POP(mpi_errno);
 
         MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
         new_ptr = MPIDI_CH3I_Win_op_alloc(win_ptr);

http://git.mpich.org/mpich.git/commitdiff/eef0c70ac80ae06d88314c7f0174d18a25f97c16

commit eef0c70ac80ae06d88314c7f0174d18a25f97c16
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Wed Jun 10 11:43:07 2015 -0500

    Move outstanding_acks increment to flush sending step.
    
    The outstanding_acks counter was incremented at each sync call (such as
    fence and flush). However, the counter then had to be decremented again
    if a flush ack was not required. It is more straightforward to increment
    it only when the flush packet is actually issued (as a piggybacked FLUSH
    flag or as a separate flush message).
    
    Signed-off-by: Xin Zhao <xinzhao3 at illinois.edu>
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/src/ch3u_rma_progress.c b/src/mpid/ch3/src/ch3u_rma_progress.c
index 74731b0..fdff895 100644
--- a/src/mpid/ch3/src/ch3u_rma_progress.c
+++ b/src/mpid/ch3/src/ch3u_rma_progress.c
@@ -192,9 +192,6 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
                  * function will clean it up. */
                 target->access_state = MPIDI_RMA_LOCK_GRANTED;
 
-                target->sync.outstanding_acks--;
-                MPIU_Assert(target->sync.outstanding_acks >= 0);
-
                 /* We are done with ending synchronization, unset target's sync_flag. */
                 target->sync.sync_flag = MPIDI_RMA_SYNC_NONE;
 
@@ -214,23 +211,15 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
     case MPIDI_RMA_NONE:
         if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
             if (target->pending_op_list_head == NULL) {
-                if (target->target_rank == rank) {
-                    target->sync.outstanding_acks--;
-                    MPIU_Assert(target->sync.outstanding_acks >= 0);
-                }
-                else {
+                if (target->target_rank != rank) {
                     if (target->put_acc_issued) {
+
+                        target->sync.outstanding_acks++;
+
                         mpi_errno = send_flush_msg(target->target_rank, win_ptr);
                         if (mpi_errno != MPI_SUCCESS)
                             MPIU_ERR_POP(mpi_errno);
                     }
-                    else {
-                        /* We did not issue PUT/ACC since the last
-                         * synchronization call, therefore here we
-                         * don't need ACK back */
-                        target->sync.outstanding_acks--;
-                        MPIU_Assert(target->sync.outstanding_acks >= 0);
-                    }
                 }
 
                 /* We are done with ending synchronization, unset target's sync_flag. */
@@ -242,9 +231,6 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
         else if (target->sync.sync_flag == MPIDI_RMA_SYNC_UNLOCK) {
             if (target->pending_op_list_head == NULL) {
                 if (target->target_rank == rank) {
-                    target->sync.outstanding_acks--;
-                    MPIU_Assert(target->sync.outstanding_acks >= 0);
-
                     mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
                     if (mpi_errno != MPI_SUCCESS)
                         MPIU_ERR_POP(mpi_errno);
@@ -255,11 +241,12 @@ static inline int check_and_switch_target_state(MPID_Win * win_ptr, MPIDI_RMA_Ta
                         /* We did not issue PUT/ACC since the last
                          * synchronization call, therefore here we
                          * don't need ACK back */
-                        target->sync.outstanding_acks--;
-                        MPIU_Assert(target->sync.outstanding_acks >= 0);
 
                         flag = MPIDI_CH3_PKT_FLAG_RMA_UNLOCK_NO_ACK;
                     }
+                    else {
+                        target->sync.outstanding_acks++;
+                    }
                     mpi_errno = send_unlock_msg(target->target_rank, win_ptr, flag);
                     if (mpi_errno != MPI_SUCCESS)
                         MPIU_ERR_POP(mpi_errno);
@@ -354,6 +341,8 @@ static inline int issue_ops_target(MPID_Win * win_ptr, MPIDI_RMA_Target_t * targ
             /* piggyback on last OP. */
             if (target->sync.sync_flag == MPIDI_RMA_SYNC_FLUSH) {
                 flags |= MPIDI_CH3_PKT_FLAG_RMA_FLUSH;
+                target->sync.outstanding_acks++;
+
                 if (target->win_complete_flag)
                     flags |= MPIDI_CH3_PKT_FLAG_RMA_DECR_AT_COUNTER;
             }
@@ -680,7 +669,6 @@ int MPIDI_CH3I_RMA_Cleanup_target_aggressive(MPID_Win * win_ptr, MPIDI_RMA_Targe
         curr_target = win_ptr->slots[i].target_list_head;
         if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
             curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-            curr_target->sync.outstanding_acks++;
         }
 
         /* Issue out all operations. */
diff --git a/src/mpid/ch3/src/ch3u_rma_sync.c b/src/mpid/ch3/src/ch3u_rma_sync.c
index 731a94c..5fb34d8 100644
--- a/src/mpid/ch3/src/ch3u_rma_sync.c
+++ b/src/mpid/ch3/src/ch3u_rma_sync.c
@@ -374,7 +374,6 @@ int MPIDI_Win_fence(int assert, MPID_Win * win_ptr)
             /* set sync_flag in sync struct */
             if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
                 curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-                curr_target->sync.outstanding_acks++;
             }
             curr_target = curr_target->next;
         }
@@ -753,7 +752,6 @@ int MPIDI_Win_complete(MPID_Win * win_ptr)
             /* set sync_flag in sync struct */
             if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
                 curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-                curr_target->sync.outstanding_acks++;
             }
             curr_target->win_complete_flag = 1;
         }
@@ -1088,7 +1086,6 @@ int MPIDI_Win_unlock(int dest, MPID_Win * win_ptr)
         sync_flag = MPIDI_RMA_SYNC_UNLOCK;
     if (target->sync.sync_flag < sync_flag) {
         target->sync.sync_flag = sync_flag;
-        target->sync.outstanding_acks++;
     }
 
     /* Issue out all operations. */
@@ -1210,7 +1207,6 @@ int MPIDI_Win_flush(int dest, MPID_Win * win_ptr)
     /* Set sync_flag in sync struct. */
     if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
         target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-        target->sync.outstanding_acks++;
     }
 
     /* Issue out all operations. */
@@ -1318,7 +1314,6 @@ int MPIDI_Win_flush_local(int dest, MPID_Win * win_ptr)
     if (target->sync.upgrade_flush_local) {
         if (target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
             target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-            target->sync.outstanding_acks++;
         }
     }
     else {
@@ -1532,7 +1527,6 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
             while (curr_target != NULL) {
                 if (curr_target->sync.sync_flag < sync_flag) {
                     curr_target->sync.sync_flag = sync_flag;
-                    curr_target->sync.outstanding_acks++;
                 }
                 curr_target = curr_target->next;
             }
@@ -1551,7 +1545,6 @@ int MPIDI_Win_unlock_all(MPID_Win * win_ptr)
             if (curr_target != NULL) {
                 if (curr_target->sync.sync_flag < sync_flag) {
                     curr_target->sync.sync_flag = sync_flag;
-                    curr_target->sync.outstanding_acks++;
                 }
             }
             else {
@@ -1672,7 +1665,6 @@ int MPIDI_Win_flush_all(MPID_Win * win_ptr)
         while (curr_target != NULL) {
             if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
                 curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-                curr_target->sync.outstanding_acks++;
             }
 
             /* ENDING synchronization: correctly decrement the following counters. */
@@ -1767,7 +1759,6 @@ int MPIDI_Win_flush_local_all(MPID_Win * win_ptr)
             if (curr_target->sync.upgrade_flush_local) {
                 if (curr_target->sync.sync_flag < MPIDI_RMA_SYNC_FLUSH) {
                     curr_target->sync.sync_flag = MPIDI_RMA_SYNC_FLUSH;
-                    curr_target->sync.outstanding_acks++;
                 }
                 upgrade_flush_local_cnt++;
             }

-----------------------------------------------------------------------

Summary of changes:
 .../ch3/channels/nemesis/include/mpid_nem_nets.h   |    2 +
 .../ch3/channels/nemesis/netmod/llc/llc_impl.h     |    1 +
 .../ch3/channels/nemesis/netmod/llc/llc_init.c     |   11 +++++
 .../ch3/channels/nemesis/netmod/mxm/mxm_impl.h     |    1 +
 .../ch3/channels/nemesis/netmod/mxm/mxm_init.c     |   13 ++++++-
 .../ch3/channels/nemesis/netmod/ofi/ofi_data.c     |    1 +
 .../ch3/channels/nemesis/netmod/ofi/ofi_impl.h     |    1 +
 .../ch3/channels/nemesis/netmod/ofi/ofi_init.c     |    8 ++++
 .../channels/nemesis/netmod/portals4/ptl_impl.h    |    1 +
 .../channels/nemesis/netmod/portals4/ptl_init.c    |   13 ++++++-
 .../ch3/channels/nemesis/netmod/tcp/tcp_impl.h     |    1 +
 .../ch3/channels/nemesis/netmod/tcp/tcp_init.c     |   13 ++++++-
 src/mpid/ch3/channels/nemesis/src/ch3_win_fns.c    |   35 ++++++++++++++++
 src/mpid/ch3/channels/sock/src/ch3_win_fns.c       |   22 ++++++++++
 src/mpid/ch3/include/mpid_rma_oplist.h             |    8 +--
 src/mpid/ch3/include/mpidimpl.h                    |   14 +++++++
 src/mpid/ch3/include/mpidpre.h                     |    8 ----
 src/mpid/ch3/src/ch3u_rma_progress.c               |   42 ++++++++++----------
 src/mpid/ch3/src/ch3u_rma_sync.c                   |    9 ----
 src/mpid/ch3/src/mpid_init.c                       |    4 ++
 src/mpid/ch3/src/mpidi_rma.c                       |    8 ----
 21 files changed, 162 insertions(+), 54 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list