[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b1-62-gdaf29e3
    Service Account 
    noreply at mpich.org
       
    Fri Apr 10 15:27:14 CDT 2015
    
    
  
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master, has been updated
       via  daf29e33662359f73dcd903bda42633752b96dda (commit)
       via  2f97f4297beffe3994bb4a7ba25fc22217c81a55 (commit)
      from  79e239819cd638f7e0fc2c6c9b597ab3b685805b (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/daf29e33662359f73dcd903bda42633752b96dda
commit daf29e33662359f73dcd903bda42633752b96dda
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Wed Apr 8 09:59:42 2015 -0500
    portals4: tuning
    
    Changes the value of various static limits in the Portals4 netmod, based
    on experimentation results and suggestions from collaborators.
    
    1. Bump most ni_limits from 32K to 64K. These limits relate closely to
       queue depth. We can reasonably expect to support a queue depth
       of 64K.
    
    2. Limit issued origin events to 500. This translates to sending ~250
       operations to Portals at a time, which over IB is roughly the
       saturation point. TODO: turn this into a CVAR.
    
    3. Limit per-target issued operations to 50. This will give the target a
       better chance to process events without being overwhelmed by a single
       process. TODO: turn this into a CVAR, also.
    
    4. Allocate more buffer space for incoming control messages. Observed
       results, especially with larger messages, showed that more buffer space
       cuts down on flow-control events.
    
    Signed-off-by: Antonio J. Pena <apenya at mcs.anl.gov>
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
index ae9099f..fd972bb 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_init.c
@@ -12,10 +12,13 @@
 #error Checkpointing not implemented
 #endif
 
-#define UNEXPECTED_HDR_COUNT 32768
-#define EVENT_COUNT          32768
-#define LIST_SIZE            32768
-#define ENTRY_COUNT          32768
+#define UNEXPECTED_HDR_COUNT (1024*64)
+#define EVENT_COUNT          (1024*64)
+#define LIST_SIZE            (1024*64)
+#define MAX_ENTRIES          (1024*64)
+#define ENTRY_COUNT          (1024*64)
+/* FIXME: turn ORIGIN_EVENTS into a CVAR */
+#define ORIGIN_EVENTS        (500)
 #define NID_KEY  "NID"
 #define PID_KEY  "PID"
 #define PTI_KEY  "PTI"
@@ -245,7 +248,7 @@ static int ptl_init(MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));
 
     /* currently, rportlas only works with a single NI and EQ */
-    ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, EVENT_COUNT, get_target_info);
+    ret = MPID_nem_ptl_rptl_init(MPIDI_Process.my_pg->size, ORIGIN_EVENTS, get_target_info);
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlniinit", "**ptlniinit %s", MPID_nem_ptl_strerror(ret));
 
     /* allow rportal to manage the primary portal and retransmit if needed */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
index 01c0c43..bb1d361 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
@@ -8,8 +8,8 @@
 #include <mpl_utlist.h>
 #include "rptl.h"
 
-#define NUM_RECV_BUFS 8
-#define BUFSIZE (1024*1024)
+#define NUM_RECV_BUFS 2
+#define BUFSIZE (1024*1024*50)
 #define CTL_TAG 0
 #define GET_TAG 1
 #define PAYLOAD_SIZE  (PTL_MAX_EAGER - sizeof(MPIDI_CH3_Pkt_t))
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
index 9ae3be1..a94eb8a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
@@ -103,7 +103,7 @@ static int append_overflow(int i)
     me.match_id = id_any;
     me.match_bits = 0;
     me.ignore_bits = ~((ptl_match_bits_t)0);
-    me.min_free = PTL_MAX_EAGER;
+    me.min_free = PTL_LARGE_THRESHOLD;
     
     /* if there is no space to append the entry, process outstanding events and try again */
     ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
index 74a39ee..c3742fc 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.c
@@ -7,6 +7,10 @@
 #include "ptl_impl.h"
 #include "rptl.h"
 
+/* FIXME: turn this into a CVAR, or fraction of the event limit from
+   rptl_init */
+#define PER_TARGET_THRESHOLD 50
+
 /*
  * Prereqs:
  *
@@ -93,6 +97,7 @@ static int find_target(ptl_process_t id, struct rptl_target **target)
         t->op_pool = NULL;
         t->data_op_list = NULL;
         t->control_op_list = NULL;
+        t->issued_data_ops = 0;
     }
 
     *target = t;
@@ -220,7 +225,7 @@ static int poke_progress(void)
             /* we should not get any NACKs on the control portal */
             assert(op->state != RPTL_OP_STATE_NACKED);
 
-            if (rptl_info.origin_events_left < 2) {
+            if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                 /* too few origin events left.  we can't issue this op
                  * or any following op to this target in order to
                  * maintain ordering */
@@ -228,6 +233,7 @@ static int poke_progress(void)
             }
 
             rptl_info.origin_events_left -= 2;
+            target->issued_data_ops++;
 
             /* force request for an ACK even if the user didn't ask
              * for it.  replace the user pointer with the OP id. */
@@ -255,7 +261,7 @@ static int poke_progress(void)
                 if (op->state == RPTL_OP_STATE_NACKED)
                     break;
 
-                if (rptl_info.origin_events_left < 2) {
+                if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                     /* too few origin events left.  we can't issue
                      * this op or any following op to this target in
                      * order to maintain ordering */
@@ -263,6 +269,7 @@ static int poke_progress(void)
                 }
 
                 rptl_info.origin_events_left -= 2;
+                target->issued_data_ops++;
 
                 /* force request for an ACK even if the user didn't
                  * ask for it.  replace the user pointer with the OP
@@ -283,7 +290,7 @@ static int poke_progress(void)
                 if (op->state == RPTL_OP_STATE_NACKED)
                     break;
 
-                if (rptl_info.origin_events_left < 1) {
+                if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                     /* too few origin events left.  we can't issue
                      * this op or any following op to this target in
                      * order to maintain ordering */
@@ -291,6 +298,7 @@ static int poke_progress(void)
                 }
 
                 rptl_info.origin_events_left--;
+                target->issued_data_ops++;
 
                 ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length,
                              op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits,
@@ -539,6 +547,7 @@ static int get_event_info(ptl_event_t * event, struct rptl **ret_rptl, struct rp
         op = (struct rptl_op *) event->user_ptr;
 
         rptl_info.origin_events_left++;
+        op->target->issued_data_ops--;
 
         /* see if there are any pending ops to be issued */
         ret = poke_progress();
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.h
index f99523c..c57b8dd 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/rptl.h
@@ -158,6 +158,8 @@ struct rptl_target {
 
     struct rptl_target *next;
     struct rptl_target *prev;
+
+    int issued_data_ops;
 };
 
 struct rptl_info {
http://git.mpich.org/mpich.git/commitdiff/2f97f4297beffe3994bb4a7ba25fc22217c81a55
commit 2f97f4297beffe3994bb4a7ba25fc22217c81a55
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date:   Fri Apr 3 11:29:13 2015 -0500
    portals4: revert [722d85a4] and [d459c025]
    
    The 2 commits being reverted introduced a "safe" PtlMEAppend function
    that would call MPID_nem_ptl_poll to process some events in case there
    was no space to append the match list entry. However, the poll function
    is not reentrant-safe, which could lead to ordering problems.
    
    The increased list entry limit from [c6c0d6f6] should prevent PTL_NO_SPACE
    errors from happening, except in the extreme case. If we still find we are
    hitting this error, a proper fix can be done in the Rportals layer.
    
    Signed-off-by: Antonio J. Pena <apenya at mcs.anl.gov>
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
index 4071f72..8f39d73 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_impl.h
@@ -208,26 +208,6 @@ int MPID_nem_ptl_lmt_handle_cookie(MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV s
 int MPID_nem_ptl_lmt_done_send(MPIDI_VC_t *vc, MPID_Request *req);
 int MPID_nem_ptl_lmt_done_recv(MPIDI_VC_t *vc, MPID_Request *req);
 
-/* a safe PtlMEAppend for when there is no space available */
-static inline int MPID_nem_ptl_me_append(ptl_handle_ni_t  ni_handle,
-                                         ptl_pt_index_t   pt_index,
-                                         const ptl_me_t  *me,
-                                         ptl_list_t       ptl_list,
-                                         void            *user_ptr,
-                                         ptl_handle_me_t *me_handle)
-{
-    int ret;
-
-    while (1) {
-        ret = PtlMEAppend(ni_handle, pt_index, me, ptl_list, user_ptr, me_handle);
-        if (ret != PTL_NO_SPACE)
-            break;
-        MPID_nem_ptl_poll(1);
-    }
-
-    return ret;
-}
-
 /* packet handlers */
 
 int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
index 39f8d8f..01c0c43 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_nm.c
@@ -51,8 +51,8 @@ int MPID_nem_ptl_nm_init(void)
 
     for (i = 0; i < NUM_RECV_BUFS; ++i) {
         overflow_me.start = recvbufs + (i * BUFSIZE);
-        ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &overflow_me,
-                                     PTL_OVERFLOW_LIST, (void *)(size_t)i, &me_handles[i]);
+        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &overflow_me,
+                          PTL_OVERFLOW_LIST, (void *)(size_t)i, &me_handles[i]);
         MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                              MPID_nem_ptl_strerror(ret));
     }
@@ -122,8 +122,8 @@ static inline int meappend_large(ptl_process_t id, MPID_Request *req, ptl_match_
 
         ++REQ_PTL(req)->num_gets;
 
-        ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &me, PTL_PRIORITY_LIST, req,
-                                     &foo_me_handle);
+        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &me, PTL_PRIORITY_LIST, req,
+                          &foo_me_handle);
         MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                              MPID_nem_ptl_strerror(ret));
         MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "PtlMEAppend(req=%p tag=%#lx)", req, NPTL_MATCH_GET_TAG(match_bits)));
@@ -494,8 +494,8 @@ int MPID_nem_ptl_nm_ctl_event_handler(const ptl_event_t *e)
 
             overflow_me.start = recvbufs + (buf_idx * BUFSIZE);
 
-            ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &overflow_me,
-                                         PTL_OVERFLOW_LIST, e->user_ptr, &me_handles[buf_idx]);
+            ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_control_pt, &overflow_me,
+                              PTL_OVERFLOW_LIST, e->user_ptr, &me_handles[buf_idx]);
             MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s",
                                  MPID_nem_ptl_strerror(ret));
         }
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
index 195791d..9ae3be1 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_poll.c
@@ -106,8 +106,8 @@ static int append_overflow(int i)
     me.min_free = PTL_MAX_EAGER;
     
     /* if there is no space to append the entry, process outstanding events and try again */
-    ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
-                                 &overflow_me_handle[i]);
+    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
+                      &overflow_me_handle[i]);
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
 
  fn_exit:
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
index e69e9b1..ec6d90a 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_recv.c
@@ -543,7 +543,7 @@ int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPID_Request *rreq)
         
     }
 
-    ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_PRIORITY_LIST, rreq, &REQ_PTL(rreq)->put_me);
+    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_PRIORITY_LIST, rreq, &REQ_PTL(rreq)->put_me);
     MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
     DBG_MSG_MEAPPEND("REG", vc ? vc->pg_rank : MPI_ANY_SOURCE, me, rreq);
     MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "    buf=%p", me.start);
diff --git a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
index f742ae8..32c4275 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/portals4/ptl_send.c
@@ -41,8 +41,8 @@ static void big_meappend(void *buf, ptl_size_t left_to_send, MPIDI_VC_t *vc, ptl
         else
             me.length = left_to_send;
 
-        ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
-                                     &REQ_PTL(sreq)->get_me_p[i]);
+        ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
+                          &REQ_PTL(sreq)->get_me_p[i]);
         DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
         MPIU_Assert(ret == 0);
         /* increment the cc for each get operation */
@@ -276,8 +276,8 @@ static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *
 
                 MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");
 
-                ret = MPID_nem_ptl_me_append(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
-                                             &REQ_PTL(sreq)->get_me_p[0]);
+                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
+                                  &REQ_PTL(sreq)->get_me_p[0]);
                 MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                 DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                 /* increment the cc for the get operation */
-----------------------------------------------------------------------
Summary of changes:
 .../channels/nemesis/netmod/portals4/ptl_impl.h    |   20 --------------------
 .../channels/nemesis/netmod/portals4/ptl_init.c    |   13 ++++++++-----
 .../ch3/channels/nemesis/netmod/portals4/ptl_nm.c  |   16 ++++++++--------
 .../channels/nemesis/netmod/portals4/ptl_poll.c    |    6 +++---
 .../channels/nemesis/netmod/portals4/ptl_recv.c    |    2 +-
 .../channels/nemesis/netmod/portals4/ptl_send.c    |    8 ++++----
 .../ch3/channels/nemesis/netmod/portals4/rptl.c    |   15 ++++++++++++---
 .../ch3/channels/nemesis/netmod/portals4/rptl.h    |    2 ++
 8 files changed, 38 insertions(+), 44 deletions(-)
hooks/post-receive
-- 
MPICH primary repository
    
    
More information about the commits mailing list