[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-71-g0086b7b

Service Account noreply at mpich.org
Mon Nov 3 08:59:10 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  0086b7bf7f2ec4e4ef39bca1c153c1000e137a3c (commit)
       via  017668158a62efeb696c0af1efad8a0ddd8f64b7 (commit)
      from  72a1e6f871e16943e364dd580e10b392d1e904a4 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/0086b7bf7f2ec4e4ef39bca1c153c1000e137a3c

commit 0086b7bf7f2ec4e4ef39bca1c153c1000e137a3c
Author: Wesley Bland <wbland at anl.gov>
Date:   Sun Nov 2 16:04:20 2014 -0600

    Add CVAR for initial size of RTS queue
    
    Rather than hard-coding the initial size of the RTS queue, define it
    with a new CVAR, MPIR_CVAR_NEM_LMT_RTS_QUEUE_SIZE (default 1024).
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
index 69b145d..c5b9111 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h
@@ -55,9 +55,9 @@ typedef struct MPID_nem_pkt_lmt_rts
 }
 MPID_nem_pkt_lmt_rts_t;
 
-#define MPID_NEM_LMT_RTS_QUEUE_SIZE 1024
 extern int *MPID_nem_lmt_rts_queue;
 extern int MPID_nem_lmt_rts_queue_last_inserted;
+extern int MPID_nem_lmt_rts_queue_size;
 
 typedef struct MPID_nem_pkt_lmt_cts
 {
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
index 8fcd181..5ef65de 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
@@ -6,6 +6,24 @@
 
 #include "mpid_nem_impl.h"
 
+/*
+=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
+
+cvars:
+   - name       : MPIR_CVAR_NEM_LMT_RTS_QUEUE_SIZE
+     category   : CH3
+     type       : int
+     default    : 1024
+     class      : device
+     verbosity  : MPI_T_VERBOSITY_USER_BASIC
+     scope      : MPI_T_SCOPE_ALL_EQ
+     description : >-
+       The initial size of the NEM_LMT_RTS_QUEUE used to track RTS
+       messages before the LMT setup.
+
+=== END_MPI_T_CVAR_INFO_BLOCK ===
+*/
+
 #define set_request_info(rreq_, pkt_, msg_type_)		\
 {								\
     (rreq_)->status.MPI_SOURCE = (pkt_)->match.parts.rank;	\
@@ -112,7 +130,7 @@ int MPID_nem_lmt_RndvSend(MPID_Request **sreq_p, const void * buf, int count,
          * queue. It will print a message to warn the user. This should only
          * affect FT and not matching so we'll consider this ok for now. */
         for (i = MPID_nem_lmt_rts_queue_last_inserted + 1; ; i++) {
-            if (i == MPID_NEM_LMT_RTS_QUEUE_SIZE) {
+            if (i == MPID_nem_lmt_rts_queue_size) {
                 i = -1;
                 continue;
             }
@@ -329,7 +347,7 @@ static int pkt_CTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t
     for (i = MPID_nem_lmt_rts_queue_last_inserted + 1;
             i != MPID_nem_lmt_rts_queue_last_inserted;
             i++) {
-        if (i == MPID_NEM_LMT_RTS_QUEUE_SIZE) {
+        if (i == MPID_nem_lmt_rts_queue_size) {
             i = -1;
             continue;
         }
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
index 6a1715b..d312f4c 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
@@ -814,7 +814,7 @@ int MPID_nem_lmt_shm_vc_terminated(MPIDI_VC_t *vc)
 
         /* If there is anything in the RTS queue, it needs to be cleared out. */
         MPIU_THREAD_CS_ENTER(LMT,);
-        for (i = 0; i < MPID_NEM_LMT_RTS_QUEUE_SIZE; i++) {
+        for (i = 0; i < MPID_nem_lmt_rts_queue_size; i++) {
             if (MPI_REQUEST_NULL != MPID_nem_lmt_rts_queue[i]) {
                 MPID_Request_get_ptr(MPID_nem_lmt_rts_queue[i], req);
                 MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Checking RTS message");
diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
index 9e7fc5b..ee267bd 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c
@@ -24,6 +24,7 @@ MPID_nem_cell_ptr_t MPID_nem_prefetched_cell = 0;
 unsigned short *MPID_nem_recv_seqno = 0;
 
 int *MPID_nem_lmt_rts_queue;
+int MPID_nem_lmt_rts_queue_size;
 int MPID_nem_lmt_rts_queue_last_inserted = 0;
 
 #undef FUNCNAME
@@ -71,8 +72,9 @@ MPID_nem_mpich_init(void)
      * RTS requests. If we run out of space, we'll just drop the extra
      * requests. This won't cause a matching problem, it will just prevent FT
      * from working for those requests that get dropped. */
-    MPIU_CHKPMEM_MALLOC(MPID_nem_lmt_rts_queue, int *, sizeof(int) * MPID_NEM_LMT_RTS_QUEUE_SIZE, mpi_errno, "lmt rts queue");
-    for (i = 0; i < MPID_NEM_LMT_RTS_QUEUE_SIZE; i++)
+    MPID_nem_lmt_rts_queue_size = MPIR_CVAR_NEM_LMT_RTS_QUEUE_SIZE;
+    MPIU_CHKPMEM_MALLOC(MPID_nem_lmt_rts_queue, int *, sizeof(int) * MPID_nem_lmt_rts_queue_size, mpi_errno, "lmt rts queue");
+    for (i = 0; i < MPID_nem_lmt_rts_queue_size; i++)
         MPID_nem_lmt_rts_queue[i] = MPI_REQUEST_NULL;
 
     MPIU_CHKPMEM_COMMIT();

http://git.mpich.org/mpich.git/commitdiff/017668158a62efeb696c0af1efad8a0ddd8f64b7

commit 017668158a62efeb696c0af1efad8a0ddd8f64b7
Author: Wesley Bland <wbland at anl.gov>
Date:   Sun Nov 2 16:04:20 2014 -0600

    Add a warning if the RTS queue overflows
    
    When the RTS queue fills up for the first time, print a warning to let
    the user know that the queue has overflowed and that FT will not be
    provided for the overflowed messages.
    
    Signed-off-by: Huiwei Lu <huiweilu at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
index f7396c8..8fcd181 100644
--- a/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
+++ b/src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c
@@ -28,6 +28,8 @@ static int pkt_CTS_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t
 static int pkt_DONE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
 static int pkt_COOKIE_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp);
 
+static int warning_printed = 0;
+
 #undef FUNCNAME
 #define FUNCNAME MPID_nem_lmt_pkthandler_init
 #undef FCNAME
@@ -107,11 +109,9 @@ int MPID_nem_lmt_RndvSend(MPID_Request **sreq_p, const void * buf, int count,
     if (MPI_SUCCESS == mpi_errno) {
         /* If this loops all the way around and can't find a place to put the
          * RTS request, it will just drop the request and leave it out of the
-         * queue silently. This should only affect FT and not matching so we'll
-         * consider this ok for now. */
-        for (i = MPID_nem_lmt_rts_queue_last_inserted + 1;
-             i != MPID_nem_lmt_rts_queue_last_inserted;
-             i++) {
+         * queue. It will print a message to warn the user. This should only
+         * affect FT and not matching so we'll consider this ok for now. */
+        for (i = MPID_nem_lmt_rts_queue_last_inserted + 1; ; i++) {
             if (i == MPID_NEM_LMT_RTS_QUEUE_SIZE) {
                 i = -1;
                 continue;
@@ -122,6 +122,12 @@ int MPID_nem_lmt_RndvSend(MPID_Request **sreq_p, const void * buf, int count,
                 MPID_nem_lmt_rts_queue_last_inserted = i;
                 break;
             }
+
+            if (i == MPID_nem_lmt_rts_queue_last_inserted && !warning_printed) {
+                MPIU_Internal_error_printf("LMT RTS queue exceeded. FT not provided for overflowed messages.\n");
+                warning_printed = 1;
+                break;
+            }
         }
     }
     MPIU_THREAD_CS_EXIT(LMT,);

-----------------------------------------------------------------------

Summary of changes:
 .../ch3/channels/nemesis/include/mpid_nem_impl.h   |    2 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c   |   38 ++++++++++++++++----
 .../ch3/channels/nemesis/src/mpid_nem_lmt_shm.c    |    2 +-
 src/mpid/ch3/channels/nemesis/src/mpid_nem_mpich.c |    6 ++-
 4 files changed, 37 insertions(+), 11 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list