[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.2-96-gc4ee1fb

Service Account noreply at mpich.org
Mon Aug 18 03:05:23 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  c4ee1fbe686735d42a05cfe06e867da1e2d93536 (commit)
       via  14958c758e435ad246e8282ccbb72e12eec94acc (commit)
       via  b6564c6374ce3ae9d553f212d1f707aabffb84c2 (commit)
      from  e8585d7c7f5666a29bf9d74ef6daa56db84f4f86 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c4ee1fbe686735d42a05cfe06e867da1e2d93536

commit c4ee1fbe686735d42a05cfe06e867da1e2d93536
Author: Norio Yamaguchi <norio.yamaguchi at riken.jp>
Date:   Fri Aug 8 14:11:49 2014 +0900

    Fix memory leak in netmod-IB
    
    Some memory allocated during initialization is not released
    when shared memory is used for communication.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_init.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_init.c
index 35b2ab2..fba362b 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_init.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_init.c
@@ -604,7 +604,16 @@ int MPID_nem_ib_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_m
 #endif
 #else /* define(MPID_NEM_IB_ONDEMAND)  */
     /* We need to communicate with all other ranks in close sequence.  */
-    MPID_nem_ib_conns_ref_count = MPID_nem_ib_nranks - 1;
+    MPID_nem_ib_conns_ref_count = MPID_nem_ib_nranks - MPID_nem_mem_region.num_local;
+
+    for (i = 0; i < MPID_nem_mem_region.num_local; i++) {
+        if (MPID_nem_mem_region.local_procs[i] != MPID_nem_ib_myrank) {
+            ibcom_errno =
+                MPID_nem_ib_com_close(MPID_nem_ib_scratch_pad_fds
+                                      [MPID_nem_mem_region.local_procs[i]]);
+            MPID_nem_ib_scratch_pad_fds_ref_count--;
+        }
+    }
 #endif
 
     MPIU_Free(remote_rank_str);

http://git.mpich.org/mpich.git/commitdiff/14958c758e435ad246e8282ccbb72e12eec94acc

commit 14958c758e435ad246e8282ccbb72e12eec94acc
Author: Norio Yamaguchi <norio.yamaguchi at riken.jp>
Date:   Mon Aug 4 16:15:07 2014 +0900

    Fix memory release operation in race-condition
    
    A race condition in the CM operation can cause memory that has
    already been released to be operated on. To avoid this, set
    ref_count to 3 and release the memory only after the operation
    completes.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
index 710c1de..7523127 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
@@ -2561,6 +2561,7 @@ int MPID_nem_ib_cm_drain_scq()
             dprintf("cm_drain_scq,tx=%d\n", shadow_cm->req->ibcom->outstanding_connection_tx);
             dprintf("cm_drain_scq,syn,buf_from=%p,sz=%d\n", shadow_cm->buf_from,
                     shadow_cm->buf_from_sz);
+            MPID_nem_ib_cm_request_release(shadow_cm->req);
             MPID_nem_ib_rdmawr_from_free(shadow_cm->buf_from, shadow_cm->buf_from_sz);
             MPIU_Free(shadow_cm);
             break;
@@ -3228,9 +3229,11 @@ int MPID_nem_ib_cm_poll()
                 MPID_nem_ib_send_progress(MPID_nem_ib_conns[req->responder_rank].vc);
                 /* Let the following connection request go */
                 VC_FIELD(MPID_nem_ib_conns[req->responder_rank].vc, connection_guard) = 0;
-                /* free memory : req->ref_count is 2, so call MPIU_Free() directly */
-                //MPID_nem_ib_cm_request_release(req);
-                MPIU_Free(req);
+                /* Call cm_request_release twice.
+                 * If ref_count == 2, the memory of request is released here.
+                 * If ref_count == 3, the memory of request will be released on draining SCQ of SYN. */
+                MPID_nem_ib_cm_request_release(req);
+                MPID_nem_ib_cm_request_release(req);
             }
             //goto common_tail;
             break;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_send.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_send.c
index 66ef497..901dbd8 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_send.c
@@ -1677,7 +1677,7 @@ int MPID_nem_ib_cm_cas(MPIDI_VC_t * vc, uint32_t ask_on_connect)
     MPIU_ERR_CHKANDJUMP(!req, mpi_errno, MPI_ERR_OTHER, "**malloc");
     dprintf("req=%p\n", req);
     req->state = MPID_NEM_IB_CM_CAS;
-    req->ref_count = 2; /* Released on receiving ACK2 and draining SCQ of ACK1 */
+    req->ref_count = 3; /* Released on receiving ACK2 and draining SCQ of SYN and ACK1 */
     req->retry_backoff = 0;
     req->initiator_rank = MPID_nem_ib_myrank;
     req->responder_rank = vc->pg_rank;

http://git.mpich.org/mpich.git/commitdiff/b6564c6374ce3ae9d553f212d1f707aabffb84c2

commit b6564c6374ce3ae9d553f212d1f707aabffb84c2
Author: Masamichi Takagi <masamichi.takagi at riken.jp>
Date:   Fri Aug 1 19:08:35 2014 +0900

    Remove always-true comparison
    
    Remove the (0 <= <variable>) comparison on a variable of type
    uint16_t to eliminate a compile-time warning.
    
    Signed-off-by: Pavan Balaji <balaji at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
index 4195152..710c1de 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c
@@ -1477,7 +1477,7 @@ int MPID_nem_ib_recv_buf_released(struct MPIDI_VC *vc, void *user_data)
         (unsigned long) ((uint8_t *) user_data -
                          (uint8_t *) vc_ib->ibcom->remote_ringbuf->start) /
         MPID_NEM_IB_COM_RDMABUF_SZSEG;
-    MPIU_Assert(0 <= index_slot && index_slot < (uint16_t) (vc_ib->ibcom->remote_ringbuf->nslot));
+    MPIU_Assert(index_slot < (uint16_t) (vc_ib->ibcom->remote_ringbuf->nslot));
     dprintf("released,user_data=%p,mem=%p,sub=%08lx,index_slot=%d\n",
             user_data, vc_ib->ibcom->remote_ringbuf->start,
             (unsigned long) user_data -

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/ib/ib_init.c |   11 ++++++++++-
 src/mpid/ch3/channels/nemesis/netmod/ib/ib_poll.c |   11 +++++++----
 src/mpid/ch3/channels/nemesis/netmod/ib/ib_send.c |    2 +-
 3 files changed, 18 insertions(+), 6 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list