[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-427-g524aeed

Service Account noreply at mpich.org
Fri Sep 2 23:34:44 CDT 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  524aeedc0918261477c46b736fa2ce72085d5115 (commit)
       via  710d18392e4356593ebeea34bf5c1f28e1747645 (commit)
       via  4f0d6c12a6f20387db09d1e6557ed71fe9882fae (commit)
       via  532430583eb9ec01d2c20aefafad15874c0da00f (commit)
       via  e049ef4358cdcbab8f3e6ac4da3cefa7cc88ce62 (commit)
       via  7ededee9220b263f87668b991e8d1c642ff76072 (commit)
      from  1c63de860856b416864625ef7be4b379e729d99a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/524aeedc0918261477c46b736fa2ce72085d5115

commit 524aeedc0918261477c46b736fa2ce72085d5115
Author: Min Si <msi at anl.gov>
Date:   Thu Jun 23 18:01:48 2016 -0500

    Added empty symbol for dynamic process routines.
    
    In original code, the dynamic process routine can be disabled by setting
    MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS. Functions exposed to external code
    must have empty symbols to avoid compiling failure.
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 44e05b2..f6959ad 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1016,6 +1016,7 @@ typedef struct MPIDI_Port_Ops {
 #define MPIDI_PORTFNS_VERSION 1
 int MPIDI_CH3_PortFnsInit( MPIDI_PortFns * );
 
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
 /* Utility routines provided in src/ch3u_port.c for working with connection
    queues */
 int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag);
@@ -1023,6 +1024,15 @@ int MPIDI_Port_finalize(void);
 
 int MPIDI_CH3I_Port_init(int port_name_tag);
 int MPIDI_CH3I_Port_destroy(int port_name_tag);
+#else
+/* Need empty symbols to avoid failure at compile time if
+ * MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS is defined. */
+#define MPIDI_CH3I_Acceptq_enqueue(vc, port_name_tag) (MPI_SUCCESS)
+#define MPIDI_Port_finalize() (MPI_SUCCESS)
+
+#define MPIDI_CH3I_Port_init(port_name_tag) (MPI_SUCCESS)
+#define MPIDI_CH3I_Port_destroy(port_name_tag) (MPI_SUCCESS)
+#endif
 /*--------------------------
   END MPI PORT SECTION 
   --------------------------*/
@@ -1661,8 +1671,14 @@ int MPIDI_CH3_InitCompleted( void );
 #endif
 /* Routines in support of ch3 */
 
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
 /* Routine to return the tag associated with a port */
 int MPIDI_GetTagFromPort( const char *, int * );
+#else
+/* Need empty symbol to avoid failure at compile time if
+ * MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS is defined. */
+#define MPIDI_GetTagFromPort(port_name, port_name_tag) (MPI_SUCCESS)
+#endif
 
 /* Here are the packet handlers */
 int MPIDI_CH3_PktHandler_EagerSend( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 

http://git.mpich.org/mpich.git/commitdiff/710d18392e4356593ebeea34bf5c1f28e1747645

commit 710d18392e4356593ebeea34bf5c1f28e1747645
Author: Min Si <msi at anl.gov>
Date:   Fri Jun 17 18:08:09 2016 -0500

    Added info "timeout" (in seconds) for MPI_Comm_connect.
    
    The default threshold of connection timeout is set by
    MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT. User code can specify per connect
    threshold through "timeout" info (in seconds).
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/src/mpid/ch3/errnames.txt b/src/mpid/ch3/errnames.txt
index 3cd2831..9204942 100644
--- a/src/mpid/ch3/errnames.txt
+++ b/src/mpid/ch3/errnames.txt
@@ -33,6 +33,7 @@
 **ch3|pmi_finalize:PMI_Finalize failed
 **ch3|pmi_finalize %d:PMI_Finalize failed, error %d
 **ch3|conntimeout:Connection timed out
+**ch3|conntimeout %d:Connection timed out in %d seconds
 **ch3|portclose:Port is unexpectedly closed
 
 #
diff --git a/src/mpid/ch3/src/ch3u_port.c b/src/mpid/ch3/src/ch3u_port.c
index 8dab65d..6fb1ed0 100644
--- a/src/mpid/ch3/src/ch3u_port.c
+++ b/src/mpid/ch3/src/ch3u_port.c
@@ -252,7 +252,7 @@ static MPIDI_CH3I_Port_connreq_q_t revoked_connreq_q = {NULL, NULL, 0};
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
 static int MPIDI_Create_inter_root_communicator_connect(const char *port_name, 
-							MPIR_Comm **comm_pptr,
+							int timeout, MPIR_Comm **comm_pptr,
 							MPIDI_VC_t **vc_pptr)
 {
     int mpi_errno = MPI_SUCCESS;
@@ -290,6 +290,9 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
         MPID_Time_t time_sta, time_now;
         double time_gap = 0;
 
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                       (MPL_DBG_FDEST, "connect: waiting accept in %d(s)", timeout));
+
         MPID_Wtime(&time_sta);
         do {
             mpi_errno = MPID_Progress_poke();
@@ -300,7 +303,7 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
             MPID_Wtime_diff(&time_sta, &time_now, &time_gap);
             /* FIXME: not thread-safe */
         } while (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_INITED
-                 && (int) time_gap < MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT);
+                 && (int) time_gap < timeout);
     }
 
     switch (connreq->stat) {
@@ -317,7 +320,8 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
 
         MPIDI_CH3I_Port_connreq_q_enqueue(&revoked_connreq_q, connreq);
         MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, REVOKE);
-        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_PORT, "**ch3|conntimeout");
+        MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_PORT, "**ch3|conntimeout",
+                             "**ch3|conntimeout %d", timeout);
         break;
 
     case MPIDI_CH3I_PORT_CONNREQ_ERR_CLOSE:
@@ -627,10 +631,22 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
 
     if (rank == root)
     {
+        int timeout = MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT;
+
+        /* Check if user specifies timeout threshold. */
+        if (info != NULL) {
+            int info_flag = 0;
+            char info_value[MPI_MAX_INFO_VAL + 1];
+            MPIR_Info_get_impl(info, "timeout", MPI_MAX_INFO_VAL, info_value, &info_flag);
+            if (info_flag) {
+                timeout = atoi(info_value);
+            }
+        }
+
 	/* Establish a communicator to communicate with the root on the 
 	   other side. */
 	mpi_errno = MPIDI_Create_inter_root_communicator_connect(
-	    port_name, &tmp_comm, &new_vc);
+	    port_name, timeout, &tmp_comm, &new_vc);
 	if (mpi_errno != MPI_SUCCESS) {
 	    MPIR_ERR_POP_LABEL(mpi_errno, no_port);
 	}

http://git.mpich.org/mpich.git/commitdiff/4f0d6c12a6f20387db09d1e6557ed71fe9882fae

commit 4f0d6c12a6f20387db09d1e6557ed71fe9882fae
Author: Min Si <msi at anl.gov>
Date:   Fri Jun 17 17:36:10 2016 -0500

    Removed unused function declaration.
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index 2202130..44e05b2 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -1019,11 +1019,6 @@ int MPIDI_CH3_PortFnsInit( MPIDI_PortFns * );
 /* Utility routines provided in src/ch3u_port.c for working with connection
    queues */
 int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag);
-#ifdef MPIDI_CH3_CHANNEL_AVOIDS_SELECT
-int MPIDI_CH3_Complete_Acceptq_dequeue(MPIDI_VC_t * vc);
-#else
-#define MPIDI_CH3_Complete_Acceptq_dequeue(vc)  MPI_SUCCESS
-#endif
 int MPIDI_Port_finalize(void);
 
 int MPIDI_CH3I_Port_init(int port_name_tag);

http://git.mpich.org/mpich.git/commitdiff/532430583eb9ec01d2c20aefafad15874c0da00f

commit 532430583eb9ec01d2c20aefafad15874c0da00f
Author: Min Si <msi at anl.gov>
Date:   Fri Jun 17 17:06:35 2016 -0500

    Added support for MPI_Comm_connect timeout.
    
    To support connection timeout (defined in standard), we add additional
    handshake protocol in the vc establishing step in accept/connect
    routines. If no matching accept issued on the server process, the
    connect call on client process can return after specified threshold
    MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT. An MPI error MPI_ERR_PORT will be
    returned. Detailed algorithm is described at the beginning of
    ch3u_port.c.
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/src/mpid/ch3/errnames.txt b/src/mpid/ch3/errnames.txt
index a3308bc..3cd2831 100644
--- a/src/mpid/ch3/errnames.txt
+++ b/src/mpid/ch3/errnames.txt
@@ -32,6 +32,8 @@
 **ch3|close_progress:an error occurred while the device was waiting for all open connections to close
 **ch3|pmi_finalize:PMI_Finalize failed
 **ch3|pmi_finalize %d:PMI_Finalize failed, error %d
+**ch3|conntimeout:Connection timed out
+**ch3|portclose:Port is unexpectedly closed
 
 #
 # RMA errors
diff --git a/src/mpid/ch3/include/mpid_port.h b/src/mpid/ch3/include/mpid_port.h
new file mode 100644
index 0000000..4463d39
--- /dev/null
+++ b/src/mpid/ch3/include/mpid_port.h
@@ -0,0 +1,200 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2016 by Argonne National Laboratory.
+ *     See COPYRIGHT in top-level directory.
+ */
+#ifndef MPID_PORT_H_
+#define MPID_PORT_H_
+
+#include "mpl_utlist.h"
+
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+
+/* Header file only used by port connect/accept routines (ch3u_port.c) */
+
+typedef enum {
+    MPIDI_CH3I_PORT_CONNREQ_INITED,     /* Connection request initialized. */
+    MPIDI_CH3I_PORT_CONNREQ_REVOKE,     /* Client started revoking a timed out connection request. */
+    MPIDI_CH3I_PORT_CONNREQ_ACCEPT,     /* Server started accepting a connection request. */
+    MPIDI_CH3I_PORT_CONNREQ_ACCEPTED,   /* Server successfully accepted this request (i.e.,
+                                         * client does not revoke it). */
+    MPIDI_CH3I_PORT_CONNREQ_ERR_CLOSE,  /* Error state -- Server closed unexpectedly.
+                                         * (called close_port or finalize while client is
+                                         * still waiting acceptance).
+                                         * Note: revoke-state request will be directly
+                                         * changed to free-state. */
+    MPIDI_CH3I_PORT_CONNREQ_FREE,       /* Started freeing a connection request.
+                                         * (VC has been locally closed and ready for blocking
+                                         * wait on termination).
+                                         * A connection request can be eventually freed when:
+                                         * (1) connection normally completed;
+                                         * (2) a timed out connection (revoke) request received
+                                         *     accept packet;
+                                         * (3) server closed unexpectedly.*/
+} MPIDI_CH3I_Port_connreq_stat_t;
+
+typedef struct MPIDI_CH3I_Port_connreq {
+    MPIDI_VC_t *vc;
+    MPIDI_CH3I_Port_connreq_stat_t stat;
+    struct MPIDI_CH3I_Port_connreq *next;
+} MPIDI_CH3I_Port_connreq_t;
+
+typedef struct MPIDI_CH3I_Port_connreq_q {
+    MPIDI_CH3I_Port_connreq_t *head;
+    MPIDI_CH3I_Port_connreq_t *tail;
+    int size;
+} MPIDI_CH3I_Port_connreq_q_t;
+
+typedef struct MPIDI_CH3I_Port {
+    int port_name_tag;
+    MPIDI_CH3I_Port_connreq_q_t accept_connreq_q;
+    struct MPIDI_CH3I_Port *next;
+} MPIDI_CH3I_Port_t;
+
+typedef struct MPIDI_CH3I_Port_q {
+    MPIDI_CH3I_Port_t *head;
+    MPIDI_CH3I_Port_t *tail;
+    int size;
+} MPIDI_CH3I_Port_q_t;
+
+#define MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, new_stat)     \
+                (connreq->stat = MPIDI_CH3I_PORT_CONNREQ_##new_stat)
+
+
+/* Start VC closing protocol -- locally close VC.
+ * It is the 1st step of VC closing protocol (see ch3u_handle_connection.c):
+ *      local_closed / remote_closed -> close_acked -> closed. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_local_close_vc
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Port_local_close_vc(MPIDI_VC_t * vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    /* Note that this routine is usually called in comm_release after matched in
+     * an accept call. Here we do not have comm for the VC, thus just close it
+     * with a dummy rank (this parameter is only for debugging) */
+    if (vc->state == MPIDI_VC_STATE_ACTIVE || vc->state == MPIDI_VC_STATE_REMOTE_CLOSE) {
+        mpi_errno = MPIDI_CH3U_VC_SendClose(vc, 0 /* dummy rank */);
+    }
+
+    return mpi_errno;
+}
+
+/*** Utility routines for connection request queues ***/
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_connreq_q_enqueue
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline void MPIDI_CH3I_Port_connreq_q_enqueue(MPIDI_CH3I_Port_connreq_q_t * connreq_q,
+                                                     MPIDI_CH3I_Port_connreq_t * connreq)
+{
+    MPL_LL_APPEND(connreq_q->head, connreq_q->tail, connreq);
+    connreq_q->size++;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_connreq_q_dequeue
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline void MPIDI_CH3I_Port_connreq_q_dequeue(MPIDI_CH3I_Port_connreq_q_t * connreq_q,
+                                                     MPIDI_CH3I_Port_connreq_t ** connreq_ptr)
+{
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL;
+
+    /* delete head */
+    if (connreq_q->head != NULL) {
+        connreq = connreq_q->head;
+
+        MPL_LL_DELETE(connreq_q->head, connreq_q->tail, connreq);
+        connreq_q->size--;
+    }
+
+    (*connreq_ptr) = connreq;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_connreq_q_delete
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline void MPIDI_CH3I_Port_connreq_q_delete(MPIDI_CH3I_Port_connreq_q_t * connreq_q,
+                                                    MPIDI_CH3I_Port_connreq_t * connreq)
+{
+    MPL_LL_DELETE(connreq_q->head, connreq_q->tail, connreq);
+    connreq_q->size--;
+}
+
+
+
+/*** Utility routines for issuing ACK packets in accept/connect routine ***/
+
+/* Reply ACK packet to server accept routine. Client replies this packet after
+ * received an accept packet from server. Handled in MPIDI_CH3_PktHandler_AcceptAck.
+ * ACK - True means client matched with server acceptance;
+ * ACK - False means client already started revoking, thus acceptance fails. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_issue_accept_ack
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Port_issue_accept_ack(MPIDI_VC_t * vc, int ack)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIR_Request *req_ptr = NULL;
+    MPIDI_CH3_Pkt_t spkt;
+    MPIDI_CH3_Pkt_accept_ack_t *ack_pkt = &spkt.accept_ack;
+
+    MPIDI_Pkt_init(ack_pkt, MPIDI_CH3_PKT_ACCEPT_ACK);
+    ack_pkt->ack = ack;
+
+    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                    (MPL_DBG_FDEST, "issuing accpet_ack packet to vc %p, ack=%d", vc, ack));
+
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, ack_pkt, sizeof(MPIDI_CH3_Pkt_t), &req_ptr);
+    if (mpi_errno != MPI_SUCCESS)
+        return mpi_errno;
+
+    if (req_ptr != NULL)
+        MPIR_Request_free(req_ptr);     /* Only reduce reference, released after pkt sent. */
+
+    return mpi_errno;
+}
+
+/* Reply ACK packet to client connect routine. Server replies this packet in
+ * accept call for dequeued connection request, or replies immediately when
+ * received a request for non-existing port. Handled in MPIDI_CH3_PktHandler_ConnAck
+ * on client.
+ * ACK - True means server started accepting this connection request;
+ * ACK - False means port does not exist or being closed on server, thus connection
+ *       fails. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_issue_conn_ack
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static inline int MPIDI_CH3I_Port_issue_conn_ack(MPIDI_VC_t * vc, int ack)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIR_Request *req_ptr = NULL;
+    MPIDI_CH3_Pkt_t spkt;
+    MPIDI_CH3_Pkt_conn_ack_t *ack_pkt = &spkt.conn_ack;
+
+    MPIDI_Pkt_init(ack_pkt, MPIDI_CH3_PKT_CONN_ACK);
+    ack_pkt->ack = ack;
+
+    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                    (MPL_DBG_FDEST, "issuing conn_ack packet to vc %p, ack=%d", vc, ack));
+
+    mpi_errno = MPIDI_CH3_iStartMsg(vc, ack_pkt, sizeof(MPIDI_CH3_Pkt_t), &req_ptr);
+    if (mpi_errno != MPI_SUCCESS)
+        return mpi_errno;
+
+    if (req_ptr != NULL)
+        MPIR_Request_free(req_ptr);     /* Only reduce reference, released after pkt sent. */
+
+    return mpi_errno;
+}
+
+#endif /* MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS */
+
+#endif /* MPID_PORT_H_ */
diff --git a/src/mpid/ch3/include/mpidimpl.h b/src/mpid/ch3/include/mpidimpl.h
index e7d2456..2202130 100644
--- a/src/mpid/ch3/include/mpidimpl.h
+++ b/src/mpid/ch3/include/mpidimpl.h
@@ -694,6 +694,10 @@ typedef struct MPIDI_VC
     /* port name tag */ 
     int port_name_tag; /* added to handle dynamic process mgmt */
     
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+    void *connreq_obj;  /* pointer to dynamic connection mgmt object */
+#endif
+
 #if defined(MPID_USE_SEQUENCE_NUMBERS)
     /* Sequence number of the next packet to be sent */
     MPID_Seqnum_t seqnum_send;
@@ -1015,12 +1019,15 @@ int MPIDI_CH3_PortFnsInit( MPIDI_PortFns * );
 /* Utility routines provided in src/ch3u_port.c for working with connection
    queues */
 int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag);
-int MPIDI_CH3I_Acceptq_dequeue(MPIDI_VC_t ** vc, int port_name_tag);
 #ifdef MPIDI_CH3_CHANNEL_AVOIDS_SELECT
 int MPIDI_CH3_Complete_Acceptq_dequeue(MPIDI_VC_t * vc);
 #else
 #define MPIDI_CH3_Complete_Acceptq_dequeue(vc)  MPI_SUCCESS
 #endif
+int MPIDI_Port_finalize(void);
+
+int MPIDI_CH3I_Port_init(int port_name_tag);
+int MPIDI_CH3I_Port_destroy(int port_name_tag);
 /*--------------------------
   END MPI PORT SECTION 
   --------------------------*/
@@ -1723,6 +1730,15 @@ int MPIDI_CH3_PktHandler_FlowCntlUpdate( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
 					 intptr_t *, MPIR_Request ** );
 int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
 				intptr_t *, MPIR_Request ** );
+
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+/* packet handlers used in dynamic process connection. */
+int MPIDI_CH3_PktHandler_ConnAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 intptr_t * buflen, MPIR_Request ** rreqp);
+int MPIDI_CH3_PktHandler_AcceptAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                   intptr_t * buflen, MPIR_Request ** rreqp);
+#endif /* end of MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS */
+
 int MPIDI_CH3_PktHandler_EndCH3( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *,
 				 intptr_t *, MPIR_Request ** );
 int MPIDI_CH3_PktHandler_Revoke(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
diff --git a/src/mpid/ch3/include/mpidpkt.h b/src/mpid/ch3/include/mpidpkt.h
index 1abc133..5566589 100644
--- a/src/mpid/ch3/include/mpidpkt.h
+++ b/src/mpid/ch3/include/mpidpkt.h
@@ -103,6 +103,11 @@ typedef enum {
     MPIDI_CH3_PKT_FLOW_CNTL_UPDATE,     /* FIXME: Unused */
     MPIDI_CH3_PKT_CLOSE,
     MPIDI_CH3_PKT_REVOKE,
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+    /* Dynamic Connection Management */
+    MPIDI_CH3_PKT_CONN_ACK,
+    MPIDI_CH3_PKT_ACCEPT_ACK,
+#endif
     MPIDI_CH3_PKT_END_CH3,
     /* The channel can define additional types by defining the value
      * MPIDI_CH3_PKT_ENUM */
@@ -840,6 +845,16 @@ typedef struct MPIDI_CH3_Pkt_close {
     int ack;
 } MPIDI_CH3_Pkt_close_t;
 
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+/* packet types used in dynamic process connection. */
+typedef struct MPIDI_CH3_Pkt_conn_ack {
+    MPIDI_CH3_Pkt_type_t type;
+    int ack;
+} MPIDI_CH3_Pkt_conn_ack_t;
+
+typedef MPIDI_CH3_Pkt_conn_ack_t MPIDI_CH3_Pkt_accept_ack_t;
+#endif /* end of MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS */
+
 typedef struct MPIDI_CH3_Pkt_revoke {
     MPIDI_CH3_Pkt_type_t type;
     MPIR_Context_id_t revoked_comm;
@@ -872,6 +887,10 @@ typedef union MPIDI_CH3_Pkt {
     MPIDI_CH3_Pkt_ack_t ack;
     MPIDI_CH3_Pkt_decr_at_counter_t decr_at_cnt;
     MPIDI_CH3_Pkt_close_t close;
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+    MPIDI_CH3_Pkt_conn_ack_t conn_ack;
+    MPIDI_CH3_Pkt_accept_ack_t accept_ack;
+#endif
     MPIDI_CH3_Pkt_cas_t cas;
     MPIDI_CH3_Pkt_cas_resp_t cas_resp;
     MPIDI_CH3_Pkt_fop_t fop;
diff --git a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
index 9f0673e..e66c6d6 100644
--- a/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
+++ b/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
@@ -563,6 +563,13 @@ int MPIDI_CH3_PktHandler_Init( MPIDI_CH3_PktHandler_Fcn *pktArray[],
     pktArray[MPIDI_CH3_PKT_CLOSE] =
 	MPIDI_CH3_PktHandler_Close;
 
+#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
+    /* Dynamic Connection Management */
+    pktArray[MPIDI_CH3_PKT_CONN_ACK] =
+            MPIDI_CH3_PktHandler_ConnAck;
+    pktArray[MPIDI_CH3_PKT_ACCEPT_ACK] =
+            MPIDI_CH3_PktHandler_AcceptAck;
+#endif
     /* Provision for flow control */
     pktArray[MPIDI_CH3_PKT_FLOW_CNTL_UPDATE] = 0;
 
diff --git a/src/mpid/ch3/src/ch3u_port.c b/src/mpid/ch3/src/ch3u_port.c
index a24d350..8dab65d 100644
--- a/src/mpid/ch3/src/ch3u_port.c
+++ b/src/mpid/ch3/src/ch3u_port.c
@@ -5,6 +5,27 @@
  */
 
 #include "mpidi_ch3_impl.h"
+#include "mpid_port.h"
+
+/*
+=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
+
+cvars:
+    - name        : MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT
+      category    : CH3
+      type        : int
+      default     : 180
+      class       : none
+      verbosity   : MPI_T_VERBOSITY_USER_BASIC
+      scope       : MPI_T_SCOPE_GROUP_EQ
+      description : >-
+        The default time out period in seconds for a connection attempt to the
+        server communicator where the named port exists but no pending accept.
+        User can change the value for a specified connection through its info
+        argument.
+
+=== END_MPI_T_CVAR_INFO_BLOCK ===
+*/
 
 /*
  * This file replaces ch3u_comm_connect.c and ch3u_comm_accept.c .  These
@@ -50,6 +71,17 @@ static int SetupNewIntercomm( MPIR_Comm *comm_ptr, int remote_comm_size,
 			      MPIR_Comm *intercomm );
 static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
 					  MPIDI_VC_t *vc_ptr, int is_low_group, int context_id_offset);
+
+static int MPIDI_CH3I_Acceptq_dequeue(MPIDI_CH3I_Port_connreq_t ** connreq_ptr, int port_name_tag);
+static int MPIDI_CH3I_Acceptq_cleanup(MPIDI_CH3I_Port_connreq_q_t * accept_connreq_q);
+
+static int MPIDI_CH3I_Revokeq_cleanup(void);
+
+static int MPIDI_CH3I_Port_connreq_create(MPIDI_VC_t * vc,
+                                          MPIDI_CH3I_Port_connreq_t ** connreq_ptr);
+static int MPIDI_CH3I_Port_connreq_free(MPIDI_CH3I_Port_connreq_t * connreq);
+
+
 /* ------------------------------------------------------------------------- */
 /*
  * Structure of this file and the connect/accept algorithm:
@@ -78,6 +110,98 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
  * routine MPIDI_CH3I_Acceptq_dequeue).  This routine returns the matched
  * virtual connection (VC).
  *
+ * -----------------------------------------------------------------------------
+ * To support connection timeout, we add additional handshake protocol in above
+ * VC establishing step. The implementation includes init/destroy processing in
+ * port open/close routine and MPI_Finalize, packet handlers, and modified code
+ * in following subroutines:
+ *   - MPIDI_Create_inter_root_communicator_connect
+ *   - and MPIDI_Create_inter_root_communicator_accept
+ *
+ * 1. Every connection attempt is described as a *request object* on both sides,
+ *    with the following state changes (see MPIDI_CH3I_Port_connreq_stat_t).
+ *    - Connecting side: INITED -> REVOKE / ACCEPTED / ERR_CLOSE -> FREE
+ *    - Accepting side : INITED -> ACCEPT -> (ACCEPTED) -> FREE
+ *
+ * 2. Handshake protocol:
+ *    I. Connecting side:
+ *      a. After VC is created, a connection *request object* is created with
+ *         INITED state, then we poke progress to wait for the state to change
+ *         within the specified timeout period (default CVAR).
+ *      b. If waiting time exceeded threshold, set state to REVOKE.
+ *      c. Progress engine receives a MPIDI_CH3_PKT_CONN_ACK packet indicating
+ *         whether the other side could accept the request.
+ *         - ACK (TRUE): if request state is INITED, then set to ACCEPTED and
+ *           reply a MPIDI_CH3_PKT_ACCEPT_ACK packet with TRUE ACK; otherwise
+ *           such request is already being revoked, then set to FREE and reply
+ *           packet with FALSE ACK.
+ *         - ACK (FALSE): if request state is INITED, then set to ERR_CLOSE;
+ *           otherwise set to FREE (see MPIDI_CH3_PktHandler_ConnAck).
+ *      d. If the process is still waiting, then it checks the changed state:
+ *         if request is ACCEPTED, then continue connection; otherwise report
+ *         MPI_ERR_PORT -- unexpected port closing.
+ *
+ *    II. Accepting side:
+ *      a. If port exists, the new VC is enqueued into the accept_queue and
+ *         described as a *request object* with INITED state; otherwise, reply a
+ *         MPIDI_CH3_PKT_CONN_ACK packet with FALSE ACK (closed port)
+ *         (see MPIDI_CH3I_Acceptq_dequeue routine).
+ *      b. After VC is dequeued in accept routine, send a MPIDI_CH3_PKT_CONN_ACK
+ *         packet with TRUE ACK to connecting side and change state to ACCEPT.
+ *         Then we poll progress to wait for state change.
+ *      c. Progress engine receives a MPIDI_CH3_PKT_ACCEPT_ACK packet indicating
+ *         whether the other side can match the acceptance (not being revoked).
+ *         - ACK (TRUE): change request state to ACCEPTED
+ *         - ACK (FALSE): set state to FREE
+ *         (see MPIDI_CH3_PktHandler_AcceptAck).
+ *      d. In accept routine, process checks the changed state. If it became
+ *         ACCEPTED, then finish acceptance; otherwise free this request and VC
+ *         and then wait for next coming request.
+ *
+ * 3. Resource cleanup. In case of timed out connection, following user code
+ *    shall be considered as correct program (no description in standard yet).
+ *    Thus the resource of first connect must be cleaned up before exit.
+ *              ================================================
+ *              Server:               Client:
+ *              open_port();          connect(); ** timed out **
+ *              accept();             connect();
+ *              close_port();
+ *              ================================================
+ *    (* The most critical part is closing VC because of closing synchronization
+ *    with the other side (see note in MPIDI_CH3I_Port_local_close_vc and
+ *    ch3u_handle_connection.c), VC object cannot be freed before it became
+ *    INACTIVE state, otherwise segfault !)
+ *
+ *    Here is the design to ensure resource cleanup. It also covers most
+ *    incorrect user code (e.g., no accept, no close_port), except no finalize.
+ *    I. Connecting side :
+ *      (use revoked_connreq_q)
+ *      a. A REVOKE state request is enqueued to revoked_connreq_q (in 2-I-b).
+ *      b. If request changed state INITED->ERR_CLOSE in 2-I-c, we free such
+ *         request in the connect call before return (step 2-I-d).
+ *      c. If REVOKE->FREE in step 2-I-c, we start VC closing in packet handler,
+ *         and free request and VC at finalize (see MPIDI_CH3I_Revokeq_cleanup).
+ *      d. For any REVOKE state request at finalize, both request object and VC
+ *         will be eventually freed once the other side started finalize.
+ *
+ *    II. Accepting side :
+ *      (use port_queue, port->accept_queue, global unexpected_queue)
+ *      a. We create a port object for every opened port and each of them holds
+ *         a separate accept queue (to ensure no mess in multiple-ports).
+ *      b. If request becomes FREE in 2-II-a, we start VC closing there and
+ *         enqueue it to global unexpected_queue.
+ *      c. If request updated state ACCEPT->FREE in 2-II-c, we start VC closing
+ *         in packet handler and free both request and VC in accept call (2-II-d).
+ *      d. For any requests still in port->accept_queue at close_port or finalize,
+ *         issue MPIDI_CH3_PKT_CONN_ACK packet with FALSE ACK to the other side,
+ *         then blocking free both VC and request object there. It also ensures
+ *         3-I-d (see MPIDI_CH3I_Acceptq_cleanup).
+ *      e. For any requests in global unexpected_queue, we blocking free both VC
+ *         and request at finalize (see MPIDI_Port_finalize).
+ *
+ * - END of connection timeout support.
+ * -----------------------------------------------------------------------------
+ *
  * Once both sides have established their VC, they both invoke
  * MPIDI_CH3I_Initialize_tmp_comm to create a temporary intercommunicator.
  * A temporary intercommunicator is constructed so that we can use
@@ -102,7 +226,21 @@ static int MPIDI_CH3I_Initialize_tmp_comm(MPIR_Comm **comm_pptr,
  */
 /* ------------------------------------------------------------------------- */
 
-/* 
+
+/*** Queues for supporting connection timeout ***/
+
+/* Server side queues
+ *  - unexpected connection requests queue (global)
+ *  - active port queue */
+static MPIDI_CH3I_Port_connreq_q_t unexpt_connreq_q = {NULL, NULL, 0};
+static MPIDI_CH3I_Port_q_t active_portq = {NULL, NULL, 0};
+
+/* Client side queue
+ * - revoked connection requests (i.e., timeout) */
+static MPIDI_CH3I_Port_connreq_q_t revoked_connreq_q = {NULL, NULL, 0};
+
+
+/*
  * These next two routines are used to create a virtual connection
  * (VC) and a temporary intercommunicator that can be used to 
  * communicate between the two "root" processes for the 
@@ -121,6 +259,8 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
     MPIR_Comm *tmp_comm;
     MPIDI_VC_t *connect_vc = NULL;
     int port_name_tag;
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL;
+
     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
 
     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
@@ -140,6 +280,67 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
 	MPIR_ERR_POP(mpi_errno);
     }
 
+    mpi_errno = MPIDI_CH3I_Port_connreq_create(connect_vc, &connreq);
+    MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Can't create communicator connection object.");
+
+    /* Poke progress to wait for the server's response to the connection
+     * request until it times out. The response is handled by
+     * MPIDI_CH3_PktHandler_ConnAck in the progress engine. */
+    {
+        MPID_Time_t time_sta, time_now;
+        double time_gap = 0;
+
+        MPID_Wtime(&time_sta);
+        do {
+            mpi_errno = MPID_Progress_poke();
+            if (mpi_errno != MPI_SUCCESS)
+                MPIR_ERR_POP(mpi_errno);
+
+            MPID_Wtime(&time_now);
+            MPID_Wtime_diff(&time_sta, &time_now, &time_gap);
+            /* FIXME: not thread-safe */
+        } while (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_INITED
+                 && (int) time_gap < MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT);
+    }
+
+    switch (connreq->stat) {
+    case MPIDI_CH3I_PORT_CONNREQ_ACCEPTED:
+        /* Successfully matched an acceptance, then finish connection. */
+        MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE, "Matched with server accept");
+        break;
+
+    case MPIDI_CH3I_PORT_CONNREQ_INITED:
+        /* Connection timed out.
+         * Enqueue to revoked queue. Packet handler will notify server
+         * when server starts on it. The request will be released at finalize. */
+        MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE, "Connection timed out");
+
+        MPIDI_CH3I_Port_connreq_q_enqueue(&revoked_connreq_q, connreq);
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, REVOKE);
+        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_PORT, "**ch3|conntimeout");
+        break;
+
+    case MPIDI_CH3I_PORT_CONNREQ_ERR_CLOSE:
+        /* Unexpected port closing on server.
+         * The same as no port case, return MPI_ERR_PORT. Now we caught the error,
+         * close vc and free connection request at fn_fail. */
+        MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                    "Error - remote closed without matching this connection");
+
+        mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
+
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_PORT, "**ch3|portclose");
+        break;
+
+    default:
+        /* Unexpected status, internal error. */
+        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**unknown");
+        break;
+    }
+
     mpi_errno = MPIDI_CH3I_Initialize_tmp_comm(&tmp_comm, connect_vc, 1, port_name_tag);
     if (mpi_errno != MPI_SUCCESS) {
 	MPIR_ERR_POP(mpi_errno);
@@ -148,10 +349,18 @@ static int MPIDI_Create_inter_root_communicator_connect(const char *port_name,
     *comm_pptr = tmp_comm;
     *vc_pptr = connect_vc;
 
+    MPL_free(connreq);
+
  fn_exit:
     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_CONNECT);
     return mpi_errno;
  fn_fail:
+    if (connreq != NULL) {
+      int mpi_errno2 = MPI_SUCCESS;
+      mpi_errno2 = MPIDI_CH3I_Port_connreq_free(connreq);
+      if (mpi_errno2)
+          MPIR_ERR_ADD(mpi_errno, mpi_errno2);
+    }
     goto fn_exit;
 }
 
@@ -171,6 +380,8 @@ static int MPIDI_Create_inter_root_communicator_accept(const char *port_name,
     MPIDI_VC_t *new_vc = NULL;
     MPID_Progress_state progress_state;
     int port_name_tag;
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL;
+
     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
 
     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CREATE_INTER_ROOT_COMMUNICATOR_ACCEPT);
@@ -183,28 +394,77 @@ static int MPIDI_Create_inter_root_communicator_accept(const char *port_name,
 
     /* FIXME: Describe the algorithm used here, and what routine 
        is user on the other side of this connection */
-    /* dequeue the accept queue to see if a connection with the
-       root on the connect side has been formed in the progress
-       engine (the connection is returned in the form of a vc). If
-       not, poke the progress engine. */
+    /* Dequeue the accept queue to see if a connection request with
+     * the root on the connect side has been formed in the progress
+     * engine. If not, poke the progress engine. If a new connection
+     * request has been found, we start accepting it by sending an
+     * ACK packet to the client, then poke the progress engine until
+     * we get a response from the client (state changed). */
 
     MPID_Progress_start(&progress_state);
-    for(;;)
-    {
-	MPIDI_CH3I_Acceptq_dequeue(&new_vc, port_name_tag);
-	if (new_vc != NULL)
-	{
-	    break;
-	}
+    for (;;) {
+        int matched = 0;
 
-	mpi_errno = MPID_Progress_wait(&progress_state);
-	/* --BEGIN ERROR HANDLING-- */
-	if (mpi_errno)
-	{
-	    MPID_Progress_end(&progress_state);
-	    MPIR_ERR_POP(mpi_errno);
-	}
-	/* --END ERROR HANDLING-- */
+        if (connreq == NULL) {
+            mpi_errno = MPIDI_CH3I_Acceptq_dequeue(&connreq, port_name_tag);
+            if (mpi_errno)
+                MPIR_ERR_POP(mpi_errno);
+        }
+
+        if (connreq != NULL && connreq->stat == MPIDI_CH3I_PORT_CONNREQ_INITED) {
+            new_vc = connreq->vc;
+
+            /* locally accept */
+            MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, ACCEPT);
+
+            /* Send connection ACK: accept to client */
+            mpi_errno = MPIDI_CH3I_Port_issue_conn_ack(connreq->vc, TRUE /*accept*/);
+            MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Cannot issue acceptance packet");
+
+            MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        "Sent acceptance to client, waiting for ACK");
+        }
+
+        /* Wait either new connection request or response packet for existing one. */
+        mpi_errno = MPID_Progress_wait(&progress_state);
+        /* --BEGIN ERROR HANDLING-- */
+        if (mpi_errno != MPI_SUCCESS) {
+            MPID_Progress_end(&progress_state);
+            MPIR_ERR_POP(mpi_errno);
+        }
+        /* --END ERROR HANDLING-- */
+
+        /* Packet handler received response from client, check updated state */
+        if (connreq != NULL && connreq->stat != MPIDI_CH3I_PORT_CONNREQ_ACCEPT) {
+            switch (connreq->stat) {
+            case MPIDI_CH3I_PORT_CONNREQ_ACCEPTED:
+                /* Matched, now finish acceptance. */
+                MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE, "Matched with client connect");
+                matched = 1;    /* leave loop */
+                break;
+
+            case MPIDI_CH3I_PORT_CONNREQ_FREE:
+                /* Client revoked, free connection request.*/
+                MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                            "Connection is already closed on client");
+
+                /* Client already started the vc closing process, thus it is safe
+                 * to block here until the vc is freed. */
+                mpi_errno = MPIDI_CH3I_Port_connreq_free(connreq);
+                if (mpi_errno != MPI_SUCCESS)
+                    MPIR_ERR_POP(mpi_errno);
+
+                connreq = NULL;
+                break;  /* continue while loop */
+            default:
+                /* report internal error -- unexpected state */
+                MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**unknown");
+                break;
+            }
+        }
+
+        if (matched)
+            break;
     }
     MPID_Progress_end(&progress_state);
 
@@ -216,6 +476,8 @@ static int MPIDI_Create_inter_root_communicator_accept(const char *port_name,
     *comm_pptr = tmp_comm;
     *vc_pptr = new_vc;
 
+    MPL_free(connreq);
+
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,VERBOSE,(MPL_DBG_FDEST,
 		  "new_vc=%p", new_vc));
 
@@ -224,6 +486,12 @@ fn_exit:
     return mpi_errno;
 
 fn_fail:
+    if(connreq != NULL) {
+        int mpi_errno2 = MPI_SUCCESS;
+        mpi_errno2 = MPIDI_CH3I_Port_connreq_free(connreq);
+        if (mpi_errno2)
+            MPIR_ERR_ADD(mpi_errno, mpi_errno2);
+    }
     goto fn_exit;
 }
 
@@ -537,6 +805,7 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
     }
  no_port:
     {
+        int mpi_errno_noport = MPI_SUCCESS;
         int mpi_errno2 = MPI_SUCCESS;
 
        /* broadcast error notification to other processes */
@@ -544,7 +813,11 @@ int MPIDI_Comm_connect(const char *port_name, MPIR_Info *info, int root,
         recv_ints[0] = -1;
         recv_ints[1] = -1;
         recv_ints[2] = -1;
-        MPIR_ERR_SET1(mpi_errno, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
+
+        /* append no port error message */
+        MPIR_ERR_SET1(mpi_errno_noport, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
+        MPIR_ERR_ADD(mpi_errno_noport, mpi_errno);
+        mpi_errno = mpi_errno_noport;
 
         /* notify other processes to return an error */
         MPL_DBG_MSG(MPIDI_CH3_DBG_CONNECT,VERBOSE,"broadcasting 3 ints: error case");
@@ -1250,18 +1523,11 @@ static int FreeNewVC( MPIDI_VC_t *new_vc )
    designed and documented.
 */
 
-typedef struct MPIDI_CH3I_Acceptq_s
-{
-    struct MPIDI_VC *vc;
-    int             port_name_tag;
-    struct MPIDI_CH3I_Acceptq_s *next;
-}
-MPIDI_CH3I_Acceptq_t;
-
-static MPIDI_CH3I_Acceptq_t * acceptq_head=0;
-static int maxAcceptQueueSize = 0;
-static int AcceptQueueSize    = 0;
-
+/* Enqueue a connection request from a client. This routine is called from the
+ * netmod (i.e., TCP) when it receives a dynamic connection from another process.
+ * If the port exists, we enqueue the request to that port's accept queue to wait
+ * for an accept call to serve it; otherwise the request should be discarded, so
+ * we immediately send a NACK back to the client and start closing the VC. */
 #undef FUNCNAME
 #define FUNCNAME MPIDI_CH3I_Acceptq_enqueue
 #undef FCNAME
@@ -1269,91 +1535,501 @@ static int AcceptQueueSize    = 0;
 int MPIDI_CH3I_Acceptq_enqueue(MPIDI_VC_t * vc, int port_name_tag )
 {
     int mpi_errno=MPI_SUCCESS;
-    MPIDI_CH3I_Acceptq_t *q_item;
-    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL;
+    MPIDI_CH3I_Port_t *port = NULL;
 
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
 
-    /* FIXME: Use CHKPMEM */
-    q_item = (MPIDI_CH3I_Acceptq_t *)
-        MPL_malloc(sizeof(MPIDI_CH3I_Acceptq_t));
-    /* --BEGIN ERROR HANDLING-- */
-    if (q_item == NULL)
-    {
-        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIDI_CH3I_Acceptq_t" );
-	goto fn_exit;
-    }
-    /* --END ERROR HANDLING-- */
+    MPL_LL_SEARCH_SCALAR(active_portq.head, port, port_name_tag, port_name_tag);
+
+    /* Create a connection request object for this VC. */
+    mpi_errno = MPIDI_CH3I_Port_connreq_create(vc, &connreq);
+    MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Can't create communicator connection object.");
 
-    q_item->vc		  = vc;
-    q_item->port_name_tag = port_name_tag;
+    /* No port exists if port is not opened or already closed (incorrect user code).
+     * Thus we just start closing VC here. */
+    if (port == NULL) {
+        /* Notify connecting client. */
+        mpi_errno = MPIDI_CH3I_Port_issue_conn_ack(connreq->vc, FALSE /* closed port */);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
 
-    /* Keep some statistics on the accept queue */
-    AcceptQueueSize++;
-    if (AcceptQueueSize > maxAcceptQueueSize) 
-	maxAcceptQueueSize = AcceptQueueSize;
+        /* Start VC closing protocol. */
+        mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
 
-    /* FIXME: Stack or queue? */
-    MPL_DBG_MSG_P(MPIDI_CH3_DBG_CONNECT,TYPICAL,"vc=%p:Enqueuing accept connection",vc);
-    q_item->next = acceptq_head;
-    acceptq_head = q_item;
-    
- fn_exit:
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+
+        /* Enqueue the unexpected VC to avoid waiting on progress recursively.
+         * These VCs will be freed in finalize. */
+        MPIDI_CH3I_Port_connreq_q_enqueue(&unexpt_connreq_q, connreq);
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        (MPL_DBG_FDEST, "Enqueued conn %p to unexpected queue with tag %d, vc=%p",
+                         connreq, port_name_tag, vc));
+    }
+    else {
+        /* Enqueue to accept queue, thus next accept call can serve it. */
+        MPIDI_CH3I_Port_connreq_q_enqueue(&port->accept_connreq_q, connreq);
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        (MPL_DBG_FDEST, "Enqueued conn %p to accept queue with tag %d, vc=%p",
+                         connreq, port_name_tag, vc));
+
+        /* signal for new enqueued VC, thus progress wait can return in accept. */
+        MPIDI_CH3_Progress_signal_completion();
+    }
+
+  fn_exit:
     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_ENQUEUE);
     return mpi_errno;
+  fn_fail:
+    if (connreq)
+        MPIDI_CH3I_Port_connreq_free(connreq);
+    goto fn_exit;
 }
 
 
-/* Attempt to dequeue a vc from the accept queue. If the queue is
-   empty or the port_name_tag doesn't match, return a NULL vc. */
+/* Attempt to dequeue a connection request from the accept queue. If the queue
+ * is empty return a NULL object. */
 #undef FUNCNAME
 #define FUNCNAME MPIDI_CH3I_Acceptq_dequeue
 #undef FCNAME
 #define FCNAME MPL_QUOTE(FUNCNAME)
-int MPIDI_CH3I_Acceptq_dequeue(MPIDI_VC_t ** vc, int port_name_tag)
+int MPIDI_CH3I_Acceptq_dequeue(MPIDI_CH3I_Port_connreq_t ** connreq_ptr, int port_name_tag)
 {
     int mpi_errno=MPI_SUCCESS;
-    MPIDI_CH3I_Acceptq_t *q_item, *prev;
-    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+    MPIDI_CH3I_Port_t *port = NULL;
 
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
 
-    *vc = NULL;
-    q_item = acceptq_head;
-    prev = q_item;
+    /* Find port object by using port_name_tag. */
+    MPL_LL_SEARCH_SCALAR(active_portq.head, port, port_name_tag, port_name_tag);
+    MPIR_Assert(port != NULL);  /* Port is always initialized in open_port. */
 
-    while (q_item != NULL)
-    {
-	if (q_item->port_name_tag == port_name_tag)
-	{
-	    *vc = q_item->vc;
+    MPIDI_CH3I_Port_connreq_q_dequeue(&port->accept_connreq_q, connreq_ptr);
+    if ((*connreq_ptr) != NULL) {
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        (MPL_DBG_FDEST, "conn=%p:Dequeued accept connection with tag %d, vc=%p",
+                         (*connreq_ptr), port_name_tag, (*connreq_ptr)->vc));
+    }
 
-	    if ( q_item == acceptq_head )
-		acceptq_head = q_item->next;
-	    else
-		prev->next = q_item->next;
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+    return mpi_errno;
+}
 
-	    MPL_free(q_item);
-	    AcceptQueueSize--;
-	    break;;
-	}
-	else
-	{
-	    prev = q_item;
-	    q_item = q_item->next;
-	}
+/* Clean up received new VCs that are not accepted or closed in accept
+ * calls (e.g., mismatching accept and connect). This routine is called in
+ * MPIDI_CH3I_Port_destroy (close_port) and MPIDI_Port_finalize (finalize).
+ * Note that we already deleted port from active_port queue before cleaning up
+ * its accept queue, thus no new VC can be enqueued concurrently. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Acceptq_cleanup
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static int MPIDI_CH3I_Acceptq_cleanup(MPIDI_CH3I_Port_connreq_q_t * accept_connreq_q)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL, *connreq_tmp = NULL;
+
+    MPL_LL_FOREACH_SAFE(accept_connreq_q->head, connreq, connreq_tmp) {
+        MPIDI_CH3I_Port_connreq_q_delete(accept_connreq_q, connreq);
+
+        /* Notify connecting client. */
+        mpi_errno = MPIDI_CH3I_Port_issue_conn_ack(connreq->vc, FALSE /* closed port */);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
+
+        /* Start VC closing protocol. */
+        mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
+
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+
+        /* Free connection request (blocking wait till VC closed). */
+        mpi_errno = MPIDI_CH3I_Port_connreq_free(connreq);
+        if (mpi_errno != MPI_SUCCESS)
+            MPIR_ERR_POP(mpi_errno);
     }
-    
-    mpi_errno = MPIDI_CH3_Complete_Acceptq_dequeue(*vc);
 
-    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT,TYPICAL,
-	      (MPL_DBG_FDEST,"vc=%p:Dequeuing accept connection with tag %d",
-	       *vc,port_name_tag));
+    MPIR_Assert(accept_connreq_q->size == 0);
 
-    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_ACCEPTQ_DEQUEUE);
+  fn_exit:
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+
+/*** Utility routines for revoked connection requests   ***/
+
+/* Clean up revoked requests in connect (e.g., timed out connect).
+ * We do not want to wait for these VCs being freed in timed out connect,
+ * because it is blocked till the server calls a matching accept or close_port.
+ * This routine is called in finalize on client process. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Revokeq_cleanup
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static int MPIDI_CH3I_Revokeq_cleanup(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL, *connreq_tmp = NULL;
+
+    MPL_LL_FOREACH_SAFE(revoked_connreq_q.head, connreq, connreq_tmp) {
+        MPID_Progress_state progress_state;
+        MPIDI_CH3I_Port_connreq_q_delete(&revoked_connreq_q, connreq);
+
+        /* Blocking wait till the request is freed on server. */
+        if (connreq->stat != MPIDI_CH3I_PORT_CONNREQ_FREE) {
+            MPID_Progress_start(&progress_state);
+            do {
+                mpi_errno = MPID_Progress_wait(&progress_state);
+                /* --BEGIN ERROR HANDLING-- */
+                if (mpi_errno != MPI_SUCCESS) {
+                    MPID_Progress_end(&progress_state);
+                    MPIR_ERR_POP(mpi_errno);
+                }
+                /* --END ERROR HANDLING-- */
+            } while (connreq->stat != MPIDI_CH3I_PORT_CONNREQ_FREE);
+            MPID_Progress_end(&progress_state);
+        }
+
+        /* Release connection (blocking wait till VC closed). */
+        MPIDI_CH3I_Port_connreq_free(connreq);
+    }
+
+    MPIR_Assert(revoked_connreq_q.size == 0);
+
+  fn_exit:
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/*** Packet handlers exposed to progress engine  ***/
+
+/* Packet handler to handle response (connection ACK) on client process. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_PktHandler_ConnAck
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+int MPIDI_CH3_PktHandler_ConnAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                 intptr_t * buflen, MPIR_Request ** rreqp)
+{
+    MPIDI_CH3_Pkt_conn_ack_t *ack_pkt = &pkt->conn_ack;
+    MPIDI_CH3I_Port_connreq_t *connreq = (MPIDI_CH3I_Port_connreq_t *) (vc->connreq_obj);
+    int mpi_errno = MPI_SUCCESS;
+
+    MPIR_Assert(connreq != NULL);
+
+    /* Report unknown error, unexpectedly get response for remote
+     * revoked connection. */
+    if (connreq->stat != MPIDI_CH3I_PORT_CONNREQ_INITED &&
+        connreq->stat != MPIDI_CH3I_PORT_CONNREQ_REVOKE)
+        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_INTERN, "**unknown");
+
+    if (ack_pkt->ack == TRUE) {
+        if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_INITED) {
+            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                            (MPL_DBG_FDEST, "received ACK true for vc %p: inited->accepted", vc));
+
+            /* Reply to server */
+            mpi_errno = MPIDI_CH3I_Port_issue_accept_ack(connreq->vc, TRUE /* accept matched */);
+            MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Cannot issue accept-matched packet");
+
+            MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, ACCEPTED);
+        }
+        else if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_REVOKE) {
+            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                            (MPL_DBG_FDEST, "received ACK true for vc %p: revoke->free", vc));
+
+            /* Reply to server */
+            mpi_errno = MPIDI_CH3I_Port_issue_accept_ack(connreq->vc, FALSE /* locally revoked */);
+            MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Cannot issue revoke packet");
+
+            /* Start freeing connection request.
+             * Note that we do not block on closing the VC here; instead, the VC
+             * is closed in MPIDI_CH3I_Revokeq_cleanup at finalize. This is because
+             * VC close packets might be received following this packet, thus if
+             * we blocked here we can never read that packet. */
+            mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+            MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Cannot locally close VC");
+
+            MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+        }
+    }
+    else {      /* ack == FALSE */
+        if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_INITED) {
+            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                            (MPL_DBG_FDEST, "received ACK false for vc %p: inited->err_close", vc));
+
+            /* Server closed port without issuing accept, client
+             * connect call will catch this error and return MPI_ERR_PORT. */
+            MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, ERR_CLOSE);
+        }
+        else if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_REVOKE) {
+            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                            (MPL_DBG_FDEST, "received ACK false for vc %p: revoke->free", vc));
+
+            /* Start VC closing, and set connection ready-for-free. */
+            mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+            MPIR_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Cannot locally close VC");
+
+            MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+        }
+    }
+
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    *rreqp = NULL;
+
+  fn_exit:
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/* Packet handler to handle response (acceptance ACK) on server process. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_PktHandler_AcceptAck
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+int MPIDI_CH3_PktHandler_AcceptAck(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt,
+                                   intptr_t * buflen, MPIR_Request ** rreqp)
+{
+    MPIDI_CH3_Pkt_accept_ack_t *ack_pkt = &pkt->accept_ack;
+    MPIDI_CH3I_Port_connreq_t *connreq = (MPIDI_CH3I_Port_connreq_t *) (vc->connreq_obj);
+    int mpi_errno = MPI_SUCCESS;
+
+    MPIR_Assert(connreq != NULL);
+
+    /* Acceptance matched, finish accept. */
+    if (ack_pkt->ack == TRUE) {
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        (MPL_DBG_FDEST, "received (accept) ACK true for vc %p: accept->match", vc));
+
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, ACCEPTED);
+    }
+    else {
+        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CONNECT, VERBOSE,
+                        (MPL_DBG_FDEST, "received (accept) ACK false for vc %p: accept->close",
+                         vc));
+
+        /* Client already left, close VC.
+         * Note that accept call does not return when client timed out,
+         * thus we only change the state and let accept call handle closing. */
+        mpi_errno = MPIDI_CH3I_Port_local_close_vc(connreq->vc);
+        if (mpi_errno)
+            MPIR_ERR_POP(mpi_errno);
+
+        MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, FREE);
+    }
+
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    *rreqp = NULL;
+
+  fn_exit:
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/*** Routines for connection request creation and freeing  ***/
+
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_connreq_create
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static int MPIDI_CH3I_Port_connreq_create(MPIDI_VC_t * vc, MPIDI_CH3I_Port_connreq_t ** connreq_ptr)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_Port_connreq_t *connreq = NULL;
+
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_MALLOC(connreq, MPIDI_CH3I_Port_connreq_t *, sizeof(MPIDI_CH3I_Port_connreq_t),
+                        mpi_errno, "comm_conn");
+
+    connreq->vc = vc;
+    MPIDI_CH3I_PORT_CONNREQ_SET_STAT(connreq, INITED);
+
+    /* Netmod may not change VC to active when connection established (i.e., sock).
+     * Instead, it is changed in CH3 layer (e.g., isend, RMA).*/
+    if (vc->state == MPIDI_VC_STATE_INACTIVE)
+        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
+
+    vc->connreq_obj = (void *) connreq; /* to get connection request in packet handlers */
+    *connreq_ptr = connreq;
+
+  fn_exit:
+    MPIR_CHKPMEM_COMMIT();
     return mpi_errno;
+  fn_fail:
+    MPIR_CHKPMEM_REAP();
+    goto fn_exit;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_connreq_free
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+static int MPIDI_CH3I_Port_connreq_free(MPIDI_CH3I_Port_connreq_t * connreq)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    /* Skip if connection request is still in revoked state.
+     * Because packet handler may be talking to server. */
+    if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_REVOKE)
+        return mpi_errno;
+
+    /* Expected free: the remote side should start closing too,
+     * thus we can block on closing the VC here. */
+    if (connreq->stat == MPIDI_CH3I_PORT_CONNREQ_FREE) {
+        mpi_errno = FreeNewVC(connreq->vc);
+    }
+    else {
+        /* Unexpected free: the remote side might not be able to close
+         * the VC at this point, thus we cannot block on closing the VC. */
+        mpi_errno = MPIDI_CH3_VC_Destroy(connreq->vc);
+    }
+
+    /* Always free connection request.
+     * Because it is only used in connect/accept routine. */
+    MPL_free(connreq);
+
+    return mpi_errno;
+}
+
+
+/*** Routines to initialize / destroy dynamic connection  ***/
+
+/* Initialize port's accept queue. It is called in MPIDI_Open_port. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_init
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+int MPIDI_CH3I_Port_init(int port_name_tag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_Port_t *port = NULL;
+
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PORT_INIT);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PORT_INIT);
+
+    MPIR_CHKPMEM_DECL(1);
+    MPIR_CHKPMEM_MALLOC(port, MPIDI_CH3I_Port_t *, sizeof(MPIDI_CH3I_Port_t),
+                        mpi_errno, "comm_port");
+
+    port->port_name_tag = port_name_tag;
+    port->accept_connreq_q.head = port->accept_connreq_q.tail = 0;
+    port->accept_connreq_q.size = 0;
+    port->next = NULL;
+
+    MPL_LL_APPEND(active_portq.head, active_portq.tail, port);
+    active_portq.size++;
+
+  fn_exit:
+    MPIR_CHKPMEM_COMMIT();
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PORT_INIT);
+    return mpi_errno;
+  fn_fail:
+    MPIR_CHKPMEM_REAP();
+    goto fn_exit;
+}
+
+/* Destroy port's accept queue. It is called in MPIDI_Close_port. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Port_destroy
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+int MPIDI_CH3I_Port_destroy(int port_name_tag)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_Port_t *port = NULL;
+
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PORT_DESTROY);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PORT_DESTROY);
+
+    MPL_LL_SEARCH_SCALAR(active_portq.head, port, port_name_tag, port_name_tag);
+    if (port != NULL) {
+        MPL_LL_DELETE(active_portq.head, active_portq.tail, port);
+
+        mpi_errno = MPIDI_CH3I_Acceptq_cleanup(&port->accept_connreq_q);
+        if (mpi_errno)
+            MPIR_ERR_POP(mpi_errno);
+
+        MPL_free(port);
+        active_portq.size--;
+    }
+
+  fn_exit:
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PORT_DESTROY);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
+
+/* This routine is called by MPID_Finalize to clean up dynamic connections. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_Port_finalize
+#undef FCNAME
+#define FCNAME MPL_QUOTE(FUNCNAME)
+int MPIDI_Port_finalize(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PORT_FINALIZE);
+    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PORT_FINALIZE);
+
+    /* Server side clean up. */
+
+    /* - Clean up all active ports.
+     * Note that if a process is both server and client, we will never
+     * deadlock here because only server cleanup issues closing packets.
+     * All closing process on client is handled in progress. */
+    {
+        MPIDI_CH3I_Port_t *port = NULL, *port_tmp = NULL;
+
+        MPL_LL_FOREACH_SAFE(active_portq.head, port, port_tmp) {
+            /* Destroy all ports that are still open. */
+            MPL_LL_DELETE(active_portq.head, active_portq.tail, port);
+
+            mpi_errno = MPIDI_CH3I_Acceptq_cleanup(&port->accept_connreq_q);
+            MPL_free(port);
+            active_portq.size--;
+        }
+        MPIR_Assert(active_portq.size == 0);
+    }
+
+    /* - Destroy all unexpected connection requests.
+     * The closing protocol already started when we got them in acceptq_enqueue,
+     * so just blocking wait for final release. */
+    {
+        MPIDI_CH3I_Port_connreq_t *connreq = NULL, *connreq_tmp = NULL;
+
+        MPL_LL_FOREACH_SAFE(unexpt_connreq_q.head, connreq, connreq_tmp) {
+            MPIDI_CH3I_Port_connreq_q_delete(&unexpt_connreq_q, connreq);
+            mpi_errno = MPIDI_CH3I_Port_connreq_free(connreq);
+            if (mpi_errno)
+                MPIR_ERR_POP(mpi_errno);
+        }
+        MPIR_Assert(unexpt_connreq_q.size == 0);
+    }
+
+    /* Client side clean up. */
+
+    /* - Destroy all revoked connection requests. */
+    mpi_errno = MPIDI_CH3I_Revokeq_cleanup();
+    if (mpi_errno)
+        MPIR_ERR_POP(mpi_errno);
+
+  fn_exit:
+    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PORT_FINALIZE);
+    return mpi_errno;
+  fn_fail:
+    goto fn_exit;
+}
 #else  /* MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS is defined */
 
 #endif /* MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS */
diff --git a/src/mpid/ch3/src/mpid_finalize.c b/src/mpid/ch3/src/mpid_finalize.c
index 0bf8586..a5fe615 100644
--- a/src/mpid/ch3/src/mpid_finalize.c
+++ b/src/mpid/ch3/src/mpid_finalize.c
@@ -95,6 +95,10 @@ int MPID_Finalize(void)
     
     /* Re-enabling the close step because many tests are failing
      * without it, particularly under gforker */
+
+    mpi_errno = MPIDI_Port_finalize();
+    if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
+
 #if 1
     /* FIXME: The close actions should use the same code as the other
        connection close code */
diff --git a/src/mpid/ch3/src/mpid_port.c b/src/mpid/ch3/src/mpid_port.c
index e306338..7cf4fe1 100644
--- a/src/mpid/ch3/src/mpid_port.c
+++ b/src/mpid/ch3/src/mpid_port.c
@@ -312,6 +312,8 @@ static int MPIDI_Open_port(MPIR_Info *info_ptr, char *port_name)
     mpi_errno = MPIDI_CH3_Get_business_card(myRank, port_name, len);
     MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST, "port_name = %s", port_name));
 
+    mpi_errno = MPIDI_CH3I_Port_init(port_name_tag);
+
 fn_exit:
     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_OPEN_PORT);
     return mpi_errno;
@@ -339,6 +341,8 @@ static int MPIDI_Close_port(const char *port_name)
 
     free_port_name_tag(port_name_tag);
 
+    mpi_errno = MPIDI_CH3I_Port_destroy(port_name_tag);
+
 fn_exit:
     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CLOSE_PORT);
     return mpi_errno;

http://git.mpich.org/mpich.git/commitdiff/e049ef4358cdcbab8f3e6ac4da3cefa7cc88ce62

commit e049ef4358cdcbab8f3e6ac4da3cefa7cc88ce62
Author: Min Si <msi at anl.gov>
Date:   Fri Jun 17 17:07:46 2016 -0500

    Disable vc state change in tcp if vc started closing.
    
    Tcp netmod updated vc to ACTIVE state when connection is established.
    It can happen in the first message. However, if the first message is vc
    closing packet, that vc can never be closed, because during vc closing the
    process needs to do a handshake with the remote peer and change state:
    	LOCAL_CLOSE/REMOTE_CLOSE -> CLOSE_ACKED -> CLOSED -> INACTIVE
    The next state change depends on the current state. If such state is
    changed LOCAL_CLOSE->ACTIVE in netmod, the closing process can never
    finish. This patch fixed it. Tcp netmod only updates state when the vc
    is INACTIVE.
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
index 2277dc8..c44b213 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
@@ -800,7 +800,11 @@ int MPID_nem_tcp_connect(struct MPIDI_VC *const vc)
     /* We have an active connection, start polling more often */
     MPID_nem_tcp_skip_polls = MAX_SKIP_POLLS_ACTIVE;
 
-    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
+    /* Only update VC state when it is not being closed.
+     * Note that we still need to change state here if the VC is passively
+     * connected (i.e., server in dynamic process connection). */
+    if (vc->state == MPIDI_VC_STATE_INACTIVE)
+        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
 
     if (vc_tcp->state == MPID_NEM_TCP_VC_STATE_DISCONNECTED) {
         struct sockaddr_in *sock_addr;
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
index 135b69e..fc73e8d 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
@@ -213,7 +213,11 @@ int MPID_nem_tcp_conn_est (MPIDI_VC_t *vc)
 
     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CONN_EST);
 
-    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
+    /* Only update VC state when it is not being closed.
+     * Note that we still need to change state here if the VC is passively
+     * connected (i.e., server in dynamic process connection). */
+    if (vc->state == MPIDI_VC_STATE_INACTIVE)
+        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
 
     if (!MPIDI_CH3I_Sendq_empty(vc_tcp->send_queue))
     {

http://git.mpich.org/mpich.git/commitdiff/7ededee9220b263f87668b991e8d1c642ff76072

commit 7ededee9220b263f87668b991e8d1c642ff76072
Author: Min Si <msi at il.is.s.u-tokyo.ac.jp>
Date:   Mon May 23 18:31:58 2016 -0500

    Added tests to check comm_connect timeout.
    
    test/mpi/errors/spawn/connect_timeout.c checks the timeout
    implementation in MPI_Comm_connect. Two tests are generated:
    (1) no accept: no accept call on server;
    (2) mismatch: number of accept calls is less than amount of connect.
    
    Signed-off-by: Yanfei Guo <yguo at anl.gov>

diff --git a/test/mpi/errors/spawn/Makefile.am b/test/mpi/errors/spawn/Makefile.am
index c2f956f..a9f1b42 100644
--- a/test/mpi/errors/spawn/Makefile.am
+++ b/test/mpi/errors/spawn/Makefile.am
@@ -12,5 +12,12 @@ EXTRA_DIST = testlist
 ## for all programs that are just built from the single corresponding source
 ## file, we don't need per-target _SOURCES rules, automake will infer them
 ## correctly
-noinst_PROGRAMS = badport unpub lookup_name
+noinst_PROGRAMS = badport unpub lookup_name \
+				  connect_timeout_no_accept \
+				  connect_timeout_mismatch
 
+connect_timeout_no_accept_SOURCES  = connect_timeout.c
+connect_timeout_mismatch_SOURCES   = connect_timeout.c
+
+connect_timeout_no_accept_CPPFLAGS = -DTEST_NOACCEPT $(AM_CPPFLAGS)
+connect_timeout_mismatch_CPPFLAGS  = -DTEST_MISMATCH $(AM_CPPFLAGS)
\ No newline at end of file
diff --git a/test/mpi/errors/spawn/connect_timeout.c b/test/mpi/errors/spawn/connect_timeout.c
new file mode 100644
index 0000000..2bef859
--- /dev/null
+++ b/test/mpi/errors/spawn/connect_timeout.c
@@ -0,0 +1,225 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2016 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mpitest.h"
+
+#define IF_VERBOSE(cond, a) if (verbose && (cond)) { printf a ; fflush(stdout); }
+#define SERVER_GID 0
+#define CLIENT_GID 1
+#define TIMEOUT "5"
+
+/* This test checks whether an MPI_COMM_CONNECT can eventually time out and return
+ * an error of class MPI_ERR_PORT when the named port exists but there is no pending
+ * MPI_COMM_ACCEPT on the server side. We divide all processes into a server group
+ * and a client group; the client group issues connect to the server group.
+ *
+ * Two test cases are performed separately:
+ * (1) No_accept: Server only calls open/close_port while client tries to connect.
+ *     Each connect call should return MPI_ERR_PORT.
+ * (2) Mismatched: Server only issues one accept while client tries to connect twice.
+ *     At least one of the connect calls should return MPI_ERR_PORT. */
+
+int rank, nproc;
+int verbose = 0;
+
+int test_mismatched_accept(MPI_Comm intra_comm, int gid);
+int test_no_accept(MPI_Comm intra_comm, int gid);
+
+#if defined(TEST_MISMATCH)
+#define RUN_TEST(comm, gid) test_mismatched_accept(comm, gid)
+#else /* no accept by default */
+#define RUN_TEST(comm, gid) test_no_accept(comm, gid)
+#endif
+
+/* Check whether the error class is equal to the expected one. */
+static inline int check_errno(int mpi_errno, int expected_errno)
+{
+    int errclass = MPI_SUCCESS;
+    int err = 0;
+
+    MPI_Error_class(mpi_errno, &errclass);
+    if (errclass != expected_errno) {
+        char errstring[MPI_MAX_ERROR_STRING];
+        char exp_errstring[MPI_MAX_ERROR_STRING];
+        int errstrlen = 0;
+
+        err++;
+        MPI_Error_string(mpi_errno, errstring, &errstrlen);
+        MPI_Error_string(expected_errno, exp_errstring, &errstrlen);
+        fprintf(stderr, "Error: returned wrong error class %s, expected %s\n",
+                errstring, exp_errstring);
+    }
+
+    return err;
+}
+
+/* The first process in the server group opens a port and broadcasts it to all
+ * other processes in the world.  */
+static inline void open_and_bcast_port(int intra_rank, int gid, char (*port)[])
+{
+    int server_rank = 0, local_server_rank = 0;
+
+    /* server root opens port */
+    if (intra_rank == 0 && gid == SERVER_GID) {
+        local_server_rank = rank;
+
+        MPI_Open_port(MPI_INFO_NULL, (*port));
+        IF_VERBOSE(1, ("server root: opened port1: <%s>\n", (*port)));
+    }
+
+    /* broadcast world rank of server root, then broadcast port */
+    MPI_Allreduce(&local_server_rank, &server_rank, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Bcast((*port), MPI_MAX_PORT_NAME, MPI_CHAR, server_rank, MPI_COMM_WORLD);
+}
+
+int test_mismatched_accept(MPI_Comm intra_comm, int gid)
+{
+    char port[MPI_MAX_PORT_NAME];
+    MPI_Comm comm = MPI_COMM_NULL;
+    int mpi_errno = MPI_SUCCESS;
+    int errs = 0;
+    int intra_rank, intra_nproc;
+    int server_rank = 0, local_server_rank = 0;
+
+    MTEST_VG_MEM_INIT(port, MPI_MAX_PORT_NAME * sizeof(char));
+
+    MPI_Comm_rank(intra_comm, &intra_rank);
+    MPI_Comm_size(intra_comm, &intra_nproc);
+    IF_VERBOSE(intra_rank == 0, ("%s: TEST mismatched accept case, %d nprocs in group\n",
+                                 (gid == SERVER_GID) ? "server" : "client", intra_nproc));
+
+    open_and_bcast_port(intra_rank, gid, &port);
+
+    /* client group */
+    if (gid == CLIENT_GID) {
+        int mpi_errno1 = MPI_SUCCESS, mpi_errno2 = MPI_SUCCESS;
+
+        IF_VERBOSE(intra_rank == 0, ("client: connecting to <%s> with default timeout.\n", port));
+        mpi_errno1 = MPI_Comm_connect(port, MPI_INFO_NULL, 0, intra_comm, &comm);
+        if (mpi_errno1 != MPI_SUCCESS) {
+            errs += check_errno(mpi_errno1, MPI_ERR_PORT);
+        }
+
+        /* At least one of the connect calls should return MPI_ERR_PORT */
+        IF_VERBOSE(intra_rank == 0, ("client: connecting to <%s> again.\n", port));
+        mpi_errno2 = MPI_Comm_connect(port, MPI_INFO_NULL, 0, intra_comm, &comm);
+        if (mpi_errno1 == MPI_SUCCESS) {
+            errs += check_errno(mpi_errno2, MPI_ERR_PORT);
+        }
+        else {
+            errs += check_errno(mpi_errno2, MPI_SUCCESS);
+        }
+    }
+    else if (gid == SERVER_GID) {
+        /* NOTE: if accept hangs, try increasing MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT. */
+        IF_VERBOSE(intra_rank == 0, ("server: accepting connection to <%s>.\n", port));
+        MPI_Comm_accept(port, MPI_INFO_NULL, 0, intra_comm, &comm);
+    }
+
+    if (comm != MPI_COMM_NULL) {
+        IF_VERBOSE(intra_rank == 0, ("connection matched, freeing communicator.\n"));
+        MPI_Comm_free(&comm);
+    }
+
+    if (intra_rank == 0 && gid == SERVER_GID) {
+        IF_VERBOSE(1, ("server root: closing port.\n"));
+        MPI_Close_port(port);
+    }
+
+    return errs;
+}
+
+int test_no_accept(MPI_Comm intra_comm, int gid)
+{
+    char port[MPI_MAX_PORT_NAME], timeout = 0;
+    MPI_Info info = MPI_INFO_NULL;
+    MPI_Comm comm = MPI_COMM_NULL;
+    int mpi_errno = MPI_SUCCESS;
+    int errs = 0;
+    int intra_rank, intra_nproc;
+    int server_rank = 0, local_server_rank = 0;
+
+    MTEST_VG_MEM_INIT(port, MPI_MAX_PORT_NAME * sizeof(char));
+
+    MPI_Comm_rank(intra_comm, &intra_rank);
+    MPI_Comm_size(intra_comm, &intra_nproc);
+    IF_VERBOSE(intra_rank == 0, ("%s: TEST no accept case, %d nprocs in group\n",
+                                 (gid == SERVER_GID) ? "server" : "client", intra_nproc));
+
+    open_and_bcast_port(intra_rank, gid, &port);
+
+    /* client group */
+    if (gid == CLIENT_GID) {
+        IF_VERBOSE(intra_rank == 0,
+                   ("client: case 1 - connecting to <%s> with default timeout.\n", port));
+        mpi_errno = MPI_Comm_connect(port, MPI_INFO_NULL, 0, intra_comm, &comm);
+        errs += check_errno(mpi_errno, MPI_ERR_PORT);
+
+        MPI_Info_create(&info);
+        MPI_Info_set(info, "timeout", TIMEOUT);
+
+        IF_VERBOSE(intra_rank == 0,
+                   ("client: case 2 - connecting to <%s> with specified timeout <%s>s.\n", port,
+                    TIMEOUT));
+        mpi_errno = MPI_Comm_connect(port, info, 0, intra_comm, &comm);
+        errs += check_errno(mpi_errno, MPI_ERR_PORT);
+
+        MPI_Info_free(&info);
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    /* server closes port after client finished connection */
+    if (intra_rank == 0 && gid == SERVER_GID) {
+        IF_VERBOSE(1, ("server root: closing port.\n"));
+        MPI_Close_port(port);
+    }
+
+    return errs;
+}
+
+
+int main(int argc, char *argv[])
+{
+    MPI_Comm intra_comm = MPI_COMM_NULL;
+    int sub_rank, sub_nproc;
+    int errs = 0, allerrs = 0;
+
+    if (getenv("MPITEST_VERBOSE")) {
+        verbose = 1;
+    }
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    if (nproc < 2) {
+        printf("At least two processes needed to run this test.\n");
+        goto exit;
+    }
+
+    /* Separate all processes into a server group and a client group.
+     * Processes in client group connect to server group. */
+    MPI_Comm_split(MPI_COMM_WORLD, rank % 2, rank, &intra_comm);
+    MPI_Errhandler_set(intra_comm, MPI_ERRORS_RETURN);
+
+    errs = RUN_TEST(intra_comm, rank % 2);
+
+    MPI_Comm_free(&intra_comm);
+    MPI_Reduce(&errs, &allerrs, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    if (rank == 0 && allerrs == 0) {
+        printf(" No Errors\n");
+        fflush(stdout);
+    }
+
+  exit:
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/errors/spawn/testlist.in b/test/mpi/errors/spawn/testlist.in
index e8ba5e3..f685b06 100644
--- a/test/mpi/errors/spawn/testlist.in
+++ b/test/mpi/errors/spawn/testlist.in
@@ -1,3 +1,7 @@
 badport 2
 @namepub_tests@unpub 1
 @namepub_tests@lookup_name 1
+connect_timeout_no_accept 2 env=MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT=20
+connect_timeout_mismatch 2 env=MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT=20
+connect_timeout_no_accept 5 env=MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT=20
+connect_timeout_mismatch 5 env=MPIR_CVAR_CH3_COMM_CONNECT_TIMEOUT=20

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c  |    6 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_send.c     |    6 +-
 src/mpid/ch3/errnames.txt                          |    3 +
 src/mpid/ch3/include/mpid_port.h                   |  200 +++++
 src/mpid/ch3/include/mpidimpl.h                    |   35 +-
 src/mpid/ch3/include/mpidpkt.h                     |   19 +
 src/mpid/ch3/src/ch3u_handle_recv_pkt.c            |    7 +
 src/mpid/ch3/src/ch3u_port.c                       |  874 ++++++++++++++++++--
 src/mpid/ch3/src/mpid_finalize.c                   |    4 +
 src/mpid/ch3/src/mpid_port.c                       |    4 +
 test/mpi/errors/spawn/Makefile.am                  |    9 +-
 test/mpi/errors/spawn/connect_timeout.c            |  225 +++++
 test/mpi/errors/spawn/testlist.in                  |    4 +
 13 files changed, 1298 insertions(+), 98 deletions(-)
 create mode 100644 src/mpid/ch3/include/mpid_port.h
 create mode 100644 test/mpi/errors/spawn/connect_timeout.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list