[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1rc3-8-g5d595a2

mysql vizuser noreply at mpich.org
Sun Feb 9 12:03:39 CST 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  5d595a22d3f00cae36158161c07bbe46e6ce3a61 (commit)
       via  f51499529a6040c79ab759cf640893aca572b34b (commit)
       via  d593f14654c76840d6acbea058200260426359f5 (commit)
       via  f2c75e957247ee262ade2f0d9c11515aca9aefc1 (commit)
      from  8df06af4810d8266839b36d3d5e43ee25ab9aeeb (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/5d595a22d3f00cae36158161c07bbe46e6ce3a61

commit 5d595a22d3f00cae36158161c07bbe46e6ce3a61
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Feb 8 22:10:44 2014 -0600

    Fix bad casting that creates errors for large messages.
    
    The bad cast to int loses information when the message size is
    larger than 2GB.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/sock/src/ch3_isendv.c b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
index b407da6..8d0fcf4 100644
--- a/src/mpid/ch3/channels/sock/src/ch3_isendv.c
+++ b/src/mpid/ch3/channels/sock/src/ch3_isendv.c
@@ -91,7 +91,7 @@ int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPID_Request * sreq,
 		
 		while (offset < n_iov)
 		{
-		    if ((int)iov[offset].MPID_IOV_LEN <= nb)
+		    if (iov[offset].MPID_IOV_LEN <= nb)
 		    {
 			nb -= iov[offset].MPID_IOV_LEN;
 			offset++;

http://git.mpich.org/mpich.git/commitdiff/f51499529a6040c79ab759cf640893aca572b34b

commit f51499529a6040c79ab759cf640893aca572b34b
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Feb 8 19:13:51 2014 -0600

    Use MPL_large_writev instead of plain writev in ch3:sock.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/src/mpid/common/sock/poll/sock_immed.i b/src/mpid/common/sock/poll/sock_immed.i
index 5da081b..323758d 100644
--- a/src/mpid/common/sock/poll/sock_immed.i
+++ b/src/mpid/common/sock/poll/sock_immed.i
@@ -607,7 +607,7 @@ int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPID_IOV * iov, int iov_n, MPIU_Size_t
     do
     {
 	MPIDI_FUNC_ENTER(MPID_STATE_WRITEV);
-	nb = writev(pollinfo->fd, iov, iov_n);
+	nb = MPL_large_writev(pollinfo->fd, iov, iov_n);
 	MPIDI_FUNC_EXIT(MPID_STATE_WRITEV);
     }
     while (nb == -1 && errno == EINTR);
diff --git a/src/mpid/common/sock/poll/sock_wait.i b/src/mpid/common/sock/poll/sock_wait.i
index 7a7e85d..8c1e48e 100644
--- a/src/mpid/common/sock/poll/sock_wait.i
+++ b/src/mpid/common/sock/poll/sock_wait.i
@@ -702,7 +702,7 @@ static int MPIDU_Socki_handle_write(struct pollfd * const pollfd, struct pollinf
 	if (pollinfo->write_iov_flag)
 	{ 
 	    MPIDI_FUNC_ENTER(MPID_STATE_WRITEV);
-	    nb = writev(pollinfo->fd, pollinfo->write.iov.ptr + pollinfo->write.iov.offset,
+	    nb = MPL_large_writev(pollinfo->fd, pollinfo->write.iov.ptr + pollinfo->write.iov.offset,
 			pollinfo->write.iov.count - pollinfo->write.iov.offset);
 	    MPIDI_FUNC_EXIT(MPID_STATE_WRITEV);
 	}

http://git.mpich.org/mpich.git/commitdiff/d593f14654c76840d6acbea058200260426359f5

commit d593f14654c76840d6acbea058200260426359f5
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Feb 8 17:55:32 2014 -0600

    Use MPL_large_writev instead of plain writev in ch3:nemesis:tcp.
    
    This works around problems with writev on some platforms (such as Mac
    OSX, 10.9.1) where writev hangs for large messages.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
index 8baada1..fae2d21 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c
@@ -493,7 +493,7 @@ static int send_id_info(const sockconn_t *const sc)
         ++iov_cnt;
     }
     
-    CHECK_EINTR (offset, writev(sc->fd, iov, iov_cnt));
+    offset = MPL_large_writev(sc->fd, iov, iov_cnt);
     MPIU_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
 /*     FIXME log appropriate error */
@@ -543,7 +543,7 @@ static int send_tmpvc_info(const sockconn_t *const sc)
     iov[1].iov_len = sizeof(port_info);
     buf_size = sizeof(hdr) + sizeof(port_info);
     
-    CHECK_EINTR (offset, writev(sc->fd, iov, iov_cnt));
+    offset = MPL_large_writev(sc->fd, iov, iov_cnt);
     MPIU_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
 /*     FIXME log appropriate error */
diff --git a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
index 7abb119..8031092 100644
--- a/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
+++ b/src/mpid/ch3/channels/nemesis/netmod/tcp/tcp_send.c
@@ -92,7 +92,7 @@ int MPID_nem_tcp_send_queued(MPIDI_VC_t *vc, MPIDI_nem_tcp_request_queue_t *send
 
         iov = &sreq->dev.iov[sreq->dev.iov_offset];
         
-        CHECK_EINTR(offset, writev(vc_tcp->sc->fd, iov, sreq->dev.iov_count));
+        offset = MPL_large_writev(vc_tcp->sc->fd, iov, sreq->dev.iov_count);
         if (offset == 0) {
             int req_errno = MPI_SUCCESS;
 
@@ -257,7 +257,7 @@ int MPID_nem_tcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_s
                 iov[1].MPID_IOV_BUF = data;
                 iov[1].MPID_IOV_LEN = data_sz;
                 
-                CHECK_EINTR(offset, writev(sc->fd, iov, 2));
+                offset = MPL_large_writev(sc->fd, iov, 2);
                 if (offset == 0) {
                     int req_errno = MPI_SUCCESS;
 
@@ -396,7 +396,7 @@ int MPID_nem_tcp_iStartContigMsg_paused(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_
             iov[1].MPID_IOV_BUF = data;
             iov[1].MPID_IOV_LEN = data_sz;
                 
-            CHECK_EINTR(offset, writev(sc->fd, iov, 2));
+            offset = MPL_large_writev(sc->fd, iov, 2);
             if (offset == 0) {
                 int req_errno = MPI_SUCCESS;
 
@@ -531,7 +531,7 @@ int MPID_nem_tcp_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPID
                 iov[1].MPID_IOV_BUF = data;
                 iov[1].MPID_IOV_LEN = data_sz;
                 
-                CHECK_EINTR(offset, writev(sc->fd, iov, 2));
+                offset = MPL_large_writev(sc->fd, iov, 2);
                 if (offset == 0) {
                     int req_errno = MPI_SUCCESS;
 
@@ -690,7 +690,7 @@ int MPID_nem_tcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header,
         {
             if (MPIDI_CH3I_Sendq_empty(vc_tcp->send_queue))
             {
-                CHECK_EINTR(offset, writev(vc_tcp->sc->fd, iov, iov_n));
+                offset = MPL_large_writev(vc_tcp->sc->fd, iov, iov_n);
                 if (offset == 0) {
                     int req_errno = MPI_SUCCESS;
 

http://git.mpich.org/mpich.git/commitdiff/f2c75e957247ee262ade2f0d9c11515aca9aefc1

commit f2c75e957247ee262ade2f0d9c11515aca9aefc1
Author: Pavan Balaji <balaji at mcs.anl.gov>
Date:   Sat Feb 8 16:24:42 2014 -0600

    Added a new MPL_large_writev function.
    
    Using writev on larger than 2GB messages seems to cause some platforms
    to hang (e.g., Mac OSX, at least as of 10.9.1).  This patch creates a
    new function that reduces the message size being transmitted.  The
    function does not attempt to send all data, in order to avoid making
    it a blocking function.  A higher-level function would need to check
    how much data is sent and retry later if needed.
    
    Signed-off-by: Wesley Bland <wbland at mcs.anl.gov>

diff --git a/src/mpl/Makefile.am b/src/mpl/Makefile.am
index 8693ae6..5800349 100644
--- a/src/mpl/Makefile.am
+++ b/src/mpl/Makefile.am
@@ -8,7 +8,7 @@ ACLOCAL_AMFLAGS = -I confdb
 AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include
 
 lib_LTLIBRARIES = lib at MPLLIBNAME@.la
-lib at MPLLIBNAME@_la_SOURCES = src/mplstr.c src/mpltrmem.c src/mplenv.c
+lib at MPLLIBNAME@_la_SOURCES = src/mplstr.c src/mpltrmem.c src/mplenv.c src/mplsock.c
 lib at MPLLIBNAME@_la_LDFLAGS = ${lib at MPLLIBNAME@_so_versionflags}
 
 MPL_TESTS = strsep
@@ -24,7 +24,8 @@ mpl_headers =              \
     include/mplconfig.h    \
     include/mplenv.h       \
     include/mplstr.h       \
-    include/mpltrmem.h
+    include/mpltrmem.h     \
+    include/mplsock.h
 
 if MPL_EMBEDDED_MODE
 noinst_HEADERS = $(mpl_headers)
diff --git a/src/mpl/configure.ac b/src/mpl/configure.ac
index c151de7..8d348e6 100644
--- a/src/mpl/configure.ac
+++ b/src/mpl/configure.ac
@@ -71,7 +71,7 @@ if test "$pac_cv_have___typeof" = "yes" ; then
 fi
 
 dnl Check if the necessary headers are available
-AC_CHECK_HEADERS(stdio.h stdlib.h string.h stdarg.h ctype.h search.h)
+AC_CHECK_HEADERS(stdio.h stdlib.h string.h stdarg.h ctype.h search.h sys/types.h sys/uio.h)
 
 # A C99 compliant compiler should have inttypes.h for fixed-size int types
 AC_CHECK_HEADERS(inttypes.h stdint.h)
diff --git a/src/mpl/include/mpl.h b/src/mpl/include/mpl.h
index 3a33f0a..28120a1 100644
--- a/src/mpl/include/mpl.h
+++ b/src/mpl/include/mpl.h
@@ -61,5 +61,6 @@
 #include "mplstr.h"
 #include "mpltrmem.h"
 #include "mplenv.h"
+#include "mplsock.h"
 
 #endif /* MPL_H_INCLUDED */
diff --git a/src/mpl/include/mplsock.h b/src/mpl/include/mplsock.h
new file mode 100644
index 0000000..4aae366
--- /dev/null
+++ b/src/mpl/include/mplsock.h
@@ -0,0 +1,45 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef MPLSOCK_H_INCLUDED
+#define MPLSOCK_H_INCLUDED
+
+#include "mplconfig.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <netdb.h>
+#include <limits.h>
+
+#ifdef MPL_HAVE_SYS_TYPES_H
+#include <sys/types.h> /* macs need sys/types.h before uio.h can be included */
+#endif
+#ifdef MPL_HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#endif
+
+#if !defined(MPL_HAVE_SYS_UIO_H)
+struct iovec;
+#endif
+
+/* *INDENT-ON* */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+/* *INDENT-OFF* */
+
+int MPL_large_writev(int fd, const struct iovec *iov, int iovcnt);
+
+/* *INDENT-ON* */
+#if defined(__cplusplus)
+}
+#endif
+/* *INDENT-OFF* */
+
+#endif /* MPLSOCK_H_INCLUDED */
diff --git a/src/mpl/src/mplsock.c b/src/mpl/src/mplsock.c
new file mode 100644
index 0000000..93504b8
--- /dev/null
+++ b/src/mpl/src/mplsock.c
@@ -0,0 +1,69 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "mpl.h"
+
+#if defined MPL_HAVE_SYS_UIO_H
+/* Some platforms, such as Mac OSX (at least as of 10.9.1) hang when
+ * attempting to send more than 2GB data, even though the writev
+ * function is supposed to be able to handle large data.  This
+ * function is a simple workaround for this case by attempting to send
+ * lesser data, and having the upper layer retry later if needed.
+ * This adds a small amount of bookkeeping overhead, but it should be
+ * negligible compared to the system call overhead for small messages
+ * and compared to the data transmission overhead for large
+ * messages. */
+int MPL_large_writev(int fd, const struct iovec *iov, int iovcnt)
+{
+    ssize_t total_size, tmp;
+    struct iovec dummy;
+    int i;
+
+    /* If the total data fits into INT_MAX, directly use writev */
+    total_size = 0;
+    for (i = 0; i < iovcnt; i++)
+        total_size += iov[i].iov_len;
+
+    if (total_size <= INT_MAX) {
+        do {
+            tmp = writev(fd, iov, iovcnt);
+        } while (tmp == -1 && errno == EINTR);
+        return tmp;
+    }
+
+    /* Total data is larger than INT_MAX.  Issue writev with fewer
+     * elements, so as to not exceed INT_MAX.  In this case, doing
+     * multiple write calls, one for each iov segment is not a big
+     * deal with respect to performance. */
+
+    total_size = 0;
+    for (i = 0; i < iovcnt; i++) {
+        if (iov[i].iov_len <= INT_MAX) {
+            do {
+                tmp = writev(fd, &iov[i], 1);
+            } while (tmp == -1 && errno == EINTR);
+        }
+        else {
+            dummy.iov_base = iov[i].iov_base;
+            dummy.iov_len = INT_MAX;
+            do {
+                tmp = writev(fd, &dummy, 1);
+            } while (tmp == -1 && errno == EINTR);
+        }
+
+        if (tmp < 0)
+            return tmp;
+        else if (tmp < iov[i].iov_len) {
+            total_size += tmp;
+            return total_size;
+        }
+        else
+            total_size += tmp;
+    }
+
+    return total_size;
+}
+#endif /* MPL_HAVE_SYS_UIO_H */

-----------------------------------------------------------------------

Summary of changes:
 src/mpid/ch3/channels/nemesis/netmod/tcp/socksm.c  |    4 +-
 .../ch3/channels/nemesis/netmod/tcp/tcp_send.c     |   10 ++--
 src/mpid/ch3/channels/sock/src/ch3_isendv.c        |    2 +-
 src/mpid/common/sock/poll/sock_immed.i             |    2 +-
 src/mpid/common/sock/poll/sock_wait.i              |    2 +-
 src/mpl/Makefile.am                                |    5 +-
 src/mpl/configure.ac                               |    2 +-
 src/mpl/include/mpl.h                              |    1 +
 src/mpl/include/mplsock.h                          |   45 +++++++++++++
 src/mpl/src/mplsock.c                              |   69 ++++++++++++++++++++
 10 files changed, 129 insertions(+), 13 deletions(-)
 create mode 100644 src/mpl/include/mplsock.h
 create mode 100644 src/mpl/src/mplsock.c


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list