[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.3-50-gc16466e

Service Account noreply at mpich.org
Tue Oct 28 13:36:06 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  c16466e33992d1f0bc05b9581f9727013ed9443c (commit)
       via  976272a7eff02633bdc794a03cba7fa2742d068c (commit)
      from  ee83ffb52e7d2f2c357078b3bbd25939fd009ac8 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/c16466e33992d1f0bc05b9581f9727013ed9443c

commit c16466e33992d1f0bc05b9581f9727013ed9443c
Author: Paul Coffman <pkcoff at us.ibm.com>
Date:   Mon Oct 27 21:42:43 2014 -0500

    Assign large blocks first in ADIOI_GPFS_Calc_file_domains
    
    For files that are less than the size of a gpfs block there seems to be
    an issue if successive MPI_File_write_at_all are called with proceeding
    offsets.  Given the simple case of 2 aggs, the 2nd agg/fd will be utilized,
    however the initial offset into the 2nd agg is distorted on the 2nd call
    to MPI_File_write_at_all because of the negative size of the 1st agg/fd
    because the offset into the 2nd agg/fd is influenced by the size of the
    first.  Simple solution is to reverse the default large block assignment so
    in the case where only 1 agg/fd will be used it will be the first.  By chance
    in the 2 agg situation this is what the GPFSMPIO_BALANCECONTIG
    optimization does and it does not have this problem.
    
    Signed-off-by: Rob Latham <robl at mcs.anl.gov>

diff --git a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_aggrs.c b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_aggrs.c
index 0e67b54..e403d9b 100644
--- a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_aggrs.c
+++ b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_aggrs.c
@@ -367,10 +367,10 @@ void ADIOI_GPFS_Calc_file_domains(ADIO_File fd,
 	/* BG/L- and BG/P-style distribution of file domains: simple allocation of
 	 * file domins to each aggregator */
 	for (i=0; i<naggs; i++) {
-	    if (i < naggs_small) {
-		fd_size[i] = nb_cn_small     * blksize;
-	    } else {
+	    if (i < naggs_large) {
 		fd_size[i] = (nb_cn_small+1) * blksize;
+	    } else {
+		fd_size[i] = nb_cn_small     * blksize;
 	    }
 	}
     }
@@ -387,12 +387,12 @@ void ADIOI_GPFS_Calc_file_domains(ADIO_File fd,
 
 #else // not BGQ platform
 	for (i=0; i<naggs; i++) {
-	    if (i < naggs_small) {
-		fd_size[i] = nb_cn_small     * blksize;
-	    } else {
+	    if (i < naggs_large) {
 		fd_size[i] = (nb_cn_small+1) * blksize;
+	    } else {
+		fd_size[i] = nb_cn_small     * blksize;
 	    }
-	}
+    }
 
 #endif
 

http://git.mpich.org/mpich.git/commitdiff/976272a7eff02633bdc794a03cba7fa2742d068c

commit 976272a7eff02633bdc794a03cba7fa2742d068c
Author: Paul Coffman <pkcoff at us.ibm.com>
Date:   Mon Oct 27 13:57:22 2014 -0500

    MP_IOTASKLIST error checking
    
    PE users may manually specify the MP_IOTASKLIST for explicit aggregator
    selection.  Code needed to be added to verify that the user's
    specification of aggregators was valid.
    
    Do our best to maintain the old PE behavior: use as much of the
    correctly specified MP_IOTASKLIST as possible, issue what it labeled
    as error messages (but which were really warnings) about the incorrect
    portions while functionally just ignoring them, and fall back on the
    default aggregator selection if none of the list was usable.
    
    Signed-off-by: Rob Latham <robl at mcs.anl.gov>

diff --git a/src/mpi/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c b/src/mpi/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c
index dfeeff5..8453238 100644
--- a/src/mpi/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c
+++ b/src/mpi/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c
@@ -60,50 +60,89 @@ ADIOI_PE_gen_agg_ranklist(ADIO_File fd)
     int i,j;
     int inTERcommFlag = 0;
 
-    int myRank;
+    int myRank,commSize;
     MPI_Comm_rank(fd->comm, &myRank);
+    MPI_Comm_size(fd->comm, &commSize);
 
     MPI_Comm_test_inter(fd->comm, &inTERcommFlag);
     if (inTERcommFlag) {
-      FPRINTF(stderr,"inTERcomms are not supported in MPI-IO - aborting....\n");
+      FPRINTF(stderr,"ERROR: ATTENTION: inTERcomms are not supported in MPI-IO - aborting....\n");
       perror("ADIOI_PE_gen_agg_ranklist:");
       MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
     if (ioTaskList) {
+      int ioTaskListLen = strlen(ioTaskList);
+      int ioTaskListPos = 0;
       char tmpBuf[8];   /* Big enough for 1M tasks (7 digits task ID). */
       tmpBuf[7] = '\0';
       for (i=0; i<7; i++) {
          tmpBuf[i] = *ioTaskList++;      /* Maximum is 7 digits for 1 million. */
+         ioTaskListPos++;
          if (*ioTaskList == ':') {       /* If the next char is a ':' ends it. */
              tmpBuf[i+1] = '\0';
              break;
          }
       }
       numAggs = atoi(tmpBuf);
+      if (numAggs == 0)
+        FPRINTF(stderr,"ERROR: ATTENTION: Number of aggregators specified in MP_IOTASKLIST set at 0 - default aggregator selection will be used.\n");
+      else if (!((numAggs > 0 ) && (numAggs <= commSize))) {
+        FPRINTF(stderr,"ERROR: ATTENTION: The number of aggregators (%s) specified in MP_IOTASKLIST is outside the communicator task range of %d.\n",tmpBuf,commSize);
+        numAggs = commSize;
+      }
       fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));
 
-      for (j=0; j<numAggs; j++) {
+      int aggIndex = 0;
+      while (aggIndex < numAggs) {
          ioTaskList++;                /* Advance past the ':' */
+         ioTaskListPos++;
+         int allDigits=1;
          for (i=0; i<7; i++) {
+            if (*ioTaskList < '0' || *ioTaskList > '9')
+              allDigits=0;
             tmpBuf[i] = *ioTaskList++;
+            ioTaskListPos++;
             if ( (*ioTaskList == ':') || (*ioTaskList == '\0') ) {
                 tmpBuf[i+1] = '\0';
                 break;
             }
          }
-         fd->hints->ranklist[j] = atoi(tmpBuf);
+         if (allDigits) {
+           int newAggRank = atoi(tmpBuf);
+           if (!((newAggRank >= 0 ) && (newAggRank < commSize))) {
+             FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not within the communicator task range of 0 to %d  - it will be ignored.\n",tmpBuf,commSize-1);
+           }
+           else {
+             int aggAlreadyAdded = 0;
+             for (i=0;i<aggIndex;i++)
+               if (fd->hints->ranklist[i] == newAggRank) {
+                 aggAlreadyAdded = 1;
+                 break;
+               }
+             if (!aggAlreadyAdded)
+               fd->hints->ranklist[aggIndex++] = newAggRank;
+             else
+               FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%d' is specified multiple times in MP_IOTASKLIST - duplicates are ignored.\n",newAggRank);
+           }
+         }
+         else {
+           FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not a valid integer task id  - it will be ignored.\n",tmpBuf);
+         }
 
          /* At the end check whether the list is shorter than specified. */
-         if (*ioTaskList == '\0') {
-            if (j < (numAggs-1)) {
-               numAggs = j;
-            }
-            break;
+         if (ioTaskListPos == ioTaskListLen) {
+           if (aggIndex == 0) {
+             FPRINTF(stderr,"ERROR: ATTENTION: No aggregators were correctly specified in MP_IOTASKLIST - default aggregator selection will be used.\n");
+             ADIOI_Free(fd->hints->ranklist);
+           }
+           else if (aggIndex < numAggs)
+             FPRINTF(stderr,"ERROR: ATTENTION: %d aggregators were specified in MP_IOTASKLIST but only %d were correctly specified - setting the number of aggregators to %d.\n",numAggs, aggIndex,aggIndex);
+           numAggs = aggIndex;
          }
       }
     }
-    else {
+    if (numAggs == 0)  {
       MPID_Comm *mpidCommData;
 
       MPID_Comm_get_ptr(fd->comm,mpidCommData);

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/romio/adio/ad_gpfs/ad_gpfs_aggrs.c  |   14 +++---
 src/mpi/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c |   59 ++++++++++++++++++++++-----
 2 files changed, 56 insertions(+), 17 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list