[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.2-147-g9b51441

Service Account noreply at mpich.org
Wed Sep 3 14:38:49 CDT 2014


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master has been updated
       via  9b51441012556210417c6301348e38b32dc3c35e (commit)
       via  dacc232decdea8a0ba2c9576b59a794120b8283c (commit)
      from  2d2ff83d6be5c168e908a4cd426beea238a86885 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/9b51441012556210417c6301348e38b32dc3c35e

commit 9b51441012556210417c6301348e38b32dc3c35e
Author: Paul Coffman <pkcoff at us.ibm.com>
Date:   Sun Aug 31 22:14:35 2014 -0500

    Additional optimization of gpfsmpio_bridgeringagg during MPI_File_open
    
    The code that chose the aggregators for gpfsmpio_bridgeringagg was written
    very inefficiently, resulting in a large performance degradation during
    MPI_File_open.  That code has been rewritten to squash the five torus
    dimensions into one dimension and to loop over that new representation
    fewer times.  Now there is no performance degradation.
    
    Signed-off-by: Rob Latham <robl at mcs.anl.gov>

diff --git a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c
index 1141bab..240c013 100644
--- a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c
+++ b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c
@@ -167,6 +167,12 @@ typedef struct
    int bridge;
 } sortstruct;
 
+typedef struct
+{
+   int bridgeRank;
+   int numAggsAssigned;
+} bridgeAggAssignment;
+
 static int intsort(const void *p1, const void *p2)
 {
    sortstruct *i1, *i2;
@@ -215,91 +221,78 @@ ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo,
       }
       else { // aggTotal > 1
 
-        ADIOI_BG_ProcInfo_t *allProcInfoAggNodeList = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc(confInfo->nProcs * sizeof(ADIOI_BG_ProcInfo_t));
-        int allProcInfoAggNodeListSize = 0;
-        int maxManhattanDistanceToBridge = 0;
-
-        // for ppn > 1, assign minumum rank as agg candidate
-        for (i=0;i<confInfo->nProcs;i++) {
-          int addProcToAggNodeList = 1;
-          for (j=0;j<allProcInfoAggNodeListSize;j++) {
-            if ((allProcInfoAggNodeList[j].torusCoords[0] == all_procInfo[i].torusCoords[0]) &&
-              (allProcInfoAggNodeList[j].torusCoords[1] == all_procInfo[i].torusCoords[1]) &&
-              (allProcInfoAggNodeList[j].torusCoords[2] == all_procInfo[i].torusCoords[2]) &&
-              (allProcInfoAggNodeList[j].torusCoords[3] == all_procInfo[i].torusCoords[3]) &&
-              (allProcInfoAggNodeList[j].torusCoords[4] == all_procInfo[i].torusCoords[4]) &&
-              addProcToAggNodeList) {
-              // proc is in the node list, replace if this rank is smaller
-              addProcToAggNodeList = 0;
-
-              if (allProcInfoAggNodeList[j].rank > all_procInfo[i].rank)
-                allProcInfoAggNodeList[j] = all_procInfo[i];
-            }
-          } // for j
-          if (addProcToAggNodeList) {
-            allProcInfoAggNodeList[allProcInfoAggNodeListSize] = all_procInfo[i];
-            if (allProcInfoAggNodeList[allProcInfoAggNodeListSize].manhattanDistanceToBridge > maxManhattanDistanceToBridge)
-              maxManhattanDistanceToBridge = allProcInfoAggNodeList[allProcInfoAggNodeListSize].manhattanDistanceToBridge;
-            allProcInfoAggNodeListSize++;
-          }
-        } // for i
+        int currentAggListSize = 0;
+        int numBridgesWithAggAssignments = 0;
+        bridgeAggAssignment *aggAssignments = (bridgeAggAssignment *)ADIOI_Malloc(confInfo->numBridgeRanks * sizeof(bridgeAggAssignment));
 
-#ifdef bridgeringaggtrace
-      fprintf(stderr,"allProcInfoAggNodeListSize is %d aggTotal is %d\n",allProcInfoAggNodeListSize,aggTotal);
-#endif
-
-      int *aggNodeBridgeList = (int *) ADIOI_Malloc (allProcInfoAggNodeListSize * sizeof(int)); // list of all bridge ranks
-      int *aggNodeBridgeListNum = (int *) ADIOI_Malloc (allProcInfoAggNodeListSize * sizeof(int));
-      for (i=0;i<allProcInfoAggNodeListSize;i++) {
-        aggNodeBridgeList[i] = -1;
-        aggNodeBridgeListNum[i] = 0;
-      }
-
-      int aggNodeBridgeListSize = 0;
-      for (i=0;i<allProcInfoAggNodeListSize;i++) {
-        int foundBridge = 0;
-        for (j=0;(j<aggNodeBridgeListSize && !foundBridge);j++) {
-          if (aggNodeBridgeList[j] == allProcInfoAggNodeList[i].bridgeRank) {
-            foundBridge = 1;
-            aggNodeBridgeListNum[i]++;
-          }
-        }
-        if (!foundBridge) {
-          aggNodeBridgeList[aggNodeBridgeListSize] = allProcInfoAggNodeList[i].bridgeRank;
-          aggNodeBridgeListNum[aggNodeBridgeListSize] = 1;
-          aggNodeBridgeListSize++;
-        }
-      }
+        int partitionSize = all_procInfo[0].numNodesInPartition;
+        int *nodesAssigned = (int *)ADIOI_Malloc(partitionSize * sizeof(int));
+        for (i=0;i<partitionSize;i++)
+          nodesAssigned[i] = 0;
 
-      // add aggs based on numAggs per bridge, starting at gpfsmpio_bridgeringagg hops and increasing until numAggs aggs found
-      int currentAggListSize = 0;
-      for (i=0;i<aggNodeBridgeListSize;i++) {
-        int currentBridge = aggNodeBridgeList[i];
         int currentNumHops = gpfsmpio_bridgeringagg;
-        int numAggsAssignedToThisBridge = 0;
-        while ((numAggsAssignedToThisBridge < numAggs) && (currentNumHops <= maxManhattanDistanceToBridge)) {
-          for (j=0;j<allProcInfoAggNodeListSize;j++) {
-            if (allProcInfoAggNodeList[j].bridgeRank == currentBridge) {
-              if (allProcInfoAggNodeList[j].manhattanDistanceToBridge == currentNumHops) {
-                aggList[currentAggListSize] = allProcInfoAggNodeList[j].rank;
+        int allAggsAssigned = 0;
+
+        /* Iterate thru the process infos and select aggregators starting at currentNumHops
+           away.  Increase the currentNumHops until all bridges have numAggs assigned to them.
+        */
+        while (!allAggsAssigned) {
+          /* track whether any aggs are selected during this round */
+          int startingCurrentAggListSize = currentAggListSize;
+          int numIterForHopsWithNoAggs = 0;
+          for (i=0;i<confInfo->nProcs;i++) {
+          if (all_procInfo[i].manhattanDistanceToBridge == currentNumHops) {
+            if (nodesAssigned[all_procInfo[i].nodeRank] == 0) { // node is not assigned as an agg yet
+              int foundBridge = 0;
+              for (j=0;(j<numBridgesWithAggAssignments && !foundBridge);j++) {
+                if (aggAssignments[j].bridgeRank == all_procInfo[i].bridgeRank) {
+                  foundBridge = 1;
+                  if (aggAssignments[j].numAggsAssigned < numAggs) {
+                    aggAssignments[j].numAggsAssigned++;
+                    nodesAssigned[all_procInfo[i].nodeRank] = 1;
+                    aggList[currentAggListSize] = all_procInfo[i].rank;
+                    currentAggListSize++;
 #ifdef bridgeringaggtrace
-                printf("Assigned agg rank %d at torus coords %u %u %u %u %u to bridge %d at torus coords %u %u %u %u %u at a distance of %d hops\n",allProcInfoAggNodeList[j].rank,allProcInfoAggNodeList[j].torusCoords[0],allProcInfoAggNodeList[j].torusCoords[1],allProcInfoAggNodeList[j].torusCoords[2],allProcInfoAggNodeList[j].torusCoords[3],allProcInfoAggNodeList[j].torusCoords[4], currentBridge, all_procInfo[currentBridge].torusCoords[0], all_procInfo[currentBridge].torusCoords[1], all_procInfo[currentBridge].torusCoords[2], all_procInfo[currentBridge].torusCoords[3], all_procInfo[currentBridge].torusCoords[4],currentNumHops);
+                printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
 #endif
+                  }
+                }
+              }
+              if (!foundBridge) {
+                aggAssignments[numBridgesWithAggAssignments].bridgeRank = all_procInfo[i].bridgeRank;
+                aggAssignments[numBridgesWithAggAssignments].numAggsAssigned = 1;
+                numBridgesWithAggAssignments++;
+                nodesAssigned[all_procInfo[i].nodeRank] = 1;
+                aggList[currentAggListSize] = all_procInfo[i].rank;
                 currentAggListSize++;
-                numAggsAssignedToThisBridge++;
-                if (numAggsAssignedToThisBridge >= numAggs)
-                  break;
+#ifdef bridgeringaggtrace
+                printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
+#endif
               }
             }
           }
-          currentNumHops++;
-        } // while
-        ADIOI_Assert(numAggsAssignedToThisBridge == numAggs);
-      } // for
-
-      ADIOI_Free(allProcInfoAggNodeList);
-      ADIOI_Free(aggNodeBridgeList);
-      ADIOI_Free(aggNodeBridgeListNum);
+        }
+
+        if (numBridgesWithAggAssignments == confInfo->numBridgeRanks) {
+          allAggsAssigned = 1;
+          for (i=0;(i<numBridgesWithAggAssignments && allAggsAssigned);i++) {
+            if (aggAssignments[i].numAggsAssigned < numAggs)
+              allAggsAssigned = 0;
+          }
+        }
+        currentNumHops++;
+        /* If 3 rounds go by without selecting an agg, abort to avoid
+           an infinite loop.
+        */
+        if (startingCurrentAggListSize == currentAggListSize)
+          numIterForHopsWithNoAggs++;
+        else
+          numIterForHopsWithNoAggs = 0;
+        ADIOI_Assert(numIterForHopsWithNoAggs <= 3);
+        }
+
+        ADIOI_Free(aggAssignments);
+        ADIOI_Free(nodesAssigned);
 
       } // else aggTotal  > 1
 
diff --git a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.c b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.c
index 59f2219..a18b02a 100644
--- a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.c
+++ b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.c
@@ -136,16 +136,23 @@ ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf,
    proc->coreID = hw.coreID;
 
    if (gpfsmpio_bridgeringagg > 0) {
-     for (i=0;i<MPIX_TORUS_MAX_DIMS;i++) {
-       proc->torusCoords[i] = hw.Coords[i];
-     }
 #ifdef bridgeringaggtrace
      if (rank == 0)
        fprintf(stderr,"Block dimensions:\n");
 #endif
+
+     /* Set the numNodesInPartition and nodeRank for this proc
+     */
+     proc->numNodesInPartition = 1;
+     proc->nodeRank = 0;
      for (i=0;i<MPIX_TORUS_MAX_DIMS;i++) {
        torusSize[i] = hw.Size[i];
        dimTorus[i] = hw.isTorus[i];
+       proc->numNodesInPartition *= hw.Size[i];
+         int baseNum = 1;
+         for (int j=0;j<i;j++)
+           baseNum *= hw.Size[j];
+         proc->nodeRank += (hw.Coords[i] * baseNum);
 #ifdef bridgeringaggtrace
        if (rank == 0)
          fprintf(stderr,"Dimension %d has %d elements wrap-around value is %d\n",i,torusSize[i],dimTorus[i]);
diff --git a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.h b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.h
index d5f36b1..bcdc61f 100644
--- a/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.h
+++ b/src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.h
@@ -35,7 +35,8 @@ typedef struct {
       node, i.e. psetsize*/
    int iamBridge; /* am *I* the bridge rank? */
    int __ipad[2];
-   unsigned torusCoords[MPIX_TORUS_MAX_DIMS]; /* torus coordinates of node on which this rank resides */
+   unsigned nodeRank; /* torus coords converted to an integer for use with gpfsmpio_bridgeringagg */
+   unsigned numNodesInPartition; /* number of physical nodes in the job partition */
    unsigned manhattanDistanceToBridge; /* number of hops between this rank and the bridge node */
 } ADIOI_BG_ProcInfo_t __attribute__((aligned(16)));
 

http://git.mpich.org/mpich.git/commitdiff/dacc232decdea8a0ba2c9576b59a794120b8283c

commit dacc232decdea8a0ba2c9576b59a794120b8283c
Author: Rob Latham <robl at mcs.anl.gov>
Date:   Wed Sep 3 11:48:33 2014 -0500

    fix compiler warnings and shadowed declarations
    
    We do not compile with --enable-strict on Blue Gene often enough.
    
    No Reviewer

diff --git a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_open.c b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_open.c
index 31ba04b..f4fef37 100644
--- a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_open.c
+++ b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_open.c
@@ -115,14 +115,14 @@ void ADIOI_GPFS_Open(ADIO_File fd, int *error_code)
 
 	MPI_Comm_rank(fd->comm, &rank);
 	if ((rank == fd->hints->ranklist[0]) || (fd->comm == MPI_COMM_SELF)) {
-	    struct stat64 gpfs_stat;
+	    struct stat64 gpfs_statbuf;
 	    /* Get the (real) underlying file system block size */
-	    rc = stat64(fd->filename, &gpfs_stat);
+	    rc = stat64(fd->filename, &gpfs_statbuf);
 	    if (rc >= 0)
 	    {
-		fd->blksize = gpfs_stat.st_blksize;
+		fd->blksize = gpfs_statbuf.st_blksize;
 		DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
-			fd->filename,gpfs_stat.st_blksize);
+			fd->filename,gpfs_statbuf.st_blksize);
 	    }
 	    else
 	    {
diff --git a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c
index d049340..0e2a1d2 100644
--- a/src/mpi/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c
+++ b/src/mpi/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c
@@ -572,7 +572,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype
       }
     }
 
-    ADIO_Offset st_loc_ion, end_loc_ion, needs_gpfs_access_cleanup=0;
+    ADIO_Offset st_loc_ion=0, end_loc_ion=0, needs_gpfs_access_cleanup=0;
 #ifdef BGQPLATFORM
     if (ntimes > 0) { /* only set the gpfs hint if we have io - ie this rank is
 			 an aggregator -- otherwise will fail for deferred open */

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/romio/adio/ad_gpfs/ad_gpfs_open.c   |    8 +-
 src/mpi/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c |    2 +-
 src/mpi/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c |  145 +++++++++++++--------------
 src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.c  |   13 ++-
 src/mpi/romio/adio/ad_gpfs/bg/ad_bg_pset.h  |    3 +-
 5 files changed, 86 insertions(+), 85 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list