[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-239-ga33abfa

Service Account noreply at mpich.org
Fri Mar 11 12:55:58 CST 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".

The branch, master, has been updated
       via  a33abfab501908b2fc02565819b653937f1637b1 (commit)
       via  eaf5c0cb8eca7fd957a3a90e7b1028462e6702dc (commit)
      from  f3a3cfb728d385e85b6fd007c289eaa110fcb7f5 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/a33abfab501908b2fc02565819b653937f1637b1

commit a33abfab501908b2fc02565819b653937f1637b1
Author: Rob Latham <robl at mcs.anl.gov>
Date:   Thu Feb 4 18:05:03 2016 -0600

    fix lustre file distribution and deferred open
    
    under deferred open, not all processes open the file and determine the
    file distribution parameters (stripe size, chunk size, and starting io
    device).
    
    Signed-off-by: Michael Raymond <mraymond at sgi.com>

diff --git a/src/mpi/romio/adio/ad_lustre/ad_lustre_open.c b/src/mpi/romio/adio/ad_lustre/ad_lustre_open.c
index a3b03be..9028aec 100644
--- a/src/mpi/romio/adio/ad_lustre/ad_lustre_open.c
+++ b/src/mpi/romio/adio/ad_lustre/ad_lustre_open.c
@@ -91,8 +91,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
     /* It was strange and buggy to open the file in the hint path.  Instead,
      * we'll apply the file tunings at open time */
     if ((amode & O_CREAT) && set_layout ) {
-	/* if user has specified striping info, process 0 tries to set it */
-	if (!myrank) {
+	/* if user has specified striping info, first aggregator tries to set
+	 * it */
+	if (myrank == fd->hints->ranklist[0] || fd->comm == MPI_COMM_SELF) {
 	    lum->lmm_magic = LOV_USER_MAGIC;
 	    lum->lmm_pattern = 0;
 	    /* crude check for overflow of lustre internal datatypes.
@@ -138,7 +139,7 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
 	sprintf(value, "%d", lum->lmm_stripe_count);
 	ADIOI_Info_set(fd->info, "striping_factor", value);
 
-	fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
+	fd->hints->start_iodevice = lum->lmm_stripe_offset;
 	sprintf(value, "%d", lum->lmm_stripe_offset);
 	ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
 
diff --git a/src/mpi/romio/adio/common/ad_opencoll.c b/src/mpi/romio/adio/common/ad_opencoll.c
index 467a322..712d0e9 100644
--- a/src/mpi/romio/adio/common/ad_opencoll.c
+++ b/src/mpi/romio/adio/common/ad_opencoll.c
@@ -21,11 +21,52 @@
  * on MPI-IO scalability").  
  */
      
+enum {
+    BLOCKSIZE = 0,
+    STRIPE_SIZE,
+    STRIPE_FACTOR,
+    START_IODEVICE,
+    STAT_ITEMS
+} file_stats;
+
+
+/* generate an MPI datatype describing the members of the ADIO_File struct that
+ * we want to ensure all processes have.  In deferred open, aggregators will
+ * open the file and possibly read layout and other information.
+ * non-aggregators will skip the open, but still need to know how the file is
+ * being treated and what optimizations to apply */
+
+static MPI_Datatype make_stats_type(ADIO_File fd) {
+    int lens[STAT_ITEMS];
+    MPI_Aint offsets[STAT_ITEMS];
+    MPI_Datatype types[STAT_ITEMS];
+    MPI_Datatype newtype;
+
+    lens[BLOCKSIZE] = 1;
+    MPI_Address(&fd->blksize, &offsets[BLOCKSIZE]);
+    types[BLOCKSIZE] = MPI_LONG;
+
+    lens[STRIPE_SIZE]= lens[STRIPE_FACTOR]= lens[START_IODEVICE] = 1;
+    types[STRIPE_SIZE] = types[STRIPE_FACTOR] =
+	types[START_IODEVICE] = MPI_INT;
+    MPI_Address(&fd->hints->striping_unit, &offsets[STRIPE_SIZE]);
+    MPI_Address(&fd->hints->striping_factor, &offsets[STRIPE_FACTOR]);
+    MPI_Address(&fd->hints->start_iodevice, &offsets[START_IODEVICE]);
+
+
+    MPI_Type_create_struct(STAT_ITEMS, lens, offsets, types, &newtype);
+    MPI_Type_commit(&newtype);
+    return newtype;
+
+}
 void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, 
 	int access_mode, int *error_code)
 {
     int orig_amode_excl, orig_amode_wronly;
     MPI_Comm tmp_comm;
+    MPI_Datatype stats_type;  /* deferred open: some processes might not
+				 open the file, so we'll exchange some
+				 information with those non-aggregators */
 
     orig_amode_excl = access_mode;
 
@@ -68,6 +109,7 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
     /* if we are doing deferred open, non-aggregators should return now */
     if (fd->hints->deferred_open ) {
         if (!(fd->is_agg)) {
+	    char value[MPI_MAX_INFO_VAL+1];
             /* we might have turned off EXCL for the aggregators.
              * restore access_mode that non-aggregators get the right
              * value from get_amode */
@@ -78,9 +120,24 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
 	     * this open call.  Broadcast a bit of information in case
 	     * lower-level file system driver (e.g. 'bluegene') collected it
 	     * (not all do)*/
-	    MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
-	    *error_code = MPI_SUCCESS;
+	    stats_type = make_stats_type(fd);
+	    MPI_Bcast(MPI_BOTTOM, 1, stats_type, fd->hints->ranklist[0], fd->comm);
 	    ADIOI_Assert(fd->blksize > 0);
+	    /* some file systems (e.g. lustre) will inform the user via the
+	     * info object about the file configuration.  deferred open,
+	     * though, skips that step for non-aggregators.  we do the
+	     * info-setting here */
+	    sprintf(value, "%d", fd->hints->striping_unit);
+	    ADIOI_Info_set(fd->info, "striping_unit", value);
+
+	    sprintf(value, "%d", fd->hints->striping_factor);
+	    ADIOI_Info_set(fd->info, "striping_factor", value);
+
+	    sprintf(value, "%d", fd->hints->start_iodevice);
+	    ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
+
+	    *error_code = MPI_SUCCESS;
+	    MPI_Type_free(&stats_type);
 	    return;
 	}
     }
@@ -112,12 +169,16 @@ void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
     /* if we turned off EXCL earlier, then we should turn it back on */
     if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
 
-    /* broadcast a bit of information (blocksize for now) to all proceses in
+    /* broadcast information to all proceses in
      * communicator, not just those who participated in open */
-    MPI_Bcast(&(fd->blksize), 1, MPI_LONG, fd->hints->ranklist[0], fd->comm);
+
+    stats_type = make_stats_type(fd);
+    MPI_Bcast(MPI_BOTTOM, 1, stats_type, fd->hints->ranklist[0], fd->comm);
+    MPI_Type_free(&stats_type);
     /* file domain code will get terribly confused in a hard-to-debug way if
      * gpfs blocksize not sensible */
     ADIOI_Assert( fd->blksize > 0);
+
     /* for deferred open: this process has opened the file (because if we are
      * not an aggregaor and we are doing deferred open, we returned earlier)*/
     fd->is_open = 1;
diff --git a/src/mpi/romio/adio/include/adioi.h b/src/mpi/romio/adio/include/adioi.h
index 26edfb0..0b9d8b3 100644
--- a/src/mpi/romio/adio/include/adioi.h
+++ b/src/mpi/romio/adio/include/adioi.h
@@ -47,6 +47,7 @@ struct ADIOI_Hints_struct {
     int ind_rd_buffer_size;
     int ind_wr_buffer_size;
     int deferred_open;
+    int start_iodevice;
     int min_fdomain_size;
     char *cb_config_list;
     int *ranklist;
@@ -65,7 +66,6 @@ struct ADIOI_Hints_struct {
 		    int dtype_write;
 	    } pvfs2;
             struct {
-                    int start_iodevice;
                     int co_ratio;
                     int coll_threshold;
                     int ds_in_coll;

http://git.mpich.org/mpich.git/commitdiff/eaf5c0cb8eca7fd957a3a90e7b1028462e6702dc

commit eaf5c0cb8eca7fd957a3a90e7b1028462e6702dc
Author: Rob Latham <robl at mcs.anl.gov>
Date:   Mon Nov 9 14:43:42 2015 -0600

    small 64-to-32 fixup in tests
    
    No Reviewer

diff --git a/test/mpi/util/mtest_datatype.h b/test/mpi/util/mtest_datatype.h
index e549406..2320d51 100644
--- a/test/mpi/util/mtest_datatype.h
+++ b/test/mpi/util/mtest_datatype.h
@@ -50,7 +50,7 @@ enum MTEST_MIN_DERIVED_DT {
     MTEST_MIN_DDT_MAX
 };
 
-typedef int (*MTestDdtCreator) (int, int, int, int, MPI_Datatype, const char *, MTestDatatype *);
+typedef int (*MTestDdtCreator) (MPI_Aint, MPI_Aint, MPI_Aint, MPI_Aint, MPI_Datatype, const char *, MTestDatatype *);
 
 extern void MTestTypeCreatorInit(MTestDdtCreator * creators);
 extern void MTestTypeMinCreatorInit(MTestDdtCreator * creators);

-----------------------------------------------------------------------

Summary of changes:
 src/mpi/romio/adio/ad_lustre/ad_lustre_open.c |    7 ++-
 src/mpi/romio/adio/common/ad_opencoll.c       |   69 +++++++++++++++++++++++--
 src/mpi/romio/adio/include/adioi.h            |    2 +-
 test/mpi/util/mtest_datatype.h                |    2 +-
 4 files changed, 71 insertions(+), 9 deletions(-)


hooks/post-receive
-- 
MPICH primary repository


More information about the commits mailing list