[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.1.1-105-g142b944
Service Account
noreply at mpich.org
Wed Jul 16 09:36:19 CDT 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master, has been updated
via 142b944024cbac4f6fc2cfc989b5404c52f7d1cc (commit)
from 7669873d2dadd04dd8509aa5939d68f24eef4c32 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/142b944024cbac4f6fc2cfc989b5404c52f7d1cc
commit 142b944024cbac4f6fc2cfc989b5404c52f7d1cc
Author: Rob Latham <robl at mcs.anl.gov>
Date: Tue Jul 15 09:54:24 2014 -0500
Revert "Address many of the perf problems in #1788"
This reverts commit 1c5c594554343a0b6b4335cb28790aa51ab8a968.
Reopens #1788 (datatype performance tests failing),
but it is better to have poor performance than incorrect results.
Closes #2115 (RMA fails with derived type containing struct of struct)
Closes #2126 (Data Integrity issue in MPI_Gather ...)
Conflicts:
src/mpid/common/datatype/dataloop/dataloop_optimize.c
but only because a subsequent commit removed bits of this optimization.
This commit fully removes this optimization, but we leave behind test
cases to help us make sure we get it right next time. We also leave
behind some additional debugging support routines.
Signed-off-by: Junchao Zhang <jczhang at mcs.anl.gov>
diff --git a/src/mpid/common/datatype/dataloop/Makefile.mk b/src/mpid/common/datatype/dataloop/Makefile.mk
index 6cb8799..8bf212a 100644
--- a/src/mpid/common/datatype/dataloop/Makefile.mk
+++ b/src/mpid/common/datatype/dataloop/Makefile.mk
@@ -21,8 +21,7 @@ mpi_core_sources += \
src/mpid/common/datatype/dataloop/segment_count.c \
src/mpid/common/datatype/dataloop/segment_flatten.c \
src/mpid/common/datatype/dataloop/segment_packunpack.c \
- src/mpid/common/datatype/dataloop/subarray_support.c \
- src/mpid/common/datatype/dataloop/dataloop_optimize.c
+ src/mpid/common/datatype/dataloop/subarray_support.c
# several headers are included by the rest of MPICH
AM_CPPFLAGS += -I$(top_srcdir)/src/mpid/common/datatype
diff --git a/src/mpid/common/datatype/dataloop/dataloop_create.h b/src/mpid/common/datatype/dataloop/dataloop_create.h
index 0815b0b..414e849 100644
--- a/src/mpid/common/datatype/dataloop/dataloop_create.h
+++ b/src/mpid/common/datatype/dataloop/dataloop_create.h
@@ -88,22 +88,11 @@ DLOOP_Count PREPEND_PREFIX(Type_indexed_count_contig)(DLOOP_Count count,
const void *displacement_array,
int dispinbytes,
DLOOP_Offset old_extent);
-
+
DLOOP_Count PREPEND_PREFIX(Type_blockindexed_count_contig)(DLOOP_Count count,
DLOOP_Count blklen,
const void *disp_array,
int dispinbytes,
DLOOP_Offset old_extent);
-int PREPEND_PREFIX(Dataloop_optimize)( DLOOP_Dataloop *dlpOld_p, int level );
-
-int PREPEND_PREFIX(Dataloop_est_complexity)(DLOOP_Dataloop *,
- MPI_Aint *, MPI_Aint *);
-int PREPEND_PREFIX(Dataloop_est_struct_complexity)( int,
- const int [],
- const DLOOP_Type [],
- MPI_Aint *,
- MPI_Aint * );
-
-void PREPEND_PREFIX(Dataloop_debug_print)( DLOOP_Dataloop *dp );
#endif
diff --git a/src/mpid/common/datatype/dataloop/dataloop_create_struct.c b/src/mpid/common/datatype/dataloop/dataloop_create_struct.c
index 70054b6..7d51b7b 100644
--- a/src/mpid/common/datatype/dataloop/dataloop_create_struct.c
+++ b/src/mpid/common/datatype/dataloop/dataloop_create_struct.c
@@ -11,57 +11,6 @@
#error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"
#endif
-
-/*
-=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
-
-categories :
- - name : DATATYPE
- description : Datatype optimization parameters
-
-cvars:
- - name : MPIR_CVAR_DATALOOP_OPTIMIZE
- category : DATATYPE
- type : boolean
- default : true
- class : none
- verbosity : MPI_T_VERBOSITY_USER_BASIC
- scope : MPI_T_SCOPE_LOCAL
- description : >-
- By default, the internal representation of an MPI datatype that
- is used by MPICH to move data is very similar to the original
- description of the datatype. If this flag is true, additional
- optimizations are used to improve the performance of datatypes.
-
- - name : MPIR_CVAR_DATALOOP_FLATTEN
- category : DATATYPE
- type : boolean
- class : none
- default : true
- verbosity : MPI_T_VERBOSITY_USER_BASIC
- scope : MPI_T_SCOPE_LOCAL
- description : >-
- If true, attempt to "flatten" the internal representation of
- MPI struct datatypes (created with MPI_Type_create_struct).
-
- - name : MPIR_CVAR_DATALOOP_FLATTEN_MULT
- category : DATATYPE
- type : int
- class : none
- default : 2
- verbosity : MPI_T_VERBOSITY_USER_BASIC
- scope : MPI_T_SCOPE_LOCAL
- description : >-
- Flattening an MPI struct datatype does not always improve
- performance. This parameter is a threshold that is used in
- comparing the size of the description with the amount of data
- moved. Larger values make it more likely that a struct datatype
- will be flattened. The default value is adequate for flattening
- simple structs, and will usually avoid flattening structs
- containing vectors or block-indexed data.
-
-=== END_MPI_T_CVAR_INFO_BLOCK ===
-*/
static int DLOOP_Dataloop_create_struct_memory_error(void);
static int DLOOP_Dataloop_create_unique_type_struct(DLOOP_Count count,
const int *blklens,
@@ -289,37 +238,19 @@ int PREPEND_PREFIX(Dataloop_create_struct)(DLOOP_Count count,
* if caller asked for homogeneous or all bytes representation,
* flatten the type and store it as an indexed type so that
* there are no branches in the dataloop tree.
- *
- * Note that this is not always an optimization - for example,
- * replacing two long block_indexed with one longer indexed (with
- * the additional blockcount array) is likely to be slower, because
- * of the additional memory motion required.
*/
- if (MPIR_CVAR_DATALOOP_FLATTEN && (
- (flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
- (flag == DLOOP_DATALOOP_ALL_BYTES) ))
- {
- MPI_Aint nElms = 0, nDesc = 0;
- PREPEND_PREFIX(Dataloop_est_struct_complexity)( count,
- blklens,
- oldtypes,
- &nElms,
- &nDesc );
-
- /* Only convert to flattened if the flattened description
- is likely to be more efficient. The magic number of 24 was
- determined emperically. */
- if ( nDesc * 24 * MPIR_CVAR_DATALOOP_FLATTEN_MULT > nElms) {
- return DLOOP_Dataloop_create_flattened_struct(count,
- blklens,
- disps,
- oldtypes,
- dlp_p,
- dlsz_p,
- dldepth_p,
- flag);
- }
- }
+ if ((flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
+ (flag == DLOOP_DATALOOP_ALL_BYTES))
+ {
+ return DLOOP_Dataloop_create_flattened_struct(count,
+ blklens,
+ disps,
+ oldtypes,
+ dlp_p,
+ dlsz_p,
+ dldepth_p,
+ flag);
+ }
/* scan through types and gather derived type info */
for (i=0; i < count; i++)
diff --git a/src/mpid/common/datatype/dataloop/dataloop_optimize.c b/src/mpid/common/datatype/dataloop/dataloop_optimize.c
deleted file mode 100644
index 29be813..0000000
--- a/src/mpid/common/datatype/dataloop/dataloop_optimize.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-
-/*
- * (C) 2013 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "dataloop.h"
-
-/* #define MPICH_DEBUG_DATALOOP */
-#ifdef MPICH_DEBUG_DATALOOP
-static int firstCall = 1;
-static int printDataloop = 0;
-static int printIfOptimized = 0;
-
-/* Print format:
- (spaces for level).(el_size,el_extent,el_type)(count)....
-*/
-static void dl_print_dataloop( int, int, DLOOP_Dataloop * );
-static void dl_print_contig( int, DLOOP_Dataloop * );
-static void dl_print_vector( int, DLOOP_Dataloop * );
-static void dl_print_blockindexed( int, DLOOP_Dataloop * );
-static void dl_print_struct( int, DLOOP_Dataloop * );
-static void dl_print( int, const char * );
-
-static void dl_print_tab( int l )
-{
- int i;
- for (i=2*l; i!=0; i--) printf( "%c", ' ' );
-}
-static void dl_print_base( DLOOP_Dataloop *dp )
-{
- printf( "(%ld,%ld,%lx)(%ld)", (long)dp->el_size, (long)dp->el_extent,
- (long)dp->el_type, (long)dp->loop_params.count );
-}
-static void dl_print( int l, const char *s )
-{
- dl_print_tab(l);
- printf( "%s", s );
-}
-static void dl_print_contig( int l, DLOOP_Dataloop *dp )
-{
- dl_print_tab(l);
- printf( "CONTIG " );
- dl_print_base( dp );
- printf( "\n" );
-}
-static void dl_print_vector( int l, DLOOP_Dataloop *dp )
-{
- int stride = dp->loop_params.v_t.stride;
- int blocksize = dp->loop_params.v_t.blocksize ;
- dl_print_tab(l);
- printf( "VECTOR " );
- dl_print_base( dp );
- printf( ":Stride %d Blocksize %d\n", stride, blocksize );
-}
-static void dl_print_blockindexed( int l, DLOOP_Dataloop *dp )
-{
- int blocksize = dp->loop_params.bi_t.blocksize ;
- DLOOP_Offset *offarray = dp->loop_params.bi_t.offset_array;
- int i, n;
- dl_print_tab(l);
- printf( "BLOCKINDEXED " );
- dl_print_base( dp );
- printf( ":Blocksize %d:", blocksize );
- n = dp->loop_params.bi_t.count;
- if (n > 8) n = 8;
- for (i=0; i<n; i++) {
- printf( "%lx,", (long)offarray[i] );
- }
- if (dp->loop_params.bi_t.count > n) printf( "..." );
- printf( "\n" );
-}
-static void dl_print_indexed( int l, DLOOP_Dataloop *dp )
-{
- DLOOP_Count *blocksizearray = dp->loop_params.i_t.blocksize_array ;
- DLOOP_Offset *offarray = dp->loop_params.i_t.offset_array;
- int i, n;
- int minblock, maxblock;
- dl_print_tab(l);
- printf( "INDEXED " );
- dl_print_base( dp );
- n = dp->loop_params.i_t.count;
- minblock = maxblock = (n>0) ? blocksizearray[0] : 0;
- for (i=0; i<n; i++) {
- if (blocksizearray[i] > maxblock) maxblock = blocksizearray[i];
- if (blocksizearray[i] < minblock) minblock = blocksizearray[i];
- }
- printf( "blocks in [%d,%d]", minblock, maxblock );
-
- if (n > 8) n = 8;
- for (i=0; i<n; i++) {
- printf( "(%lx,%ld)", (long)offarray[i], (long)blocksizearray[i] );
- }
- if (dp->loop_params.i_t.count > n) printf( "..." );
- printf( "\n" );
-}
-
-static void dl_print_struct( int l, DLOOP_Dataloop *dp )
-{
- DLOOP_Count *blocksizearray = dp->loop_params.s_t.blocksize_array ;
- DLOOP_Offset *offarray = dp->loop_params.s_t.offset_array;
- DLOOP_Dataloop **looparray = dp->loop_params.s_t.dataloop_array;
- int i, n;
- dl_print_tab(l);
- printf( "STRUCT " );
- dl_print_base( dp );
- printf( "\n" );
- n = dp->loop_params.i_t.count;
- if (n > 8) n = 8;
- for (i=0; i<n; i++) {
- dl_print_tab(l+1);
- printf( "(%lx,%ld):\n", (long)offarray[i], (long)blocksizearray[i] );
- dl_print_dataloop( l+1, 0, looparray[i] );
- }
- if (dp->loop_params.i_t.count > n) printf( "...\n" );
-}
-static void dl_print_dataloop( int l, int doBase, DLOOP_Dataloop *dp )
-{
- dl_print_tab( l );
- if (doBase)
- dl_print_base( dp );
- switch (dp->kind & DLOOP_KIND_MASK) {
- case DLOOP_KIND_CONTIG:
- dl_print_contig( l, dp );
- break;
- case DLOOP_KIND_VECTOR:
- dl_print_vector( l, dp );
- break;
- case DLOOP_KIND_BLOCKINDEXED:
- dl_print_blockindexed( l, dp );
- break;
- case DLOOP_KIND_INDEXED:
- dl_print_indexed( l, dp );
- break;
- case DLOOP_KIND_STRUCT:
- dl_print_struct( l, dp );
- break;
- default:
- dl_print( l, "Unknown dataloop type " );
- printf( "\n" );
- break;
- }
-}
-#endif
-
-void PREPEND_PREFIX(Dataloop_debug_print)( DLOOP_Dataloop *dp )
-{
-#ifdef MPICH_DEBUG_DATALOOP
- if (firstCall) {
- char *s = getenv( "MPICH_DATALOOP_PRINT" );
- if (s && (strcmp(s,"yes")==0 || strcmp(s,"YES") == 0)) {
- printDataloop = 1;
- printIfOptimized = 1;
- }
- firstCall = 0;
- }
- if (printDataloop) {
- printf( "In Dataloop_debug_print:\n" );
- dl_print_dataloop( 1, 0, dp );
- }
-#endif
-}
-
-/*
- * Indicates whether a dataloop is a basic and final contig type.
- * This can be used to determine when a contig type can be removed
- * in a dataloop.
- */
-static int dl_contig_isFinal( DLOOP_Dataloop *dp )
-{
- if ((dp->kind & DLOOP_KIND_MASK) != DLOOP_KIND_CONTIG) return 0;
- if (dp->el_size == dp->el_extent &&
- (dp->kind & DLOOP_FINAL_MASK))
- return 1;
- return 0;
-}
-
-
-/*
- * Optimize a dataloop
- *
- * Apply the following transformations and return a new dataloop.
- * 1. Convert all predefined types to UINTS with the best alignment (may be BYTE
- * in worst case)
- * 2. Convert blocks of contiguous into a single block of basic unit (e.g.,
- * a vector type with a block count of 27 applied to a contiguous type of
- * 6 ints will be turned into a block count of (27*6) UINTs)
- * 3. Convert struct (with different dataloops (from different MPI datatypes)
- * into indexed when all types are contig
- * 4. Convert dataloops with counts of 1 into simpler types (e.g., q vector
- * with 1 element is really a contig type)
- *
- * Value of these optimizations
- * A 2012 paper[1] compared performance of Open MPI, MPICH2, and user-written code
- * for some datatypes, and found MPICH2 often performed poorer than other
- * options. An investigation showed that some of the issues are due to
- * a failure to perform optimizations of these type (especially #1 and 2).
- * It may also be necessary to enhance the dataloop execution engine, but
- * that will b a separate step.
- *
- * [1] T. Schneider and R. Gerstenberger and T. Hoefler, "Micro-Applications
- * for Communication Data Access Patterns and MPI Datatypes", EuroMPI 2012
- *
- * The level argument is used primarily for debugging output; it keeps track
- * of how deep a recursive application of this routine has gone.
- */
-int PREPEND_PREFIX(Dataloop_optimize)(DLOOP_Dataloop *dlpOld_p, int level )
-{
- int i;
-
-#ifdef MPICH_DEBUG_DATALOOP
- /* Temp for debugging */
- /* This is threadsafe in the sense that we don't care */
- if (firstCall) {
- char *s = getenv( "MPICH_DATALOOP_PRINT" );
- if (s && (strcmp(s,"yes")==0 || strcmp(s,"YES") == 0)) {
- printDataloop = 1;
- printIfOptimized = 1;
- }
- firstCall = 0;
- }
- if (printDataloop && level == 0)
- printf( "About to optimize in commit...\n" );
-#endif
-
- switch (dlpOld_p->kind & DLOOP_KIND_MASK) {
- case DLOOP_KIND_CONTIG:
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- dl_print_contig( level, dlpOld_p );
-#endif
- /* replace contig of (non-basic) contig with contig (basic) */
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- DLOOP_Dataloop *dlpChild_p = dlpOld_p->loop_params.c_t.dataloop;
- PREPEND_PREFIX(Dataloop_optimize)( dlpChild_p, level+1 );
- if ((dlpChild_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG &&
- dl_contig_isFinal( dlpChild_p )) {
- if (dlpOld_p->el_size == dlpOld_p->el_extent &&
- !MPIU_Prod_overflows_max(
- dlpChild_p->loop_params.c_t.count,
- dlpOld_p->loop_params.c_t.count,
- INT_MAX ) ) {
-
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- printf( "replacing with contig\n" );
-#endif
- dlpOld_p->loop_params.c_t.count *= dlpChild_p->loop_params.c_t.count;
- dlpOld_p->el_size = dlpChild_p->el_size;
- dlpOld_p->el_extent = dlpChild_p->el_extent;
- dlpOld_p->el_type = dlpChild_p->el_type;
- dlpOld_p->kind |= DLOOP_FINAL_MASK;
- dlpOld_p->loop_params.c_t.dataloop = 0;
-#ifdef MPICH_DEBUG_DATALOOP
- if (printIfOptimized || printDataloop) {
- printf( "replacement contig is:\n" );
- dl_print_contig( level, dlpOld_p );
- }
-#endif
- }
- else {
- /* */
- /* printf( "not replacing...\n" ); */
- /* If the low level contig is a single byte,
- we could make that replacement. Not done. */
- /* By doing nothing here, we ensure that the dataloop
- is correct if not fully optimized */
- ;
- }
- }
- }
- break;
-
- case DLOOP_KIND_VECTOR:
- /* if sub-dloop is (non-basic) contig, merge with blockcount */
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- dl_print_vector( level, dlpOld_p );
-#endif
-
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- DLOOP_Dataloop *dlpChild_p = dlpOld_p->loop_params.v_t.dataloop;
- PREPEND_PREFIX(Dataloop_optimize)( dlpChild_p, level+1 );
-
- if (dl_contig_isFinal( dlpChild_p ) &&
- !MPIU_Prod_overflows_max(
- dlpChild_p->loop_params.count,
- dlpOld_p->loop_params.v_t.blocksize,
- INT_MAX ) ) {
- /* We can replace the contig type by enlarging the blocksize */
- if (dlpOld_p->el_size == dlpOld_p->el_extent ||
- dlpOld_p->loop_params.v_t.blocksize == 1) {
- /* Reset the kind to final, free the child type,
- set to null */
- dlpOld_p->loop_params.v_t.blocksize *=
- dlpChild_p->loop_params.count;
- dlpOld_p->el_size = dlpChild_p->el_size;
- dlpOld_p->el_type = dlpChild_p->el_type;
- /*dlpOld_p->el_extent = dlpChild_p->el_extent; */
- dlpOld_p->kind |= DLOOP_FINAL_MASK;
- dlpOld_p->loop_params.v_t.dataloop = 0;
-#ifdef MPICH_DEBUG_DATALOOP
- if (printIfOptimized || printDataloop) {
- printf( "replacement Vector is:\n" );
- dl_print_vector( level, dlpOld_p );
- }
-#endif
- }
- else {
- /* TODO: If the vector elements do not have
- size==extent, and the blocksize is greater than 1,
- then it may be better to replace the elements with
- a single strided(vector) copy with blocksize elements:
- New vector:
- stride <- extent
- el_size <- size
- extent <- ?
- count <- blocksize
- blocksize <- 1
- Old vector become
- blocksize <- 1
- extent <- ?
- */
- dlpChild_p->loop_params.v_t.stride =
- dlpOld_p->el_extent;
- dlpChild_p->el_size = 1;
- dlpChild_p->el_type = MPI_BYTE;
- dlpChild_p->loop_params.v_t.dataloop = 0;
- dlpChild_p->loop_params.v_t.count =
- dlpOld_p->loop_params.v_t.blocksize;
- dlpChild_p->loop_params.v_t.blocksize = dlpOld_p->el_size;
- dlpChild_p->kind = DLOOP_KIND_VECTOR |
- DLOOP_FINAL_MASK;
- dlpOld_p->loop_params.v_t.blocksize = 1;
-#ifdef MPICH_DEBUG_DATALOOP
- if (printIfOptimized || printDataloop) {
- printf( "Replacing vector of contig with vector of vector\n" );
- printf( "replacement Vector is:\n" );
- dl_print_vector( level, dlpOld_p );
- dl_print_vector( level+1, dlpChild_p );
- }
-#endif
- }
- }
- }
- /* replace vector of a single element with contig */
- if ((dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- int blocksize = dlpOld_p->loop_params.v_t.blocksize;
- int count = dlpOld_p->loop_params.v_t.count;
- if (dlpOld_p->el_size * blocksize ==
- dlpOld_p->loop_params.v_t.stride &&
- !MPIU_Prod_overflows_max( count, blocksize, INT_MAX ) ) {
- dlpOld_p->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
- dlpOld_p->loop_params.c_t.dataloop = 0;
- dlpOld_p->loop_params.c_t.count = count * blocksize;
-#ifdef MPICH_DEBUG_DATALOOP
- if (printIfOptimized || printDataloop) {
- printf( "replacement Contig is:\n" );
- dl_print_contig( level, dlpOld_p );
- }
-#endif
- }
- }
- /* replace vector that is contiguous with contiguous */
- break;
-
- case DLOOP_KIND_BLOCKINDEXED:
- /* if subdloop is (non-basic) contig, merge with blockcount */
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- dl_print_blockindexed( level, dlpOld_p );
-#endif
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- DLOOP_Dataloop *dlpChild_p = dlpOld_p->loop_params.bi_t.dataloop;
- PREPEND_PREFIX(Dataloop_optimize)( dlpChild_p, level+1 );
- if (dl_contig_isFinal( dlpChild_p ) &&
- !MPIU_Prod_overflows_max(
- dlpChild_p->loop_params.count,
- dlpOld_p->loop_params.bi_t.blocksize,
- INT_MAX ) ) {
- /* We can replace the contig type by enlarging the blocksize */
-
- /* Reset the kind to final, free the child type, set to null */
- dlpOld_p->loop_params.bi_t.blocksize *= dlpChild_p->loop_params.count;
- dlpOld_p->el_size = dlpChild_p->el_size;
- /*dlpOld_p->el_extent = dlpChild_p->el_extent;*/
- dlpOld_p->el_type = dlpChild_p->el_type;
- dlpOld_p->kind |= DLOOP_FINAL_MASK;
- dlpOld_p->loop_params.bi_t.dataloop = 0;
-#ifdef MPICH_DEBUG_DATALOOP
- if (printIfOptimized || printDataloop) {
- printf( "replacement BlockIndexed is:\n" );
- dl_print_blockindexed( level, dlpOld_p );
- }
-#endif
- }
- }
- break;
-
- case DLOOP_KIND_INDEXED:
- /* if sub-dloop is (non-basic) contig, merge with blockcount */
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- dl_print_indexed( level, dlpOld_p );
-#endif
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- DLOOP_Dataloop *dlpChild_p = dlpOld_p->loop_params.i_t.dataloop;
- PREPEND_PREFIX(Dataloop_optimize)( dlpChild_p, level+1 );
- if (dl_contig_isFinal( dlpChild_p ) ) {
- /* Could include the child type in the blocksize counts */
- }
- }
-
- /* replace indexed of a single element with contig */
-
- /* If all block counts are multiples of the smallest, and if most
- blocks are smallest, then the other blocks could be split into
- separate blocks with appropriate offsets, replacing indexed with
- blockindexed */
-
- break;
-
- case DLOOP_KIND_STRUCT:
- /* if sub-dloops are all contig, replace with indexed */
- /* Not done yet - but first step is to recurse and
- simply/optimize the component dataloops */
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop) {
- dl_print_struct( level, dlpOld_p );
- printf( "now optimizing...\n" );
- }
-#endif
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- for (i=0; i<dlpOld_p->loop_params.s_t.count; i++) {
- PREPEND_PREFIX(Dataloop_optimize)(
- dlpOld_p->loop_params.s_t.dataloop_array[i],
- level+1);
- }
- }
- /* Can the preceding if case ever be false? */
- /* Heres where we might check the following:
- Are all child dataloops CONTIG?
- Are all extents equal to sizes?
- Are all LBs equal to 0?
- If these are all true and in addition they are contiguous,
- replace with a single contig (but be careful of the extent)
- Otherwise, if these are all true, then replace with INDEXED.
- */
- if (!(dlpOld_p->kind & DLOOP_FINAL_MASK)) {
- int isContig = 1;
- int allContig = 1;
- MPI_Aint lastAdd = 0;
- for (i=0; i<dlpOld_p->loop_params.s_t.count; i++) {
- DLOOP_Dataloop *dlpChild_p =
- dlpOld_p->loop_params.s_t.dataloop_array[i];
- if ((dlpChild_p->kind & DLOOP_KIND_MASK) != DLOOP_KIND_CONTIG) {
- allContig = 0; break;
- }
- if (/* dlpChild_p->el_lb != 0 || */ /* No lb in dataloop(?) */
- dlpChild_p->el_extent != dlpChild_p->el_size) {
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- printf( "not natural contig\n" );
-#endif
- allContig = 0; break;
- }
- if (isContig &&
- lastAdd != dlpOld_p->loop_params.s_t.offset_array[i]) {
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- printf( "Not contiguous bytes: %lx != %lx\n",
- (long)lastAdd,
- (long)dlpOld_p->loop_params.s_t.offset_array[i] );
-#endif
- isContig = 0;
- }
- else {
- lastAdd += dlpChild_p->el_extent *
- dlpChild_p->loop_params.count;
- }
- }
- if (allContig) {
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- printf( "All subtypes are contig - can replace with index\n" );
-#endif
- if (isContig) {
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- printf( "All subtypes consequtive - can replace with a single contig\n" );
-#endif
- ;
- }
- }
- }
-
- break;
- default:
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop)
- dl_print( level, "Unknown type!" );
-#endif
- break;
- }
-
-#ifdef MPICH_DEBUG_DATALOOP
- if (printDataloop && level == 0)
- printf( "Done!\n" );
-#endif
-
- return 0;
-}
-
-
-/*
- * Make an estimate at the complexity of a datatype. This can be used
- * to determine whether flattening the datatype to an indexed type is
- * likely to be efficient.
- */
-int PREPEND_PREFIX(Dataloop_est_complexity)(DLOOP_Dataloop *dlp_p,
- MPI_Aint *nElms,
- MPI_Aint *nDesc )
-{
- int i;
- MPI_Aint myElms = 0;
- MPI_Aint myDesc = 0;
- MPI_Aint childElms = 0, childDesc = 0;
- DLOOP_Dataloop *dlpChild_p;
-
- switch (dlp_p->kind & DLOOP_KIND_MASK) {
- case DLOOP_KIND_CONTIG:
- /* Data moved is count*size of the child type */
-
- if (!(dlp_p->kind & DLOOP_FINAL_MASK)) {
- dlpChild_p = dlp_p->loop_params.c_t.dataloop;
- PREPEND_PREFIX(Dataloop_est_complexity)( dlpChild_p, &childElms,
- &childDesc );
- }
- else {
- childElms = dlp_p->el_size;
- childDesc = 0;
- }
- myElms += dlp_p->loop_params.c_t.count * childElms;
- myDesc += childDesc + 1;
-
- break;
-
- case DLOOP_KIND_VECTOR:
- /* Data moved is count*size of the child type */
-
- if (!(dlp_p->kind & DLOOP_FINAL_MASK)) {
- dlpChild_p = dlp_p->loop_params.v_t.dataloop;
- PREPEND_PREFIX(Dataloop_est_complexity)( dlpChild_p, &childElms,
- &childDesc );
- }
- else {
- childElms = dlp_p->el_size;
- childDesc = 0;
- }
- myElms += dlp_p->loop_params.v_t.count *
- dlp_p->loop_params.v_t.blocksize * childElms;
- myDesc += childDesc + 2;
-
- break;
-
- case DLOOP_KIND_BLOCKINDEXED:
- if (!(dlp_p->kind & DLOOP_FINAL_MASK)) {
- dlpChild_p = dlp_p->loop_params.bi_t.dataloop;
- PREPEND_PREFIX(Dataloop_est_complexity)( dlpChild_p, &childElms,
- &childDesc );
- }
- else {
- childElms = dlp_p->el_size;
- childDesc = 0;
- }
- myElms += dlp_p->loop_params.bi_t.count *
- dlp_p->loop_params.bi_t.blocksize * childElms;
- myDesc += childDesc + dlp_p->loop_params.bi_t.count;
- break;
-
- case DLOOP_KIND_INDEXED:
-
- if (!(dlp_p->kind & DLOOP_FINAL_MASK)) {
- dlpChild_p = dlp_p->loop_params.i_t.dataloop;
- PREPEND_PREFIX(Dataloop_est_complexity)( dlpChild_p, &childElms,
- &childDesc );
- }
- else {
- childElms = dlp_p->el_size;
- childDesc = 0;
- }
- myElms += dlp_p->loop_params.i_t.total_blocks * childElms;
- myDesc += childDesc + 2 * dlp_p->loop_params.i_t.count;
-
- break;
-
- case DLOOP_KIND_STRUCT:
- if (!(dlp_p->kind & DLOOP_FINAL_MASK)) {
- MPI_Aint celm, cdesc;
- for (i=0; i<dlp_p->loop_params.s_t.count; i++) {
- celm = 0; cdesc = 0;
- PREPEND_PREFIX(Dataloop_est_complexity)(
- dlp_p->loop_params.s_t.dataloop_array[i],
- &celm, &cdesc );
- childElms += celm * dlp_p->loop_params.s_t.blocksize_array[i];
- childDesc += cdesc + 3;
- }
- }
- else {
- int elsize = dlp_p->el_size;
- childElms = 0;
- childDesc = 0;
- for (i=0; i<dlp_p->loop_params.s_t.count; i++) {
- childElms += elsize * dlp_p->loop_params.s_t.blocksize_array[i];
- childDesc += 3;
- }
- }
-
- myElms += childElms;
- myDesc += childDesc;
- break;
-
- default:
- break;
- }
-
- /* Return the final values */
- *nElms += myElms;
- *nDesc += myDesc;
-
- return 0;
-}
-
-/*
- * Estimate the complexity of a struct Dataloop before it is constructed.
- */
-int PREPEND_PREFIX(Dataloop_est_struct_complexity)( int count,
- const int blklens[],
- const DLOOP_Type oldtypes[],
- MPI_Aint *nElms,
- MPI_Aint *nDesc )
-{
- MPI_Aint myElms = 0, myDesc = 0;
- int i;
- int flag = MPID_DATALOOP_ALL_BYTES;
-
- for (i=0; i<count; i++) {
- DLOOP_Dataloop *dlp_p = 0;
- MPI_Aint celms = 0, cdesc = 0;
-
- DLOOP_Handle_get_loopptr_macro(oldtypes[i],dlp_p,flag);
- if (dlp_p) {
- PREPEND_PREFIX(Dataloop_est_complexity)( dlp_p,
- &celms, &cdesc );
- }
- else {
- celms = 1;
- cdesc = 1;
- }
- myElms += celms * blklens[i];
- myDesc += cdesc;
- }
- *nElms = myElms;
- *nDesc = myDesc;
-
- return MPI_SUCCESS;
-}
diff --git a/src/mpid/common/datatype/dataloop/segment_packunpack.c b/src/mpid/common/datatype/dataloop/segment_packunpack.c
index 1712f38..7a2ea88 100644
--- a/src/mpid/common/datatype/dataloop/segment_packunpack.c
+++ b/src/mpid/common/datatype/dataloop/segment_packunpack.c
@@ -13,8 +13,6 @@
#include "dataloop.h"
#include "veccpy.h"
-/* NOTE: bufp values are unused, ripe for removal */
-
/* #define MPICH_DEBUG_SEGMENT_MOVE */
/* TODO: Consider integrating this with the general debug support. */
/* Note: This does not use the CVAR support for the environment variable
@@ -37,6 +35,8 @@ static void setPrint( void ) {
#define DBG_SEGMENT(_a)
#endif
+/* NOTE: bufp values are unused, ripe for removal */
+
int PREPEND_PREFIX(Segment_contig_m2m)(DLOOP_Offset *blocks_p,
DLOOP_Type el_type,
DLOOP_Offset rel_off,
@@ -323,22 +323,6 @@ int PREPEND_PREFIX(Segment_blkidx_m2m)(DLOOP_Offset *blocks_p,
DLOOP_Handle_get_size_macro(el_type, el_size);
DBG_SEGMENT(printf( "blkidx m2m: elsize = %d, count = %d, blocklen = %d\n", (int)el_size, (int)count, (int)blocklen ));
- /* If the blocklen * el_size is relatively small, then for
- performance reasons, its important to hoist most of these
- tests out of the loop. Ignoring some of the issues of handling
- the available buffer size (blocks_left), this should translate
- directly into code that looks like this for blocksize == 1
-
- for (i=0; i<count; i++) {
- dest[i] = userbuf[offsetarray[i]];
- }
-
- where "dest" and "userbuf" are pointers to objects of the correct
- size. If blocksize is > 1, then various unrollings are important
- until blocksize is large enough to make the overhead of memcpy
- negligible. Datatypes such as this are used in LAMMPS, for example.
- */
-
while (blocks_left) {
char *src, *dest;
diff --git a/src/mpid/common/datatype/mpid_type_commit.c b/src/mpid/common/datatype/mpid_type_commit.c
index b27eaff..d990503 100644
--- a/src/mpid/common/datatype/mpid_type_commit.c
+++ b/src/mpid/common/datatype/mpid_type_commit.c
@@ -58,17 +58,6 @@ int MPID_Type_commit(MPI_Datatype *datatype_p)
MPIU_DBG_PRINTF(("# contig blocks = %d\n",
(int) datatype_ptr->max_contig_blocks));
- if (MPIR_CVAR_DATALOOP_OPTIMIZE) {
- MPID_Dataloop_optimize(datatype_ptr->dataloop, 0 );
- }
- else {
- /* This allows the developer to output the final dataloops
- in the case where the dataloops are not optimized.
- It does nothing if that printing is not enabled.
- */
- MPID_Dataloop_debug_print( datatype_ptr->dataloop );
- }
-
#if 0
MPIDI_Dataloop_dot_printf(datatype_ptr->dataloop, 0, 1);
#endif
diff --git a/test/mpi/datatype/testlist.in b/test/mpi/datatype/testlist.in
index 3f7d07a..b2875b2 100644
--- a/test/mpi/datatype/testlist.in
+++ b/test/mpi/datatype/testlist.in
@@ -59,4 +59,4 @@ cxx-types 1 mpiversion=3.0
@largetest at large_type 1 mpiversion=3.0
@largetest at large_type_sendrec 2 arg=31 mpiversion=3.0
@largetest at large_type_sendrec 2 arg=32 mpiversion=3.0 timeLimit=360
-get-struct 2 xfail=ticket2115
+get-struct 2
-----------------------------------------------------------------------
Summary of changes:
src/mpid/common/datatype/dataloop/Makefile.mk | 3 +-
.../common/datatype/dataloop/dataloop_create.h | 13 +-
.../datatype/dataloop/dataloop_create_struct.c | 93 +---
.../common/datatype/dataloop/dataloop_optimize.c | 668 --------------------
.../common/datatype/dataloop/segment_packunpack.c | 20 +-
src/mpid/common/datatype/mpid_type_commit.c | 11 -
test/mpi/datatype/testlist.in | 2 +-
7 files changed, 17 insertions(+), 793 deletions(-)
delete mode 100644 src/mpid/common/datatype/dataloop/dataloop_optimize.c
hooks/post-receive
--
MPICH primary repository
More information about the commits mailing list