[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2b3-181-g0c54cef
Service Account
noreply at mpich.org
Wed Jul 1 23:28:35 CDT 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master, has been updated
via 0c54cef778aa8a4849fedf92d96c03f4a38caa3d (commit)
via c0141dcb5a6fcc956d31dfc7ccdc01fe9e846940 (commit)
from 1629ff5d74ffcf2e319e665e2b613acd48ed01b0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/0c54cef778aa8a4849fedf92d96c03f4a38caa3d
commit 0c54cef778aa8a4849fedf92d96c03f4a38caa3d
Author: Pavan Balaji <balaji at anl.gov>
Date: Wed Jul 1 16:53:26 2015 -0500
Remove test/mpid.
Unused/untested directory.
Signed-off-by: Halim Amer <aamer at anl.gov>
diff --git a/test/Makefile.am b/test/Makefile.am
index b92ca41..e08a285 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -7,7 +7,7 @@
## FIXME: commented out temporarily, really just want "mpi" to be dealt with at
## distclean time for now
-##SUBDIRS = mpi mpid util basic commands .
+##SUBDIRS = mpi util basic commands .
SUBDIRS = mpi commands .
# Test both the MPI routines and the MPICH command scripts
diff --git a/test/mpid/atomic.c b/test/mpid/atomic.c
deleted file mode 100644
index 16e3b69..0000000
--- a/test/mpid/atomic.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2002 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-/*
- * Test performance of atomic access operations.
- * This is a *very* simple test.
- */
-
-#include "mpiimpl.h"
-
-/* FIXME: MPICH_SINGLE_THREADED is obsolete and no longer defined */
-#if defined(MPICH_SINGLE_THREADED) || !defined(USE_ATOMIC_UPDATES)
-#define MPID_Atomic_incr( count_ptr ) \
- __asm__ __volatile__ ( "lock; incl %0" \
- : "=m" (*count_ptr) :: "memory", "cc" )
-
-#define MPID_Atomic_decr_flag( count_ptr, nzflag ) \
- __asm__ __volatile__ ( "xor %%ax,%%ax; lock; decl %0 ; setnz %%al" \
- : "=m" (*count_ptr) , "=a" (nzflag) :: "memory", "cc" )
-#endif
-
-int main( int argc, char **argv )
-{
- int i, n;
- MPID_Thread_lock_t mutex;
- MPID_Time_t start_t, end_t;
- double time_lock, time_incr, time_single;
- int count;
- int nzflag;
-
- /* Set values */
- n = 10000000;
-
- /* Warm up */
- count = 0;
- MPID_Wtime( &start_t );
- for (i=0; i<1000; i++) {
- count ++;
- }
- MPID_Wtime( &end_t );
-
- /* Test nonatomic increment */
- count = 0;
- MPID_Wtime( &start_t );
- for (i=0; i<n; i++) {
- count ++;
- }
- MPID_Wtime( &end_t );
- MPID_Wtime_diff( &start_t, &end_t, &time_single );
- time_single /= n;
- if (count != n) {
- printf( "Error in nonatomic update\n" );
- }
-
- /* Test atomic increment using lock/unlock */
- count = 0;
- MPID_Thread_lock_init( &mutex );
- MPID_Wtime( &start_t );
- for (i=0; i<n; i++) {
- MPID_Thread_lock( &mutex );
- count ++;
- MPID_Thread_unlock( &mutex );
- }
- MPID_Wtime( &end_t );
- MPID_Wtime_diff( &start_t, &end_t, &time_lock );
- time_lock /= n;
- if (count != n) {
- printf( "Error in thread-locked atomic update\n" );
- }
-
- /* Test atomic increment using special instructions */
- count = 0;
- MPID_Wtime( &start_t );
- for (i=0; i<n; i++) {
- MPID_Atomic_incr( &count );
- }
- MPID_Wtime( &end_t );
- MPID_Wtime_diff( &start_t, &end_t, &time_incr );
-
- time_incr /= n;
- if (count != n) {
- printf( "Error in native atomic update (%d != %d)\n", count, n );
- }
- /* Check for dec sets zero flag */
- for (i=0; i<n-1; i++) {
- MPID_Atomic_decr_flag( &count, nzflag );
- if (!nzflag) {
- printf( "flag not set on iteration %d\n", i );
- break;
- }
- }
- MPID_Atomic_decr_flag( &count, nzflag );
- if (!nzflag) {
- printf( "Flag still set on final decrement\n" );
- }
-
- /* convert times to microseconds */
- time_single *= 1.0e6;
- time_lock *= 1.0e6;
- time_incr *= 1.0e6;
- printf ("Regular \t%f\nLock time \t%f\nAtomic time\t%f\n",
- time_single, time_lock, time_incr );
-
- {
- unsigned int low=0, high=0;
- __asm__ __volatile__ ( "rdtsc ; movl %%edx,%0 ; movl %%eax,%1"
- : "=r" (high), "=r" (low) ::
- "eax", "edx" );
-
- printf ( "time stamp %u %u\n", high, low );
- }
- return 0;
-}
diff --git a/test/mpid/atomic_fai.c b/test/mpid/atomic_fai.c
deleted file mode 100644
index 63388dd..0000000
--- a/test/mpid/atomic_fai.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2002 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-/*
- * Test performance of atomic access operations.
- * This is a *very* simple test.
- */
-
-#include "stdio.h"
-#include "unistd.h"
-
-#define MPID_Atomic_fetch_and_incr(count_ptr_, count_old_) do { \
- (count_old_) = 1; \
- __asm__ __volatile__ ("lock ; xaddl %0,%1" \
- : "=r" (count_old_), "=m" (*count_ptr_) \
- : "0" (count_old_), "m" (*count_ptr_)); \
- } while (0)
-
-int main( int argc, char **argv )
-{
- volatile int count = 0;
- int count_old;
- int failures = 0;
- int i;
-
- for (i = 0; i < 1000; i++)
- {
- MPID_Atomic_fetch_and_inc(&count, count_old);
- if (count_old != i )
- {
- fprintf(stderr, "count_old=%d, should be %d\n", count_old, i);
- failures++;
- }
- if (count != i + 1 )
- {
- fprintf(stderr, "count=%d, should be %d\n", count, i + 1);
- failures++;
- }
- }
-
- MPID_Atomic_fetch_and_inc(&count, count_old);
-
- if (failures == 0)
- {
- printf("No Errors\n");
- }
- else
- {
- printf("%d errors encountered\n", failures);
- }
-
- exit(0);
-}
diff --git a/test/mpid/ch3/reorder.c b/test/mpid/ch3/reorder.c
deleted file mode 100644
index 675d4d2..0000000
--- a/test/mpid/ch3/reorder.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2001 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include <stdio.h>
-
-#include "mpidimpl.h"
-
-#define ITERS 10
-
-int main(int argc, char * argv[])
-{
-#if defined(MPIDI_CH3_MSGS_UNORDERED)
- {
- int rc;
- int size;
- int rank;
- int buf[ITERS];
- int i;
-
- rc = MPI_Init(&argc, &argv);
- assert(rc == MPI_SUCCESS);
-
- rc = MPI_Comm_size(MPI_COMM_WORLD, &size);
- assert(rc == MPI_SUCCESS);
- rc = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- assert(rc == MPI_SUCCESS);
-
- if (size < 2)
- {
- fprintf(stderr, "ERROR: at least 2 processes are required\n");
- fflush(stderr);
- return 1;
- }
-
- if (rank == 0)
- {
- MPI_Request request;
- MPID_Request * req;
- MPI_Status status;
- MPID_Comm * comm;
- MPIDI_VC_t * vc;
-
- MPID_Comm_get_ptr(MPI_COMM_WORLD, comm);
- vc = comm->vcr[1];
-
- for (i = 0; i < ITERS; i++)
- {
- MPID_Seqnum_t seqnum = (i * 7) % ITERS;
- MPIDI_CH3_Pkt_eager_send_t pkt;
-
- pkt.type = MPIDI_CH3_PKT_EAGER_SEND;
- pkt.match.rank = 0;
- pkt.match.tag = i;
- pkt.match.context_id = comm->context_id + MPID_CONTEXT_INTRA_PT2PT;
- pkt.sender_req_id = MPI_REQUEST_NULL;
-# if defined(SEND_DATA)
- {
- pkt.data_sz = seqnum * sizeof(int);
- }
-# else
- {
- pkt.data_sz = 0;
- }
-# endif
- /* MPIDI_CH3U_VC_FAI_send_seqnum(vc, seqnum); */
- MPIDI_CH3U_Pkt_set_seqnum(&pkt, seqnum);
-
- printf("Sending msg %lu, tag=%d\n", seqnum, pkt.match.tag);
- fflush(stdout);
-
- if (pkt.data_sz > 0)
- {
- int j;
- MPID_IOV iov[2];
-
- for(j = 0; j < seqnum; j++)
- {
- buf[j] = seqnum;
- }
-
- iov[0].MPID_IOV_BUF = &pkt;
- iov[0].MPID_IOV_LEN = sizeof(pkt);
- iov[1].MPID_IOV_BUF = buf;
- iov[1].MPID_IOV_LEN = pkt.data_sz;
- req = MPIDI_CH3_iStartMsgv(vc, iov, 2);
- }
- else
- {
- req = MPIDI_CH3_iStartMsg(vc, &pkt, sizeof(pkt));
- }
-
- if (req != NULL)
- {
- MPIDI_CH3U_Request_set_seqnum(req, seqnum);
- MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_SEND);
- req->comm = comm;
- request = req->handle;
- }
- else
- {
- request = MPI_REQUEST_NULL;
- }
-
- rc = MPI_Wait(&request, &status);
- assert(rc == MPI_SUCCESS);
- }
-
- vc->seqnum_send += ITERS;
- }
- else if (rank == 1)
- {
- int errs = 0;
-
- for (i = 0; i < ITERS; i++)
- {
- MPI_Status status;
- int j;
-
- rc = MPI_Recv(buf, ITERS, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
- assert(rc == MPI_SUCCESS);
-
- rc = MPI_Get_count(&status, MPI_INT, &j);
- assert(rc == MPI_SUCCESS);
-
- printf("Recevied msg rank=%d tag=%d count=%d error=%d\n", status.MPI_SOURCE, status.MPI_TAG, j, status.MPI_ERROR);
- fflush(stdout);
-
-# if defined(SEND_DATA)
- {
- if (j != i)
- {
- errs++;
- printf("ERROR: expected count=%d, got count=%d\n", i, j);
- fflush(stdout);
- continue;
- }
-
- for (j = 0; j < i; j++)
- {
- if (buf[j] != i)
- {
- errs++;
- printf("ERROR: expected buf[%d]=%d, got buf[%d]=%d\n", j, i, j, buf[j]);
- fflush(stdout);
- }
- }
- }
-# endif
- }
-
- if (errs == 0)
- {
- printf("No Errors\n");
- }
- else
- {
- printf("%d Errors\n", errs);
- }
- fflush(stdout);
- }
-
- rc = MPI_Finalize();
- assert(rc == MPI_SUCCESS);
- }
-# else
- {
- fprintf(stderr, "Test disabled.\n");
- fflush(stderr);
- }
-# endif
- return 0;
-}
-
diff --git a/test/mpid/dltest.c b/test/mpid/dltest.c
deleted file mode 100644
index 44cd768..0000000
--- a/test/mpid/dltest.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-/* This provides a simple test of dynamically loading a library. If
- NO_UPCALL is defined, this library only provides routines; otherwise,
- it will make use of the routine upcall provided in the program that is
- loading this library */
-#ifndef NO_UPCALL
-extern int upcall( int );
-#endif
-
-int counter = 1;
-
-int init(void) {
- int a = counter;
- counter++;
-#ifndef NO_UPCALL
- a = upcall( a );
- if (a != counter) {
- counter --;
- }
-#endif
-}
-int finalize( int offset )
-{
- int rc = 1;
- if (counter != offset) {
- rc = 0;
- }
- return rc;
-}
diff --git a/test/mpid/dluse.c b/test/mpid/dluse.c
deleted file mode 100644
index b59a257..0000000
--- a/test/mpid/dluse.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-/*
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <dlfcn.h>
-
-int main( int argc, char *argv[] )
-{
- void *handle;
- int (*init)(void);
- int (*finalize)(int);
- int *counter;
- int errs = 0, rc;
-
- /* We allow different extensions for the shared libraries here,
- as OSX uses .dylib and Cygwin may use .dll . */
- handle = dlopen( "./libconftest."## #SHLIBEXT, RTLD_LAZY );
- if (!handle) {
- fprintf( stderr, "Could not open test library: %s\n", dlerror() );
- exit(1);
- }
-
- init = (int (*)(void))dlsym( handle, "init" );
- counter = (int *)dlsym( handle, "counter" );
- finalize = (int (*)(int))dlsym( handle, "finalize" );
- if (!init || !counter || !finalize) {
- errs++;
- fprintf( stderr, "Could not load a function or variable\n" );
- exit(1);
- }
-
- if (*counter != 1) {
- errs++;
- fprintf( stderr, "counter value is %d, expected 1\n" );
- }
- (*init)();
- if (*counter != 2) {
- errs++;
- fprintf( stderr, "counter value is %d, expected 2\n" );
- }
- rc = (*finalize)(2);
- if (rc != 1) {
- errs++;
- fprintf( stderr, "finalize returned failure\n" );
- }
- dlclose( handle );
-
- printf( "Found %d errors\n", errs );
- return 0;
-}
-
-int upcall( int a )
-{
- return a + 1;
-}
http://git.mpich.org/mpich.git/commitdiff/c0141dcb5a6fcc956d31dfc7ccdc01fe9e846940
commit c0141dcb5a6fcc956d31dfc7ccdc01fe9e846940
Author: Pavan Balaji <balaji at anl.gov>
Date: Wed Jul 1 16:14:34 2015 -0500
Remove unnecessary asm checks for nemesis.
We were testing to see if we were on an x86 32-bit platform that
accepts some assembly code to optimize 32-bit architectures. This is
only for 32-bit systems and seems to be not comprehensive enough to
avoid failures with some compilers (e.g., icc-13.1). Since such
performance optimization on 32-bit platforms is not really a priority
for us, it's cleaner to just remove this.
Signed-off-by: Halim Amer <aamer at anl.gov>
diff --git a/configure.ac b/configure.ac
index 44e7c97..47caa1b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4540,102 +4540,7 @@ int main(int argc, char **argv )
AC_DEFINE(HAVE_ANY_INT32_T_ALIGNMENT,1,[Define if int32_t works with any alignment])
fi
fi
-# Check for special compile characteristics
-# If we are either gcc or icc, see if we can use __asm__
-# We test on prog_gcc to allow gcc by any name; if we are using
-# icc, the value of CC must be icc for this test to pass
-if test "$ac_cv_prog_gcc" = "yes" -o "$ac_cv_prog_CC" = "icc" ; then
-AC_MSG_CHECKING([for gcc __asm__ and pentium cmpxchgl instruction])
-AC_TRY_RUN([
-int main(int argc, char *argv[])
-{
-long int compval = 10;
-volatile long int *p = &compval;
-long int oldval = 10;
-long int newval = 20;
-char ret;
-long int readval;
-__asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0"
- : "=q" (ret), "=m" (*p), "=a" (readval)
- : "r" (newval), "m" (*p), "a" (oldval) : "memory");
-return (compval == 20) ? 0 : -1;
-}
-], AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GCC_AND_PENTIUM_ASM, 1,[Define if using gcc on a system with an Intel Pentium class chip])
- lac_cv_use_atomic_updates="yes",
-AC_MSG_RESULT(no), AC_MSG_RESULT(not checking when cross compiling))
-fi
-if test "$lac_cv_use_atomic_updates" = "yes" ; then
- AC_DEFINE(USE_ATOMIC_UPDATES,, [Define if assembly language atomic update macros should be used (if available)])
-fi
-# check for x86_64
-if test "$ac_cv_prog_gcc" = "yes" ; then
-AC_MSG_CHECKING([for gcc __asm__ and AMD x86_64 cmpxchgq instruction])
-AC_TRY_RUN([
-int main(int argc, char *argv[])
-{
-long int compval = 10;
-volatile long int *p = &compval;
-long int oldval = 10;
-long int newval = 20;
-char ret;
-long int readval;
-__asm__ __volatile__ ("lock; cmpxchgq %3, %1; sete %0"
- : "=q" (ret), "=m" (*p), "=a" (readval)
- : "r" (newval), "m" (*p), "a" (oldval) : "memory");
-return (compval == 20) ? 0 : -1;
-}
-], AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GCC_AND_X86_64_ASM, 1,[Define if using gcc on a system with an AMD x86_64 class chip]),
-AC_MSG_RESULT(no), AC_MSG_RESULT(not checking when cross compiling))
-fi
-dnl
-dnl check for asm() format
-dnl
-dnl AC_MSG_CHECKING([for asm() and pentium cmpxchgl instruction])
-dnl AC_TRY_RUN([
-dnl int main(int argc, char *argv[])
-dnl {
-dnl long int compval = 10;
-dnl volatile long int *p = &compval;
-dnl long int oldval = 10;
-dnl long int newval = 20;
-dnl char ret;
-dnl long int readval;
-dnl asm("lock; cmpxchgl %3, %1; sete %0"
-dnl : "=q" (ret), "=m" (*p), "=a" (readval)
-dnl : "r" (newval), "m" (*p), "a" (oldval) : "memory");
-dnl return (compval == 20) ? 0 : -1;
-dnl }
-dnl ], AC_MSG_RESULT(yes)
-dnl AC_DEFINE(HAVE_PENTIUM_ASM, 1,[Define if using asm() on a system with an Intel Pentium class chip]),
-dnl AC_MSG_RESULT(no), AC_MSG_RESULT(not checking when cross compiling))
-dnl
-dnl
-dnl check for IA64
-dnl
-if test "$ac_cv_prog_gcc" = "yes" ; then
-AC_MSG_CHECKING([for gcc __asm__ and IA64 xchg4 instruction])
-AC_TRY_RUN([
-unsigned long _InterlockedExchange(volatile void *ptr, unsigned long x)
-{
- unsigned long result;
- __asm__ __volatile ("xchg4 %0=[%1],%2" : "=r" (result)
- : "r" (ptr), "r" (x) : "memory");
- return result;
-}
-int main(int argc, char *argv[])
-{
-long val = 1;
-volatile long *p = &val;
-long oldval = _InterlockedExchange(p, (unsigned long)2);
-return (oldval == 1 && val == 2) ? 0 : -1;
-}
-], AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GCC_AND_IA64_ASM, 1,[Define if using gcc on a system with an IA64 class chip]),
-AC_MSG_RESULT(no), AC_MSG_RESULT(not checking when cross compiling))
-fi
# -----------------------------------------------------------------------------
# Check for support of enable-coverage. Put this near the end of the tests
# because the coverage options may affect the other tests.
diff --git a/src/mpid/ch3/channels/nemesis/Makefile.mk b/src/mpid/ch3/channels/nemesis/Makefile.mk
index c27ba57..8078ba5 100644
--- a/src/mpid/ch3/channels/nemesis/Makefile.mk
+++ b/src/mpid/ch3/channels/nemesis/Makefile.mk
@@ -23,7 +23,6 @@ noinst_HEADERS += \
src/mpid/ch3/channels/nemesis/include/mpid_nem_generic_queue.h \
src/mpid/ch3/channels/nemesis/include/mpid_nem_impl.h \
src/mpid/ch3/channels/nemesis/include/mpid_nem_inline.h \
- src/mpid/ch3/channels/nemesis/include/mpid_nem_memdefs.h \
src/mpid/ch3/channels/nemesis/include/mpid_nem_nets.h \
src/mpid/ch3/channels/nemesis/include/mpid_nem_post.h \
src/mpid/ch3/channels/nemesis/include/mpid_nem_pre.h \
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_memdefs.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_memdefs.h
deleted file mode 100644
index 9e8ee39..0000000
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_memdefs.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
-/*
- * (C) 2006 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#ifndef MPID_MEMDEFS_H
-#define MPID_MEMDEFS_H
-#include <mpichconf.h>
-#include <mpimem.h>
-
-#if defined(HAVE_GCC_AND_PENTIUM_ASM)
-#define asm_memcpy(dst, src, n) do { \
- const char *_p = (char *)(src); \
- char *_q = (char *)(dst); \
- size_t _nl = (size_t)(n) >> 2; \
- __asm__ __volatile__ ("cld ; rep ; movsl ; movl %3,%0 ; rep ; movsb" \
- : "+c" (_nl), "+S" (_p), "+D" (_q) \
- : "r" ((n) & 3) : "memory" ); \
- } while (0)
-
-/*
- nt_memcpy (dst, src, len)
- This performs a memcopy using non-temporal stores. It's optimized
- for ia_32 machines.
-
- The general idea is to prefetch a block of the source data into the
- cache, then read the data from the source buffer into 64-bit mmx
- registers so that the data can be written to the destination buffer
- using non-temporal move instructions.
-
- This is done in three steps: copy 8K or larger chunks, copy (8K,
- 128B] chunks, and copy the rest.
-
- In the first step, the main loop prefetches an 8K chunk, by reading
- one element from each cacheline. Then we copy that 8K chunk, 64
- bytes at a time (8bytes per mmx reg * 8 mmx regs) using
- non-temporal stores. Rinse and repeat.
-
- The second step is essentially the same as the first, except that
- the amount of data to be copied in that step is less than 8K, so we
- prefetch all of the data. These two steps could have been combined
- but I think I saved some time by simplifying the main loop in step
- one by not checking if we have to prefetch less than 8K.
-
- The last step just copies whatever's left.
-
- */
-
-static inline void nt_memcpy (volatile void *dst, volatile const void *src, size_t len)
-{
- void *dummy_dst;
- void *dummy_src;
-
- int n;
-
- /* copy in 8K chunks */
- n = len & (-8*1024);
- if (n)
- {
-
- __asm__ __volatile__ ("mov %4, %%ecx\n"
- ".set PREFETCHBLOCK, 1024\n" /* prefetch PREFETCHBLOCK number of 8-byte words */
- "lea (%%esi, %%ecx, 8), %%esi\n"
- "lea (%%edi, %%ecx, 8), %%edi\n"
-
- "neg %%ecx\n"
- "emms\n"
-
- "1:\n" /* main loop */
-
- /* eax is the prefetch loop iteration counter */
- "mov $PREFETCHBLOCK/16, %%eax\n" /* only need to touch one element per cacheline, and we're doing two at once */
-
- /* prefetch 2 cachelines at a time (128 bytes) */
- "2:\n" /* prefetch loop */
- "mov (%%esi, %%ecx, 8), %%edx\n"
- "mov 64(%%esi, %%ecx, 8), %%edx\n"
- "add $16, %%ecx\n"
-
- "dec %%eax\n"
- "jnz 2b\n"
- "sub $PREFETCHBLOCK, %%ecx\n"
-
- /* eax is the copy loop iteration counter */
- "mov $PREFETCHBLOCK/8, %%eax\n"
-
- /* copy data 64 bytes at a time */
- "3:\n" /* copy loop */
- "movq (%%esi, %%ecx, 8), %%mm0\n"
- "movq 8(%%esi, %%ecx, 8), %%mm1\n"
- "movq 16(%%esi, %%ecx, 8), %%mm2\n"
- "movq 24(%%esi, %%ecx, 8), %%mm3\n"
- "movq 32(%%esi, %%ecx, 8), %%mm4\n"
- "movq 40(%%esi, %%ecx, 8), %%mm5\n"
- "movq 48(%%esi, %%ecx, 8), %%mm6\n"
- "movq 56(%%esi, %%ecx, 8), %%mm7\n"
-
- "movntq %%mm0, (%%edi, %%ecx, 8)\n"
- "movntq %%mm1, 8(%%edi, %%ecx, 8)\n"
- "movntq %%mm2, 16(%%edi, %%ecx, 8)\n"
- "movntq %%mm3, 24(%%edi, %%ecx, 8)\n"
- "movntq %%mm4, 32(%%edi, %%ecx, 8)\n"
- "movntq %%mm5, 40(%%edi, %%ecx, 8)\n"
- "movntq %%mm6, 48(%%edi, %%ecx, 8)\n"
- "movntq %%mm7, 56(%%edi, %%ecx, 8)\n"
-
- "add $8, %%ecx\n"
- "dec %%eax\n"
- "jnz 3b\n"
-
- "or %%ecx, %%ecx\n"
- "jnz 1b\n"
-
- "sfence\n"
- "emms\n"
- : "=D" (dummy_dst), "=S" (dummy_src)
- : "0" (dst), "1" (src), "g" (n >> 3)
- : "eax", "edx", "ecx", "memory" );
-
- src = (char *)src + n;
- dst = (char *)dst + n;
- }
-
- /* copy in 128byte chunks */
- n = len & (8*1024 - 1) & -128;
- if (n)
- {
-
- __asm__ __volatile__ ("mov %4, %%ecx\n"
- "lea (%%esi, %%ecx, 8), %%esi\n"
- "lea (%%edi, %%ecx, 8), %%edi\n"
-
- "push %%ecx\n" /* save n */
-
- "mov %%ecx, %%eax\n" /* prefetch loopctr = n/128 */
- "shr $4, %%eax\n"
-
- "neg %%ecx\n"
- "emms\n"
-
- /* prefetch all data to be copied 2 cachelines at a time (128 bytes)*/
- "1:\n" /* prefetch loop */
- "mov (%%esi, %%ecx, 8), %%edx\n"
- "mov 64(%%esi, %%ecx, 8), %%edx\n"
- "add $16, %%ecx\n"
-
- "dec %%eax\n"
- "jnz 1b\n"
-
- "pop %%ecx\n" /* restore n */
-
- "mov %%ecx, %%eax\n" /* write loopctr = n/64 */
- "shr $3, %%eax\n"
- "neg %%ecx\n"
-
- /* copy data 64 bytes at a time */
- "2:\n" /* copy loop */
- "movq (%%esi, %%ecx, 8), %%mm0\n"
- "movq 8(%%esi, %%ecx, 8), %%mm1\n"
- "movq 16(%%esi, %%ecx, 8), %%mm2\n"
- "movq 24(%%esi, %%ecx, 8), %%mm3\n"
- "movq 32(%%esi, %%ecx, 8), %%mm4\n"
- "movq 40(%%esi, %%ecx, 8), %%mm5\n"
- "movq 48(%%esi, %%ecx, 8), %%mm6\n"
- "movq 56(%%esi, %%ecx, 8), %%mm7\n"
-
- "movntq %%mm0, (%%edi, %%ecx, 8)\n"
- "movntq %%mm1, 8(%%edi, %%ecx, 8)\n"
- "movntq %%mm2, 16(%%edi, %%ecx, 8)\n"
- "movntq %%mm3, 24(%%edi, %%ecx, 8)\n"
- "movntq %%mm4, 32(%%edi, %%ecx, 8)\n"
- "movntq %%mm5, 40(%%edi, %%ecx, 8)\n"
- "movntq %%mm6, 48(%%edi, %%ecx, 8)\n"
- "movntq %%mm7, 56(%%edi, %%ecx, 8)\n"
-
- "add $8, %%ecx\n"
- "dec %%eax\n"
- "jnz 2b\n"
-
- "sfence\n"
- "emms\n"
- : "=D" (dummy_dst), "=S" (dummy_src)
- : "0" (dst), "1" (src), "g" (n >> 3)
- : "eax", "edx", "ecx", "memory" );
- src = (char *)src + n;
- dst = (char *)dst + n;
- }
-
- /* copy leftover */
- n = len & (128 - 1);
- if (n)
- asm_memcpy (dst, src, n);
-}
-
-#define MPID_NEM_MEMCPY_CROSSOVER (63*1024)
-
-#define MPIU_Memcpy(a,b,c) do { \
- MPIU_MEM_CHECK_MEMCPY((a),(b),(c)); \
- if (((c)) >= MPID_NEM_MEMCPY_CROSSOVER) \
- nt_memcpy (a, b, c); \
- else \
- asm_memcpy (a, b, c); \
- } while (0)
-
-#elif 0 && defined(HAVE_GCC_AND_X86_64_ASM)
-
-#define asm_memcpy(dst, src, n) do { \
- const char *_p = (char *)(src); \
- char *_q = (char *)(dst); \
- size_t _nq = n >> 3; \
- __asm__ __volatile__ ("cld ; rep ; movsq ; movl %3,%%ecx ; rep ; movsb" \
- : "+c" (_nq), "+S" (_p), "+D" (_q) \
- : "r" ((uint32_t)((n) & 7)) : "memory" ); \
- } while (0)
-
-static inline void amd64_cpy_nt (volatile void *dst, const volatile void *src, size_t n)
-{
- size_t n32 = (n) >> 5;
- size_t nleft = (n) & (32-1);
-
- if (n32)
- {
- __asm__ __volatile__ (".align 16 \n"
- "1: \n"
- "mov (%1), %%r8 \n"
- "mov 8(%1), %%r9 \n"
- "add $32, %1 \n"
- "movnti %%r8, (%2) \n"
- "movnti %%r9, 8(%2) \n"
- "add $32, %2 \n"
- "mov -16(%1), %%r8 \n"
- "mov -8(%1), %%r9 \n"
- "dec %0 \n"
- "movnti %%r8, -16(%2) \n"
- "movnti %%r9, -8(%2) \n"
- "jnz 1b \n"
- "sfence \n"
- "mfence \n"
- : "+a" (n32), "+S" (src), "+D" (dst)
- : : "r8", "r9", "memory" );
- }
-
- if (nleft)
- {
- memcpy ((void *)dst, (void *)src, nleft);
- }
-}
-
-static inline
-void volatile_memcpy (volatile void *restrict dst, volatile const void *restrict src, size_t n)
-{
- MPIUI_Memcpy ((void *)dst, (const void *)src, n);
-}
-
-#define MPID_NEM_MEMCPY_CROSSOVER (32*1024)
-#define MPIU_Memcpy(a,b,c) do { \
- MPIU_MEM_CHECK_MEMCPY((a),(b),(c)); \
- if ((c) >= MPID_NEM_MEMCPY_CROSSOVER) \
- amd64_cpy_nt(a, b, c); \
- else \
- volatile_memcpy(a, b, c); \
- } while (0)
-/* #define MPID_NEM_MEMCPY(a,b,c) (((c) < MPID_NEM_MEMCPY_CROSSOVER) ? memcpy(a, b, c) : amd64_cpy_nt(a, b, c)) */
-/* #define MPID_NEM_MEMCPY(a,b,c) amd64_cpy_nt(a, b, c) */
-/* #define MPID_NEM_MEMCPY(a,b,c) memcpy (a, b, c) */
-
-#else
-/* #define MPIU_Memcpy(dst, src, n) do { volatile void * restrict d = (dst); volatile const void *restrict s = (src); MPIUI_Memcpy((void *)d, (const void *)s, n); }while (0) */
-#define MPIU_Memcpy(dst, src, n) \
- do { \
- MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "memcpy(%p, %p, %lu)", dst, src, (size_t)n)); \
- MPIU_MEM_CHECK_MEMCPY((dst),(src),(n)); \
- MPIUI_Memcpy(dst, src, n); \
- } while (0)
-#endif
-
-#endif /* MPID_MEMDEFS_H */
diff --git a/src/mpid/ch3/channels/nemesis/include/mpid_nem_pre.h b/src/mpid/ch3/channels/nemesis/include/mpid_nem_pre.h
index ca0a4b2..1aaa4ce 100644
--- a/src/mpid/ch3/channels/nemesis/include/mpid_nem_pre.h
+++ b/src/mpid/ch3/channels/nemesis/include/mpid_nem_pre.h
@@ -9,7 +9,6 @@
#include "mpid_nem_net_module_defs.h"
#include "mpid_nem_defs.h"
-#include "mpid_nem_memdefs.h"
#if defined(HAVE_PTHREAD_H)
#include <pthread.h>
-----------------------------------------------------------------------
Summary of changes:
configure.ac | 95 -------
src/mpid/ch3/channels/nemesis/Makefile.mk | 1 -
.../channels/nemesis/include/mpid_nem_memdefs.h | 278 --------------------
.../ch3/channels/nemesis/include/mpid_nem_pre.h | 1 -
test/Makefile.am | 2 +-
test/mpid/atomic.c | 115 --------
test/mpid/atomic_fai.c | 55 ----
test/mpid/ch3/reorder.c | 176 -------------
test/mpid/dltest.c | 34 ---
test/mpid/dluse.c | 60 -----
10 files changed, 1 insertions(+), 816 deletions(-)
delete mode 100644 src/mpid/ch3/channels/nemesis/include/mpid_nem_memdefs.h
delete mode 100644 test/mpid/atomic.c
delete mode 100644 test/mpid/atomic_fai.c
delete mode 100644 test/mpid/ch3/reorder.c
delete mode 100644 test/mpid/dltest.c
delete mode 100644 test/mpid/dluse.c
hooks/post-receive
--
MPICH primary repository
More information about the commits mailing list