[mpich-commits] [software/dev/stencil] Stencil code repository branch, master, created. 5c2816986cd0be1c0bb66652e08e213da1307700
Service Account
noreply at mpich.org
Wed Sep 10 13:25:16 CDT 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Stencil code repository".
The branch, master has been created
at 5c2816986cd0be1c0bb66652e08e213da1307700 (commit)
- Log -----------------------------------------------------------------
http://git.mpich.org/software/dev/stencil.git/commitdiff/5c2816986cd0be1c0bb66652e08e213da1307700
commit 5c2816986cd0be1c0bb66652e08e213da1307700
Author: Pavan Balaji <balaji at anl.gov>
Date: Sat Sep 6 21:11:06 2014 -0500
Added header file.
diff --git a/stencil_par.h b/stencil_par.h
new file mode 100644
index 0000000..2dba3d2
--- /dev/null
+++ b/stencil_par.h
@@ -0,0 +1,24 @@
+/*
+ * stencil_par.h
+ *
+ * Created on: Jan 4, 2012
+ * Author: htor
+ */
+
+#ifndef STENCIL_PAR_H_
+#define STENCIL_PAR_H_
+
+#include "mpi.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+// row-major order
+#define ind(i,j) (j)*(bx+2)+(i)
+
+void printarr_par(int iter, double* array, int size, int px, int py, int rx, int ry, int bx, int by, int offx, int offy, MPI_Comm comm);
+
+
+#endif /* STENCIL_PAR_H_ */
http://git.mpich.org/software/dev/stencil.git/commitdiff/ef82f607fb83051c58d894412293adcae65cb8f3
commit ef82f607fb83051c58d894412293adcae65cb8f3
Author: Pavan Balaji <balaji at anl.gov>
Date: Sat Sep 6 19:14:05 2014 -0500
White space cleanup.
diff --git a/stencil.cpp b/stencil.cpp
index b4d7f2e..b417e06 100644
--- a/stencil.cpp
+++ b/stencil.cpp
@@ -6,527 +6,515 @@
*/
#include "stencil_par.h"
-int main(int argc, char **argv) {
+int main(int argc, char **argv)
+{
- int provide;
- MPI_Init(&argc,&argv);
- int r,p;
- MPI_Comm comm = MPI_COMM_WORLD;
- MPI_Comm_rank(comm, &r);
- MPI_Comm_size(comm, &p);
- int n, energy, niters,nx,ny, lpx,lpy,px, py;
+ int provide;
+ MPI_Init(&argc, &argv);
+ int r, p;
+ MPI_Comm comm = MPI_COMM_WORLD;
+ MPI_Comm_rank(comm, &r);
+ MPI_Comm_size(comm, &p);
+ int n, energy, niters, nx, ny, lpx, lpy, px, py;
- MPI_Comm shmcomm;
- MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
- int sr, sp; // rank and size in shmem comm
+ MPI_Comm shmcomm;
+ MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
+ int sr, sp; // rank and size in shmem comm
- MPI_Comm_size(shmcomm, &sp);
- MPI_Comm_rank(shmcomm, &sr);
- printf("shmcomm size = %d\n",sp);
- int CORENUM = 12;
- if (r==0) {
- // argument checking
- if(argc < 8) {
- if(!r) printf("usage: stencil_mpi <n> <energy> <niters> <nx> <ny> <lpx> <lpy>\n");
- MPI_Finalize();
- exit(1);
- }
+ MPI_Comm_size(shmcomm, &sp);
+ MPI_Comm_rank(shmcomm, &sr);
+ printf("shmcomm size = %d\n", sp);
+ int CORENUM = 12;
+ if (r == 0) {
+ // argument checking
+ if (argc < 8) {
+ if (!r)
+ printf("usage: stencil_mpi <n> <energy> <niters> <nx> <ny> <lpx> <lpy>\n");
+ MPI_Finalize();
+ exit(1);
+ }
+
+ n = atoi(argv[1]); // nxn grid
+ energy = atoi(argv[2]); // energy to be injected per iteration
+ niters = atoi(argv[3]); // number of iterations
- n = atoi(argv[1]); // nxn grid
- energy = atoi(argv[2]); // energy to be injected per iteration
- niters = atoi(argv[3]); // number of iterations
+ nx = atoi(argv[4]); // 1st dim processes
+ ny = atoi(argv[5]); // 2nd dim processes
+ lpx = atoi(argv[6]); // 1st dim processes
+ lpy = atoi(argv[7]); // 2nd dim processes
+ px = nx * lpx;
+ py = ny * lpy;
+ if (px * py != p)
+ MPI_Abort(comm, 1); // abort if px or py are wrong
+ if (n % py != 0)
+ MPI_Abort(comm, 2); // abort px needs to divide n
+ if (n % px != 0)
+ MPI_Abort(comm, 3); // abort py needs to divide n
+ if (nx * ny != p / CORENUM)
+ MPI_Abort(comm, 66);
+ if (lpx * lpy != CORENUM)
+ MPI_Abort(comm, 77);
+ // distribute arguments
+ int args[7] = { n, energy, niters, nx, ny, lpx, lpy };
+ MPI_Bcast(args, 7, MPI_INT, 0, comm);
+ }
+ else {
+ int args[7];
+ MPI_Bcast(args, 7, MPI_INT, 0, comm);
+ n = args[0];
+ energy = args[1];
+ niters = args[2];
- nx=atoi(argv[4]); // 1st dim processes
- ny=atoi(argv[5]); // 2nd dim processes
- lpx=atoi(argv[6]); // 1st dim processes
- lpy=atoi(argv[7]); // 2nd dim processes
- px = nx*lpx;
- py = ny*lpy;
- if(px * py != p) MPI_Abort(comm, 1);// abort if px or py are wrong
- if(n % py != 0) MPI_Abort(comm, 2); // abort px needs to divide n
- if(n % px != 0) MPI_Abort(comm, 3); // abort py needs to divide n
- if(nx*ny != p/CORENUM) MPI_Abort(comm,66);
- if(lpx*lpy != CORENUM) MPI_Abort(comm,77);
- // distribute arguments
- int args[7] = {n, energy, niters, nx,ny,lpx,lpy};
- MPI_Bcast(args, 7, MPI_INT, 0, comm);
- }
- else {
- int args[7];
- MPI_Bcast(args, 7, MPI_INT, 0, comm);
- n=args[0]; energy=args[1]; niters=args[2];
-
- nx=args[3]; ny=args[4];
- lpx = args[5];
- lpy = args[6];
-
- px = nx*lpx;
- py = ny*lpy;
- }
+ nx = args[3];
+ ny = args[4];
+ lpx = args[5];
+ lpy = args[6];
+
+ px = nx * lpx;
+ py = ny * lpy;
+ }
- int nodenum = nx*ny;
- int nodeidx, nodeidy;
- int nodeid = r/CORENUM;
- int CH = 2;
- if(CH == 1)
- {
- nodeidx = nodeid % nx;
- nodeidy = nodeid / nx;
- }
- else if(CH == 2)
- {
- nodeidx = nodeid / ny;
- nodeidy = nodeid % ny;
- }
- else if(CH == 3)
- {
- nodeidx = nodeid / ny;
- nodeidy = nodeid % ny;
- if(nodeidx % 2 != 0)
- {
- nodeidy = ny-1 - nodeidy;
- }
- }
+ int nodenum = nx * ny;
+ int nodeidx, nodeidy;
+ int nodeid = r / CORENUM;
+ int CH = 2;
+ if (CH == 1) {
+ nodeidx = nodeid % nx;
+ nodeidy = nodeid / nx;
+ }
+ else if (CH == 2) {
+ nodeidx = nodeid / ny;
+ nodeidy = nodeid % ny;
+ }
+ else if (CH == 3) {
+ nodeidx = nodeid / ny;
+ nodeidy = nodeid % ny;
+ if (nodeidx % 2 != 0) {
+ nodeidy = ny - 1 - nodeidy;
+ }
+ }
//nodeidx idy is the node coordinates(x,y)
//
- int localprocessindex = r%CORENUM;
- int local_x, local_y;
- local_x = localprocessindex%lpx;
- local_y = localprocessindex/lpx;
- // determine my coordinates (x,y) -- r=x*a+y in the 2d processor array
+ int localprocessindex = r % CORENUM;
+ int local_x, local_y;
+ local_x = localprocessindex % lpx;
+ local_y = localprocessindex / lpx;
+ // determine my coordinates (x,y) -- r=x*a+y in the 2d processor array
// int rx = r % px;
// int ry = r / px;
int rx;
int ry;
- rx = lpx*nodeidx+local_x;
- ry = lpy*nodeidy+local_y;
- int noden, nodes, nodew, nodee;
- if(nodeidy < ny-1)
- {
- if(CH ==1 )
- nodes = nodeid+ nx;
- if(CH == 2)
- nodes = nodeid+1;
- if(CH == 3)
- {
- if(nodeidx %2 ==0)
- nodes=nodeid+1;
- else
- nodes = nodeid-1;
- }
- }
- else
- {
- nodes = -1;
- }
- if(nodeidy == 0)
- {
- noden = -1;
- }
- else
- {
- if(CH==1)
- noden = nodeid - nx;
- if(CH == 2)
- noden = nodeid-1;
- if(CH == 3)
- {
- if(nodeidx %2 == 0)
- noden = nodeid-1;
- else
- noden = nodeid+1;
- }
-
- }
- if(nodeidx == 0)
- {
- nodew = -1;
- }
- else
- {
- if(CH ==1)
- nodew = nodeid -1;
- if(CH == 2)
- nodew = nodeid -ny;
- if(CH == 3)
- {
-
- if(nodeidx%2 == 0)
- nodew = nodeid - nodeidy -1-nodeidy;
- else
- {
- nodew = nodeid +nodeidy+1-2*ny+nodeidy;
- }
- }
- }
- if(nodeidx == nx -1)
- {
- nodee = -1;
- }
- else
- {
- if(CH==1)
- {
- nodee = nodeid+1;
- }
- if(CH==2)
- {
- nodee = nodeid + ny;
- }
- if(CH==3)
- {
- if(nodeidx%2 == 0)
- {
- nodee = nodeid - nodeidy + ny + ny-1-nodeidy;
- }
- else
- {
- nodee = nodeid +nodeidy +1 + nodeidy;
- }
- }
- }
- int north1, south1, west1, east1;
- if(local_y != 0)
- {
- //south
- north1 = r - lpx;
- }
- else
- {
- if(noden == -1)
- north1 = -1;
- else
- {
- north1 = r + (noden - nodeid)*CORENUM - r%CORENUM +(lpy-1) * lpx + local_x;
- //north1 = r + (noden - nodeid)*CORENUM - r%CORENUM + local_x;
- }
- }
- if(local_y < lpy-1)
- {
- //north
- south1 = r + lpx;
- }
- else
- {
- if(nodes == -1)
- south1 = -1;
- else
- {
- south1 = r + (nodes - nodeid)*CORENUM - r%CORENUM + local_x;
- //south1 = r + (noden - nodeid)*CORENUM - r%CORENUM +(lpy-1) * lpx + local_x;
- }
- }
- if(local_x != 0)
- {
- //west
- west1 = r-1;
- }
- else
- {
- if(nodew == -1)
- west1 = -1;
- else
- {
- west1 = r + (nodew - nodeid)*CORENUM - r%CORENUM + local_y*lpx + lpx-1;
- }
- }
- if(local_x == lpx-1)
- {
- //east
- if(nodee == -1)
- {
- east1 = -1;
- }
- else
- {
- east1 = r + (nodee - nodeid) * CORENUM - r%CORENUM + local_y*lpx;
- }
- }
- else
- {
-
- east1 = r+1;
- }
-
- // determine my four neighbors
- for(int i=0; i<p; i++)
- {
- if(r==i)
- printf("id=%d, sr=%d, nodeid=%d, north1=%d, south1=%d, west1=%d, east1=%d\n", r,sr,nodeid,north1,south1,west1,east1);
- MPI_Barrier(MPI_COMM_WORLD);
- }
- int north = north1; if(north1 < 0) north = MPI_PROC_NULL;
- int south = south1; if(south1 < 0) south = MPI_PROC_NULL;
- int west= west1; if(west1< 0) west = MPI_PROC_NULL;
- int east = east1; if(east1 < 0) east = MPI_PROC_NULL;
- //int temp = north; north = south; south = temp;
- int bx = n/px; // block size in x
- int by = n/py; // block size in y
- int offx = rx*bx; // offset in x
- int offy = ry*by; // offset in y
- double* volatile memO;
- double* volatile memN;
-
- //printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
- MPI_Win winO;
- MPI_Win winN;
- MPI_Win_allocate_shared((bx+2)*(by+2)*sizeof(double),1,MPI_INFO_NULL,shmcomm,(void*)&memO,&winO);
- MPI_Win_allocate_shared((bx+2)*(by+2)*sizeof(double),1,MPI_INFO_NULL,shmcomm,(void*)&memN,&winN);
- double* volatile westptrO;
- double* volatile eastptrO;
- double* volatile northptrO;
- double* volatile southptrO;
- double* volatile westptrN;
- double* volatile eastptrN;
- double* volatile northptrN;
- double* volatile southptrN;
- int needN = 0; int needS = 0; int needW = 0; int needE = 0;
- if(north != -1 && north / CORENUM == r/CORENUM)
- {
- needN = 1;
- }
- if(south != -1 && south / CORENUM == r/CORENUM)
- {
- needS = 1;
- }
- if(west != -1 && west / CORENUM == r/CORENUM)
- {
- needW = 1;
- }
- if(east != -1 && east / CORENUM == r/CORENUM)
- {
- needE = 1;
- }
- //temp = needN; needN =needS; needS = temp;
- MPI_Aint sz;
- int dsp_unit;
- if(needN)
- {
- MPI_Win_shared_query(winO,sr-lpx,&sz,&dsp_unit,(void*)&northptrO);
- MPI_Win_shared_query(winN,sr-lpx,&sz,&dsp_unit,(void*)&northptrN);
- }
- if(needS)
- {
- MPI_Win_shared_query(winO,sr+lpx,&sz,&dsp_unit,(void*)&southptrO);
- MPI_Win_shared_query(winN,sr+lpx,&sz,&dsp_unit,(void*)&southptrN);
- }
- if(needW)
- {
- MPI_Win_shared_query(winO,sr-1,&sz,&dsp_unit,(void*)&westptrO);
- MPI_Win_shared_query(winN,sr-1,&sz,&dsp_unit,(void*)&westptrN);
- }
- if(needE)
- {
- MPI_Win_shared_query(winO,sr+1,&sz,&dsp_unit,(void*)&eastptrO);
- MPI_Win_shared_query(winN,sr+1,&sz,&dsp_unit,(void*)&eastptrN);
- }
-
- // printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
- // decompose the domain
- // printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south);
- // allocate two work arrays
- double* volatile tmp;
- // initialize three heat sources
- const int nsources=3;
- int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}};
- int locnsources=0; // number of sources in my area
- int locsources[nsources][2]; // sources local to my rank
- for (int i=0; i<nsources; ++i) { // determine which sources are in my patch
- int locx = sources[i][0] - offx;
- int locy = sources[i][1] - offy;
- if(locx >= 0 && locx < bx && locy >= 0 && locy < by) {
- locsources[locnsources][0] = locx+1; // offset by halo zone
- locsources[locnsources][1] = locy+1; // offset by halo zone
- locnsources++;
- }
- }
+ rx = lpx * nodeidx + local_x;
+ ry = lpy * nodeidy + local_y;
+ int noden, nodes, nodew, nodee;
+ if (nodeidy < ny - 1) {
+ if (CH == 1)
+ nodes = nodeid + nx;
+ if (CH == 2)
+ nodes = nodeid + 1;
+ if (CH == 3) {
+ if (nodeidx % 2 == 0)
+ nodes = nodeid + 1;
+ else
+ nodes = nodeid - 1;
+ }
+ }
+ else {
+ nodes = -1;
+ }
+ if (nodeidy == 0) {
+ noden = -1;
+ }
+ else {
+ if (CH == 1)
+ noden = nodeid - nx;
+ if (CH == 2)
+ noden = nodeid - 1;
+ if (CH == 3) {
+ if (nodeidx % 2 == 0)
+ noden = nodeid - 1;
+ else
+ noden = nodeid + 1;
+ }
+
+ }
+ if (nodeidx == 0) {
+ nodew = -1;
+ }
+ else {
+ if (CH == 1)
+ nodew = nodeid - 1;
+ if (CH == 2)
+ nodew = nodeid - ny;
+ if (CH == 3) {
+
+ if (nodeidx % 2 == 0)
+ nodew = nodeid - nodeidy - 1 - nodeidy;
+ else {
+ nodew = nodeid + nodeidy + 1 - 2 * ny + nodeidy;
+ }
+ }
+ }
+ if (nodeidx == nx - 1) {
+ nodee = -1;
+ }
+ else {
+ if (CH == 1) {
+ nodee = nodeid + 1;
+ }
+ if (CH == 2) {
+ nodee = nodeid + ny;
+ }
+ if (CH == 3) {
+ if (nodeidx % 2 == 0) {
+ nodee = nodeid - nodeidy + ny + ny - 1 - nodeidy;
+ }
+ else {
+ nodee = nodeid + nodeidy + 1 + nodeidy;
+ }
+ }
+ }
+ int north1, south1, west1, east1;
+ if (local_y != 0) {
+ //south
+ north1 = r - lpx;
+ }
+ else {
+ if (noden == -1)
+ north1 = -1;
+ else {
+ north1 = r + (noden - nodeid) * CORENUM - r % CORENUM + (lpy - 1) * lpx + local_x;
+ //north1 = r + (noden - nodeid)*CORENUM - r%CORENUM + local_x;
+ }
+ }
+ if (local_y < lpy - 1) {
+ //north
+ south1 = r + lpx;
+ }
+ else {
+ if (nodes == -1)
+ south1 = -1;
+ else {
+ south1 = r + (nodes - nodeid) * CORENUM - r % CORENUM + local_x;
+ //south1 = r + (noden - nodeid)*CORENUM - r%CORENUM +(lpy-1) * lpx + local_x;
+ }
+ }
+ if (local_x != 0) {
+ //west
+ west1 = r - 1;
+ }
+ else {
+ if (nodew == -1)
+ west1 = -1;
+ else {
+ west1 = r + (nodew - nodeid) * CORENUM - r % CORENUM + local_y * lpx + lpx - 1;
+ }
+ }
+ if (local_x == lpx - 1) {
+ //east
+ if (nodee == -1) {
+ east1 = -1;
+ }
+ else {
+ east1 = r + (nodee - nodeid) * CORENUM - r % CORENUM + local_y * lpx;
+ }
+ }
+ else {
- double tp = 0.0;
- // allocate communication buffers
- double *sbufnorth = (double*)calloc(1,bx*sizeof(double)); // send buffers
- double *sbufsouth = (double*)calloc(1,bx*sizeof(double));
- double *sbufeast = (double*)calloc(1,by*sizeof(double));
- double *sbufwest = (double*)calloc(1,by*sizeof(double));
- double *rbufnorth = (double*)calloc(1,bx*sizeof(double)); // receive buffers
- double *rbufsouth = (double*)calloc(1,bx*sizeof(double));
- double *rbufeast = (double*)calloc(1,by*sizeof(double));
- double *rbufwest = (double*)calloc(1,by*sizeof(double));
+ east1 = r + 1;
+ }
- double thetime[6];
- thetime[0] = thetime[1] = thetime[2] = thetime[3] =thetime [4] = thetime[5] = 0.0;
- double t=-MPI_Wtime(); // take time
- double heat; // total heat in system
- //the following is the loop for niters steps.--zhuxm
- //double* tempnew = (double*)malloc(bx*sizeof(double));
- //int basesize = 2*(bx+by);
- //int halfindex = basesize+4;
- //MPI_Win_lock_all(0, winO);
- //MPI_Win_lock_all(0, winN);
+ // determine my four neighbors
+ for (int i = 0; i < p; i++) {
+ if (r == i)
+ printf("id=%d, sr=%d, nodeid=%d, north1=%d, south1=%d, west1=%d, east1=%d\n", r, sr,
+ nodeid, north1, south1, west1, east1);
+ MPI_Barrier(MPI_COMM_WORLD);
+ }
+ int north = north1;
+ if (north1 < 0)
+ north = MPI_PROC_NULL;
+ int south = south1;
+ if (south1 < 0)
+ south = MPI_PROC_NULL;
+ int west = west1;
+ if (west1 < 0)
+ west = MPI_PROC_NULL;
+ int east = east1;
+ if (east1 < 0)
+ east = MPI_PROC_NULL;
+ //int temp = north; north = south; south = temp;
+ int bx = n / px; // block size in x
+ int by = n / py; // block size in y
+ int offx = rx * bx; // offset in x
+ int offy = ry * by; // offset in y
+ double *volatile memO;
+ double *volatile memN;
- MPI_Barrier(shmcomm);
- for(int iter=0; iter<niters; ++iter) {
- // refresh heat sources
- //MPI_Barrier(comm);
- //if(!r) printf("Step %d\n",iter);
- double time1 = MPI_Wtime();
- for(int i=0; i<locnsources; ++i) {
- memO[ind(locsources[i][0],locsources[i][1])] += energy; // heat source
- }
- double time2 = MPI_Wtime();
- // exchange data with neighbors
- MPI_Request reqs[8];
- if(!needN)
- for(int i=0; i<bx; ++i) sbufnorth[i] = memO[ind(i+1,1)]; // pack loop - last valid region
- if(!needS)
- for(int i=0; i<bx; ++i) sbufsouth[i] = memO[ind(i+1,by)]; // pack loop
- if(!needE)
- for(int i=0; i<by; ++i) sbufeast[i] = memO[ind(bx,i+1)]; // pack loop
- if(!needW)
- for(int i=0; i<by; ++i) sbufwest[i] = memO[ind(1,i+1)]; // pack loop
+ //printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
+ MPI_Win winO;
+ MPI_Win winN;
+ MPI_Win_allocate_shared((bx + 2) * (by + 2) * sizeof(double), 1, MPI_INFO_NULL, shmcomm,
+ (void *) &memO, &winO);
+ MPI_Win_allocate_shared((bx + 2) * (by + 2) * sizeof(double), 1, MPI_INFO_NULL, shmcomm,
+ (void *) &memN, &winN);
+ double *volatile westptrO;
+ double *volatile eastptrO;
+ double *volatile northptrO;
+ double *volatile southptrO;
+ double *volatile westptrN;
+ double *volatile eastptrN;
+ double *volatile northptrN;
+ double *volatile southptrN;
+ int needN = 0;
+ int needS = 0;
+ int needW = 0;
+ int needE = 0;
+ if (north != -1 && north / CORENUM == r / CORENUM) {
+ needN = 1;
+ }
+ if (south != -1 && south / CORENUM == r / CORENUM) {
+ needS = 1;
+ }
+ if (west != -1 && west / CORENUM == r / CORENUM) {
+ needW = 1;
+ }
+ if (east != -1 && east / CORENUM == r / CORENUM) {
+ needE = 1;
+ }
+ //temp = needN; needN =needS; needS = temp;
+ MPI_Aint sz;
+ int dsp_unit;
+ if (needN) {
+ MPI_Win_shared_query(winO, sr - lpx, &sz, &dsp_unit, (void *) &northptrO);
+ MPI_Win_shared_query(winN, sr - lpx, &sz, &dsp_unit, (void *) &northptrN);
+ }
+ if (needS) {
+ MPI_Win_shared_query(winO, sr + lpx, &sz, &dsp_unit, (void *) &southptrO);
+ MPI_Win_shared_query(winN, sr + lpx, &sz, &dsp_unit, (void *) &southptrN);
+ }
+ if (needW) {
+ MPI_Win_shared_query(winO, sr - 1, &sz, &dsp_unit, (void *) &westptrO);
+ MPI_Win_shared_query(winN, sr - 1, &sz, &dsp_unit, (void *) &westptrN);
+ }
+ if (needE) {
+ MPI_Win_shared_query(winO, sr + 1, &sz, &dsp_unit, (void *) &eastptrO);
+ MPI_Win_shared_query(winN, sr + 1, &sz, &dsp_unit, (void *) &eastptrN);
+ }
+
+ // printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
+ // decompose the domain
+ // printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south);
+ // allocate two work arrays
+ double *volatile tmp;
+ // initialize three heat sources
+ const int nsources = 3;
+ int sources[nsources][2] = { {n / 2, n / 2}, {n / 3, n / 3}, {n * 4 / 5, n * 8 / 9} };
+ int locnsources = 0; // number of sources in my area
+ int locsources[nsources][2]; // sources local to my rank
+ for (int i = 0; i < nsources; ++i) { // determine which sources are in my patch
+ int locx = sources[i][0] - offx;
+ int locy = sources[i][1] - offy;
+ if (locx >= 0 && locx < bx && locy >= 0 && locy < by) {
+ locsources[locnsources][0] = locx + 1; // offset by halo zone
+ locsources[locnsources][1] = locy + 1; // offset by halo zone
+ locnsources++;
+ }
+ }
- double time3 = MPI_Wtime();
-
- int index = 0;
- if(!needN)
- {
- MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
- MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
- }
- if(!needE)
- {
- MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
- MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
- }
- if(!needW)
- {
- MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
- MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
- }
- if(!needS)
- {
- MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
- MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
- }
- printf("id = %d, iter=%d, OK\n", r, iter);
- int ReadN, ReadS, ReadW, ReadE;
- ReadN = ReadS = ReadW = ReadE = 0;
- if(!needN)
- {
- ReadN = 1;
- }
- if(!needS)
- {
- ReadS = 1;
- }
- if(!needW)
- {
- ReadW = 1;
- }
- if(!needE)
- {
- ReadE = 1;
- }
- //order: West0. East1, North2, South3
- while(!(ReadW * ReadE * ReadN * ReadS))
- {
- if(!ReadW)
- {
- for(int i=0; i<by; i++)
- // memO[(i+1)*(bx+2)]=westptrO[(i+1)*(bx+2)+bx];
- ReadW = 1;
- }
- if(!ReadE)
- {
- for(int i=0; i<by; i++)
- // memO[(i+1)*(bx+2)+bx+1]=eastptrO[(i+1)*(bx+2)+1];
- ReadE = 1;
- }
- if(!ReadN)
- {
- //memcpy(rbufnorth,northptr+2*by+bx,bx*sizeof(double));
- memcpy(memO+1,northptrO+(bx+2)*by+1,bx*sizeof(double));
- //northptr[basesize+3] = -1;
- ReadN = 1;
- }
- if(!ReadS)
- {
- //memcpy(rbufsouth,southptr+2*by, bx*sizeof(double));
- memcpy(memO+(bx+2)*(by+1)+1,southptrO+bx+2+1, bx*sizeof(double));
- //southptr[basesize+2] = -1;
- ReadS = 1;
- }
- }
- /*
- MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[0]);
- MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[1]);
- MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[2]);
- MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[3]);
- MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[4]);
- MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[5]);
- MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[6]);
- MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[7]);
- */
- MPI_Waitall(index, reqs, MPI_STATUS_IGNORE);
- double time4 = MPI_Wtime();
- if(!needN)
- for(int i=0; i<bx; ++i) memO[ind(i+1,0)] = rbufnorth[i]; // unpack loop - into ghost cells
- if(!needS)
- for(int i=0; i<bx; ++i) memO[ind(i+1,by+1)] = rbufsouth[i]; // unpack loop
- if(!needE)
- for(int i=0; i<by; ++i) memO[ind(bx+1,i+1)] = rbufeast[i]; // unpack loop
- if(!needW)
- for(int i=0; i<by; ++i) memO[ind(0,i+1)] = rbufwest[i]; // unpack loop
- double time5 = MPI_Wtime();
- heat = 0.0;
- tp = -MPI_Wtime();
- //double* aold2 = aold;
- //memcpy(aold2,aold,(bx+2)*(by+2)*sizeof(double));
- //double* anewtemp = malloc(by*sizeof(double));
- //for(int i=1; i<bx+1; ++i) {
- //for(int j=1; j<by+1; ++j) {
- for(int j=1; j<by+1; ++j) {
- for(int i=1; i<bx+1; ++i) {
- //double tt = anew[ind(i,j)]/2.0;
- double tt = memN[ind(i,j)]/2.0 + (memO[ind(i-1,j)] + memO[ind(i+1,j)] + memO[ind(i,j-1)] + memO[ind(i,j+1)])/4.0/2.0;
- //double tt = anew[ind(i,j)]/2.0 + ( aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
- //tt += (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
- memN[ind(i,j)] = tt;
- //double d = anew[ind(i,j)]/2.0 + ((double)r+1.0+double(r+3.46)+double(iter))/4.0/2.0;
- //anew[ind(i,j)] = anew[ind(i,j)]/2.0;
- //tempnew[i-1] = tt;
- heat += memN[ind(i,j)];
- }
- }
- // swap arrays
- double time6 = MPI_Wtime();
- thetime[0] += (time2-time1);
- thetime[1] += (time3-time2);
- thetime[2] += (time4-time3);
- thetime[3] += (time5-time4);
- thetime[4] += (time6-time5);
- thetime[5] += (time6-time1);
+ double tp = 0.0;
+ // allocate communication buffers
+ double *sbufnorth = (double *) calloc(1, bx * sizeof(double)); // send buffers
+ double *sbufsouth = (double *) calloc(1, bx * sizeof(double));
+ double *sbufeast = (double *) calloc(1, by * sizeof(double));
+ double *sbufwest = (double *) calloc(1, by * sizeof(double));
+ double *rbufnorth = (double *) calloc(1, bx * sizeof(double)); // receive buffers
+ double *rbufsouth = (double *) calloc(1, bx * sizeof(double));
+ double *rbufeast = (double *) calloc(1, by * sizeof(double));
+ double *rbufwest = (double *) calloc(1, by * sizeof(double));
+
+ double thetime[6];
+ thetime[0] = thetime[1] = thetime[2] = thetime[3] = thetime[4] = thetime[5] = 0.0;
+ double t = -MPI_Wtime(); // take time
+ double heat; // total heat in system
+ //the following is the loop for niters steps.--zhuxm
+ //double* tempnew = (double*)malloc(bx*sizeof(double));
+ //int basesize = 2*(bx+by);
+ //int halfindex = basesize+4;
+ //MPI_Win_lock_all(0, winO);
+ //MPI_Win_lock_all(0, winN);
- tp += MPI_Wtime();
- tmp=memN; memN=memO; memO=tmp;
- tmp=northptrN; northptrN=northptrO; northptrO=tmp;
- tmp=southptrN; southptrN=southptrO; southptrO=tmp;
- tmp=eastptrN; eastptrN=eastptrO; eastptrO=tmp;
- tmp=westptrN; westptrN=northptrO; westptrO=tmp;
MPI_Barrier(shmcomm);
- }
- t+=MPI_Wtime();
+ for (int iter = 0; iter < niters; ++iter) {
+ // refresh heat sources
+ //MPI_Barrier(comm);
+ //if (!r) printf("Step %d\n",iter);
+ double time1 = MPI_Wtime();
+ for (int i = 0; i < locnsources; ++i) {
+ memO[ind(locsources[i][0], locsources[i][1])] += energy; // heat source
+ }
+ double time2 = MPI_Wtime();
+ // exchange data with neighbors
+ MPI_Request reqs[8];
+ if (!needN)
+ for (int i = 0; i < bx; ++i)
+ sbufnorth[i] = memO[ind(i + 1, 1)]; // pack loop - last valid region
+ if (!needS)
+ for (int i = 0; i < bx; ++i)
+ sbufsouth[i] = memO[ind(i + 1, by)]; // pack loop
+ if (!needE)
+ for (int i = 0; i < by; ++i)
+ sbufeast[i] = memO[ind(bx, i + 1)]; // pack loop
+ if (!needW)
+ for (int i = 0; i < by; ++i)
+ sbufwest[i] = memO[ind(1, i + 1)]; // pack loop
- // get final heat in the system
- double rheat;
- MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm);
- double totaltime[6];
- MPI_Allreduce(thetime,totaltime,6,MPI_DOUBLE,MPI_SUM,comm);
-if(!r)
- {
- printf("[%i] last heat: %f time: %f\n", r, rheat, t);
- printf("Times are: %lf, %lf, %lf, %lf, %lf, --> %lf\n",totaltime[0]/p,totaltime[1]/p,totaltime[2]/p,totaltime[3]/p,totaltime[4]/p,totaltime[5]/p);
+ double time3 = MPI_Wtime();
+
+ int index = 0;
+ if (!needN) {
+ MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
+ }
+ if (!needE) {
+ MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
+ }
+ if (!needW) {
+ MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
+ }
+ if (!needS) {
+ MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
+ }
+ printf("id = %d, iter=%d, OK\n", r, iter);
+ int ReadN, ReadS, ReadW, ReadE;
+ ReadN = ReadS = ReadW = ReadE = 0;
+ if (!needN) {
+ ReadN = 1;
+ }
+ if (!needS) {
+ ReadS = 1;
}
- MPI_Barrier(comm);
+ if (!needW) {
+ ReadW = 1;
+ }
+ if (!needE) {
+ ReadE = 1;
+ }
+ //order: West0. East1, North2, South3
+ while (!(ReadW * ReadE * ReadN * ReadS)) {
+ if (!ReadW) {
+ for (int i = 0; i < by; i++)
+ // memO[(i+1)*(bx+2)]=westptrO[(i+1)*(bx+2)+bx];
+ ReadW = 1;
+ }
+ if (!ReadE) {
+ for (int i = 0; i < by; i++)
+ // memO[(i+1)*(bx+2)+bx+1]=eastptrO[(i+1)*(bx+2)+1];
+ ReadE = 1;
+ }
+ if (!ReadN) {
+ //memcpy(rbufnorth,northptr+2*by+bx,bx*sizeof(double));
+ memcpy(memO + 1, northptrO + (bx + 2) * by + 1, bx * sizeof(double));
+ //northptr[basesize+3] = -1;
+ ReadN = 1;
+ }
+ if (!ReadS) {
+ //memcpy(rbufsouth,southptr+2*by, bx*sizeof(double));
+ memcpy(memO + (bx + 2) * (by + 1) + 1, southptrO + bx + 2 + 1, bx * sizeof(double));
+ //southptr[basesize+2] = -1;
+ ReadS = 1;
+ }
+ }
+ /*
+ * MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[0]);
+ * MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[1]);
+ * MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[2]);
+ * MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[3]);
+ * MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[4]);
+ * MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[5]);
+ * MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[6]);
+ * MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[7]);
+ */
+ MPI_Waitall(index, reqs, MPI_STATUS_IGNORE);
+ double time4 = MPI_Wtime();
+ if (!needN)
+ for (int i = 0; i < bx; ++i)
+ memO[ind(i + 1, 0)] = rbufnorth[i]; // unpack loop - into ghost cells
+ if (!needS)
+ for (int i = 0; i < bx; ++i)
+ memO[ind(i + 1, by + 1)] = rbufsouth[i]; // unpack loop
+ if (!needE)
+ for (int i = 0; i < by; ++i)
+ memO[ind(bx + 1, i + 1)] = rbufeast[i]; // unpack loop
+ if (!needW)
+ for (int i = 0; i < by; ++i)
+ memO[ind(0, i + 1)] = rbufwest[i]; // unpack loop
+ double time5 = MPI_Wtime();
+ heat = 0.0;
+ tp = -MPI_Wtime();
+ //double* aold2 = aold;
+ //memcpy(aold2,aold,(bx+2)*(by+2)*sizeof(double));
+ //double* anewtemp = malloc(by*sizeof(double));
+ //for(int i=1; i<bx+1; ++i) {
+ //for(int j=1; j<by+1; ++j) {
+ for (int j = 1; j < by + 1; ++j) {
+ for (int i = 1; i < bx + 1; ++i) {
+ //double tt = anew[ind(i,j)]/2.0;
+ double tt =
+ memN[ind(i, j)] / 2.0 + (memO[ind(i - 1, j)] + memO[ind(i + 1, j)] +
+ memO[ind(i, j - 1)] + memO[ind(i, j + 1)]) / 4.0 / 2.0;
+ //double tt = anew[ind(i,j)]/2.0 + (aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
+ //tt += (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
+ memN[ind(i, j)] = tt;
+ //double d = anew[ind(i,j)]/2.0 + ((double)r+1.0+double(r+3.46)+double(iter))/4.0/2.0;
+ //anew[ind(i,j)] = anew[ind(i,j)]/2.0;
+ //tempnew[i-1] = tt;
+ heat += memN[ind(i, j)];
+ }
+ }
+ // swap arrays
+ double time6 = MPI_Wtime();
+ thetime[0] += (time2 - time1);
+ thetime[1] += (time3 - time2);
+ thetime[2] += (time4 - time3);
+ thetime[3] += (time5 - time4);
+ thetime[4] += (time6 - time5);
+ thetime[5] += (time6 - time1);
+
+ tp += MPI_Wtime();
+ tmp = memN;
+ memN = memO;
+ memO = tmp;
+ tmp = northptrN;
+ northptrN = northptrO;
+ northptrO = tmp;
+ tmp = southptrN;
+ southptrN = southptrO;
+ southptrO = tmp;
+ tmp = eastptrN;
+ eastptrN = eastptrO;
+ eastptrO = tmp;
+ tmp = westptrN;
+ westptrN = northptrO;
+ westptrO = tmp;
+ MPI_Barrier(shmcomm);
+ }
+ t += MPI_Wtime();
+
+ // get final heat in the system
+ double rheat;
+ MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm);
+ double totaltime[6];
+ MPI_Allreduce(thetime, totaltime, 6, MPI_DOUBLE, MPI_SUM, comm);
+ if (!r) {
+ printf("[%i] last heat: %f time: %f\n", r, rheat, t);
+ printf("Times are: %lf, %lf, %lf, %lf, %lf, --> %lf\n", totaltime[0] / p, totaltime[1] / p,
+ totaltime[2] / p, totaltime[3] / p, totaltime[4] / p, totaltime[5] / p);
+ }
+ MPI_Barrier(comm);
// printf("Rank %d times are: %lf, %lf, %lf, %lf, %lf, --> %lf\n",r, thetime[0],thetime[1],thetime[2],thetime[3],thetime[4],thetime[5]);
- MPI_Finalize();
+ MPI_Finalize();
}
http://git.mpich.org/software/dev/stencil.git/commitdiff/8ef7f7c268ec9f3866f90ad4c2f51c51495b7a15
commit 8ef7f7c268ec9f3866f90ad4c2f51c51495b7a15
Author: Pavan Balaji <balaji at anl.gov>
Date: Sat Sep 6 19:13:39 2014 -0500
Initial draft of the stencil code from Shigang.
diff --git a/stencil.cpp b/stencil.cpp
new file mode 100644
index 0000000..b4d7f2e
--- /dev/null
+++ b/stencil.cpp
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2012 Torsten Hoefler. All rights reserved.
+ *
+ * Author(s): Torsten Hoefler <htor at illinois.edu>
+ *
+ */
+
+#include "stencil_par.h"
+int main(int argc, char **argv) {
+
+ int provide;
+ MPI_Init(&argc,&argv);
+ int r,p;
+ MPI_Comm comm = MPI_COMM_WORLD;
+ MPI_Comm_rank(comm, &r);
+ MPI_Comm_size(comm, &p);
+ int n, energy, niters,nx,ny, lpx,lpy,px, py;
+
+ MPI_Comm shmcomm;
+ MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
+ int sr, sp; // rank and size in shmem comm
+
+ MPI_Comm_size(shmcomm, &sp);
+ MPI_Comm_rank(shmcomm, &sr);
+ printf("shmcomm size = %d\n",sp);
+ int CORENUM = 12;
+ if (r==0) {
+ // argument checking
+ if(argc < 8) {
+ if(!r) printf("usage: stencil_mpi <n> <energy> <niters> <nx> <ny> <lpx> <lpy>\n");
+ MPI_Finalize();
+ exit(1);
+ }
+
+ n = atoi(argv[1]); // nxn grid
+ energy = atoi(argv[2]); // energy to be injected per iteration
+ niters = atoi(argv[3]); // number of iterations
+
+ nx=atoi(argv[4]); // 1st dim processes
+ ny=atoi(argv[5]); // 2nd dim processes
+ lpx=atoi(argv[6]); // 1st dim processes
+ lpy=atoi(argv[7]); // 2nd dim processes
+ px = nx*lpx;
+ py = ny*lpy;
+ if(px * py != p) MPI_Abort(comm, 1);// abort if px or py are wrong
+ if(n % py != 0) MPI_Abort(comm, 2); // abort px needs to divide n
+ if(n % px != 0) MPI_Abort(comm, 3); // abort py needs to divide n
+ if(nx*ny != p/CORENUM) MPI_Abort(comm,66);
+ if(lpx*lpy != CORENUM) MPI_Abort(comm,77);
+ // distribute arguments
+ int args[7] = {n, energy, niters, nx,ny,lpx,lpy};
+ MPI_Bcast(args, 7, MPI_INT, 0, comm);
+ }
+ else {
+ int args[7];
+ MPI_Bcast(args, 7, MPI_INT, 0, comm);
+ n=args[0]; energy=args[1]; niters=args[2];
+
+ nx=args[3]; ny=args[4];
+ lpx = args[5];
+ lpy = args[6];
+
+ px = nx*lpx;
+ py = ny*lpy;
+ }
+
+ int nodenum = nx*ny;
+ int nodeidx, nodeidy;
+ int nodeid = r/CORENUM;
+ int CH = 2;
+ if(CH == 1)
+ {
+ nodeidx = nodeid % nx;
+ nodeidy = nodeid / nx;
+ }
+ else if(CH == 2)
+ {
+ nodeidx = nodeid / ny;
+ nodeidy = nodeid % ny;
+ }
+ else if(CH == 3)
+ {
+ nodeidx = nodeid / ny;
+ nodeidy = nodeid % ny;
+ if(nodeidx % 2 != 0)
+ {
+ nodeidy = ny-1 - nodeidy;
+ }
+ }
+//nodeidx idy is the node coordinates(x,y)
+//
+ int localprocessindex = r%CORENUM;
+ int local_x, local_y;
+ local_x = localprocessindex%lpx;
+ local_y = localprocessindex/lpx;
+ // determine my coordinates (x,y) -- r=x*a+y in the 2d processor array
+ // int rx = r % px;
+ // int ry = r / px;
+ int rx;
+ int ry;
+ rx = lpx*nodeidx+local_x;
+ ry = lpy*nodeidy+local_y;
+ int noden, nodes, nodew, nodee;
+ if(nodeidy < ny-1)
+ {
+ if(CH ==1 )
+ nodes = nodeid+ nx;
+ if(CH == 2)
+ nodes = nodeid+1;
+ if(CH == 3)
+ {
+ if(nodeidx %2 ==0)
+ nodes=nodeid+1;
+ else
+ nodes = nodeid-1;
+ }
+ }
+ else
+ {
+ nodes = -1;
+ }
+ if(nodeidy == 0)
+ {
+ noden = -1;
+ }
+ else
+ {
+ if(CH==1)
+ noden = nodeid - nx;
+ if(CH == 2)
+ noden = nodeid-1;
+ if(CH == 3)
+ {
+ if(nodeidx %2 == 0)
+ noden = nodeid-1;
+ else
+ noden = nodeid+1;
+ }
+
+ }
+ if(nodeidx == 0)
+ {
+ nodew = -1;
+ }
+ else
+ {
+ if(CH ==1)
+ nodew = nodeid -1;
+ if(CH == 2)
+ nodew = nodeid -ny;
+ if(CH == 3)
+ {
+
+ if(nodeidx%2 == 0)
+ nodew = nodeid - nodeidy -1-nodeidy;
+ else
+ {
+ nodew = nodeid +nodeidy+1-2*ny+nodeidy;
+ }
+ }
+ }
+ if(nodeidx == nx -1)
+ {
+ nodee = -1;
+ }
+ else
+ {
+ if(CH==1)
+ {
+ nodee = nodeid+1;
+ }
+ if(CH==2)
+ {
+ nodee = nodeid + ny;
+ }
+ if(CH==3)
+ {
+ if(nodeidx%2 == 0)
+ {
+ nodee = nodeid - nodeidy + ny + ny-1-nodeidy;
+ }
+ else
+ {
+ nodee = nodeid +nodeidy +1 + nodeidy;
+ }
+ }
+ }
+ int north1, south1, west1, east1;
+ if(local_y != 0)
+ {
+ //south
+ north1 = r - lpx;
+ }
+ else
+ {
+ if(noden == -1)
+ north1 = -1;
+ else
+ {
+ north1 = r + (noden - nodeid)*CORENUM - r%CORENUM +(lpy-1) * lpx + local_x;
+ //north1 = r + (noden - nodeid)*CORENUM - r%CORENUM + local_x;
+ }
+ }
+ if(local_y < lpy-1)
+ {
+ //north
+ south1 = r + lpx;
+ }
+ else
+ {
+ if(nodes == -1)
+ south1 = -1;
+ else
+ {
+ south1 = r + (nodes - nodeid)*CORENUM - r%CORENUM + local_x;
+ //south1 = r + (noden - nodeid)*CORENUM - r%CORENUM +(lpy-1) * lpx + local_x;
+ }
+ }
+ if(local_x != 0)
+ {
+ //west
+ west1 = r-1;
+ }
+ else
+ {
+ if(nodew == -1)
+ west1 = -1;
+ else
+ {
+ west1 = r + (nodew - nodeid)*CORENUM - r%CORENUM + local_y*lpx + lpx-1;
+ }
+ }
+ if(local_x == lpx-1)
+ {
+ //east
+ if(nodee == -1)
+ {
+ east1 = -1;
+ }
+ else
+ {
+ east1 = r + (nodee - nodeid) * CORENUM - r%CORENUM + local_y*lpx;
+ }
+ }
+ else
+ {
+
+ east1 = r+1;
+ }
+
+ // determine my four neighbors
+ for(int i=0; i<p; i++)
+ {
+ if(r==i)
+ printf("id=%d, sr=%d, nodeid=%d, north1=%d, south1=%d, west1=%d, east1=%d\n", r,sr,nodeid,north1,south1,west1,east1);
+ MPI_Barrier(MPI_COMM_WORLD);
+ }
+ int north = north1; if(north1 < 0) north = MPI_PROC_NULL;
+ int south = south1; if(south1 < 0) south = MPI_PROC_NULL;
+ int west= west1; if(west1< 0) west = MPI_PROC_NULL;
+ int east = east1; if(east1 < 0) east = MPI_PROC_NULL;
+ //int temp = north; north = south; south = temp;
+ int bx = n/px; // block size in x
+ int by = n/py; // block size in y
+ int offx = rx*bx; // offset in x
+ int offy = ry*by; // offset in y
+ double* volatile memO;
+ double* volatile memN;
+
+ //printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
+ MPI_Win winO;
+ MPI_Win winN;
+ MPI_Win_allocate_shared((bx+2)*(by+2)*sizeof(double),1,MPI_INFO_NULL,shmcomm,(void*)&memO,&winO);
+ MPI_Win_allocate_shared((bx+2)*(by+2)*sizeof(double),1,MPI_INFO_NULL,shmcomm,(void*)&memN,&winN);
+ double* volatile westptrO;
+ double* volatile eastptrO;
+ double* volatile northptrO;
+ double* volatile southptrO;
+ double* volatile westptrN;
+ double* volatile eastptrN;
+ double* volatile northptrN;
+ double* volatile southptrN;
+ int needN = 0; int needS = 0; int needW = 0; int needE = 0;
+ if(north != -1 && north / CORENUM == r/CORENUM)
+ {
+ needN = 1;
+ }
+ if(south != -1 && south / CORENUM == r/CORENUM)
+ {
+ needS = 1;
+ }
+ if(west != -1 && west / CORENUM == r/CORENUM)
+ {
+ needW = 1;
+ }
+ if(east != -1 && east / CORENUM == r/CORENUM)
+ {
+ needE = 1;
+ }
+ //temp = needN; needN =needS; needS = temp;
+ MPI_Aint sz;
+ int dsp_unit;
+ if(needN)
+ {
+ MPI_Win_shared_query(winO,sr-lpx,&sz,&dsp_unit,(void*)&northptrO);
+ MPI_Win_shared_query(winN,sr-lpx,&sz,&dsp_unit,(void*)&northptrN);
+ }
+ if(needS)
+ {
+ MPI_Win_shared_query(winO,sr+lpx,&sz,&dsp_unit,(void*)&southptrO);
+ MPI_Win_shared_query(winN,sr+lpx,&sz,&dsp_unit,(void*)&southptrN);
+ }
+ if(needW)
+ {
+ MPI_Win_shared_query(winO,sr-1,&sz,&dsp_unit,(void*)&westptrO);
+ MPI_Win_shared_query(winN,sr-1,&sz,&dsp_unit,(void*)&westptrN);
+ }
+ if(needE)
+ {
+ MPI_Win_shared_query(winO,sr+1,&sz,&dsp_unit,(void*)&eastptrO);
+ MPI_Win_shared_query(winN,sr+1,&sz,&dsp_unit,(void*)&eastptrN);
+ }
+
+ // printf("rank %d N S W E is: %d %d %d %d\n",r,north, south, west, east);
+ // decompose the domain
+ // printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south);
+ // allocate two work arrays
+ double* volatile tmp;
+ // initialize three heat sources
+ const int nsources=3;
+ int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}};
+ int locnsources=0; // number of sources in my area
+ int locsources[nsources][2]; // sources local to my rank
+ for (int i=0; i<nsources; ++i) { // determine which sources are in my patch
+ int locx = sources[i][0] - offx;
+ int locy = sources[i][1] - offy;
+ if(locx >= 0 && locx < bx && locy >= 0 && locy < by) {
+ locsources[locnsources][0] = locx+1; // offset by halo zone
+ locsources[locnsources][1] = locy+1; // offset by halo zone
+ locnsources++;
+ }
+ }
+
+ double tp = 0.0;
+ // allocate communication buffers
+ double *sbufnorth = (double*)calloc(1,bx*sizeof(double)); // send buffers
+ double *sbufsouth = (double*)calloc(1,bx*sizeof(double));
+ double *sbufeast = (double*)calloc(1,by*sizeof(double));
+ double *sbufwest = (double*)calloc(1,by*sizeof(double));
+ double *rbufnorth = (double*)calloc(1,bx*sizeof(double)); // receive buffers
+ double *rbufsouth = (double*)calloc(1,bx*sizeof(double));
+ double *rbufeast = (double*)calloc(1,by*sizeof(double));
+ double *rbufwest = (double*)calloc(1,by*sizeof(double));
+
+ double thetime[6];
+ thetime[0] = thetime[1] = thetime[2] = thetime[3] =thetime [4] = thetime[5] = 0.0;
+ double t=-MPI_Wtime(); // take time
+ double heat; // total heat in system
+ //the following is the loop for niters steps.--zhuxm
+ //double* tempnew = (double*)malloc(bx*sizeof(double));
+ //int basesize = 2*(bx+by);
+ //int halfindex = basesize+4;
+ //MPI_Win_lock_all(0, winO);
+ //MPI_Win_lock_all(0, winN);
+
+ MPI_Barrier(shmcomm);
+ for(int iter=0; iter<niters; ++iter) {
+ // refresh heat sources
+ //MPI_Barrier(comm);
+ //if(!r) printf("Step %d\n",iter);
+ double time1 = MPI_Wtime();
+ for(int i=0; i<locnsources; ++i) {
+ memO[ind(locsources[i][0],locsources[i][1])] += energy; // heat source
+ }
+ double time2 = MPI_Wtime();
+ // exchange data with neighbors
+ MPI_Request reqs[8];
+ if(!needN)
+ for(int i=0; i<bx; ++i) sbufnorth[i] = memO[ind(i+1,1)]; // pack loop - last valid region
+ if(!needS)
+ for(int i=0; i<bx; ++i) sbufsouth[i] = memO[ind(i+1,by)]; // pack loop
+ if(!needE)
+ for(int i=0; i<by; ++i) sbufeast[i] = memO[ind(bx,i+1)]; // pack loop
+ if(!needW)
+ for(int i=0; i<by; ++i) sbufwest[i] = memO[ind(1,i+1)]; // pack loop
+
+ double time3 = MPI_Wtime();
+
+ int index = 0;
+ if(!needN)
+ {
+ MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[index++]);
+ }
+ if(!needE)
+ {
+ MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[index++]);
+ }
+ if(!needW)
+ {
+ MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[index++]);
+ }
+ if(!needS)
+ {
+ MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
+ MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[index++]);
+ }
+ printf("id = %d, iter=%d, OK\n", r, iter);
+ int ReadN, ReadS, ReadW, ReadE;
+ ReadN = ReadS = ReadW = ReadE = 0;
+ if(!needN)
+ {
+ ReadN = 1;
+ }
+ if(!needS)
+ {
+ ReadS = 1;
+ }
+ if(!needW)
+ {
+ ReadW = 1;
+ }
+ if(!needE)
+ {
+ ReadE = 1;
+ }
+ //order: West0. East1, North2, South3
+ while(!(ReadW * ReadE * ReadN * ReadS))
+ {
+ if(!ReadW)
+ {
+ for(int i=0; i<by; i++)
+ // memO[(i+1)*(bx+2)]=westptrO[(i+1)*(bx+2)+bx];
+ ReadW = 1;
+ }
+ if(!ReadE)
+ {
+ for(int i=0; i<by; i++)
+ // memO[(i+1)*(bx+2)+bx+1]=eastptrO[(i+1)*(bx+2)+1];
+ ReadE = 1;
+ }
+ if(!ReadN)
+ {
+ //memcpy(rbufnorth,northptr+2*by+bx,bx*sizeof(double));
+ memcpy(memO+1,northptrO+(bx+2)*by+1,bx*sizeof(double));
+ //northptr[basesize+3] = -1;
+ ReadN = 1;
+ }
+ if(!ReadS)
+ {
+ //memcpy(rbufsouth,southptr+2*by, bx*sizeof(double));
+ memcpy(memO+(bx+2)*(by+1)+1,southptrO+bx+2+1, bx*sizeof(double));
+ //southptr[basesize+2] = -1;
+ ReadS = 1;
+ }
+ }
+ /*
+ MPI_Isend(sbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[0]);
+ MPI_Isend(sbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[1]);
+ MPI_Isend(sbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[2]);
+ MPI_Isend(sbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[3]);
+ MPI_Irecv(rbufnorth, bx, MPI_DOUBLE, north, 9, comm, &reqs[4]);
+ MPI_Irecv(rbufsouth, bx, MPI_DOUBLE, south, 9, comm, &reqs[5]);
+ MPI_Irecv(rbufeast, by, MPI_DOUBLE, east, 9, comm, &reqs[6]);
+ MPI_Irecv(rbufwest, by, MPI_DOUBLE, west, 9, comm, &reqs[7]);
+ */
+ MPI_Waitall(index, reqs, MPI_STATUS_IGNORE);
+ double time4 = MPI_Wtime();
+ if(!needN)
+ for(int i=0; i<bx; ++i) memO[ind(i+1,0)] = rbufnorth[i]; // unpack loop - into ghost cells
+ if(!needS)
+ for(int i=0; i<bx; ++i) memO[ind(i+1,by+1)] = rbufsouth[i]; // unpack loop
+ if(!needE)
+ for(int i=0; i<by; ++i) memO[ind(bx+1,i+1)] = rbufeast[i]; // unpack loop
+ if(!needW)
+ for(int i=0; i<by; ++i) memO[ind(0,i+1)] = rbufwest[i]; // unpack loop
+ double time5 = MPI_Wtime();
+ heat = 0.0;
+ tp = -MPI_Wtime();
+ //double* aold2 = aold;
+ //memcpy(aold2,aold,(bx+2)*(by+2)*sizeof(double));
+ //double* anewtemp = malloc(by*sizeof(double));
+ //for(int i=1; i<bx+1; ++i) {
+ //for(int j=1; j<by+1; ++j) {
+ for(int j=1; j<by+1; ++j) {
+ for(int i=1; i<bx+1; ++i) {
+ //double tt = anew[ind(i,j)]/2.0;
+ double tt = memN[ind(i,j)]/2.0 + (memO[ind(i-1,j)] + memO[ind(i+1,j)] + memO[ind(i,j-1)] + memO[ind(i,j+1)])/4.0/2.0;
+ //double tt = anew[ind(i,j)]/2.0 + ( aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
+ //tt += (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
+ memN[ind(i,j)] = tt;
+ //double d = anew[ind(i,j)]/2.0 + ((double)r+1.0+double(r+3.46)+double(iter))/4.0/2.0;
+ //anew[ind(i,j)] = anew[ind(i,j)]/2.0;
+ //tempnew[i-1] = tt;
+ heat += memN[ind(i,j)];
+ }
+ }
+ // swap arrays
+ double time6 = MPI_Wtime();
+ thetime[0] += (time2-time1);
+ thetime[1] += (time3-time2);
+ thetime[2] += (time4-time3);
+ thetime[3] += (time5-time4);
+ thetime[4] += (time6-time5);
+ thetime[5] += (time6-time1);
+
+ tp += MPI_Wtime();
+ tmp=memN; memN=memO; memO=tmp;
+ tmp=northptrN; northptrN=northptrO; northptrO=tmp;
+ tmp=southptrN; southptrN=southptrO; southptrO=tmp;
+ tmp=eastptrN; eastptrN=eastptrO; eastptrO=tmp;
+ tmp=westptrN; westptrN=northptrO; westptrO=tmp;
+ MPI_Barrier(shmcomm);
+ }
+ t+=MPI_Wtime();
+
+ // get final heat in the system
+ double rheat;
+ MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm);
+ double totaltime[6];
+ MPI_Allreduce(thetime,totaltime,6,MPI_DOUBLE,MPI_SUM,comm);
+if(!r)
+ {
+ printf("[%i] last heat: %f time: %f\n", r, rheat, t);
+ printf("Times are: %lf, %lf, %lf, %lf, %lf, --> %lf\n",totaltime[0]/p,totaltime[1]/p,totaltime[2]/p,totaltime[3]/p,totaltime[4]/p,totaltime[5]/p);
+ }
+ MPI_Barrier(comm);
+// printf("Rank %d times are: %lf, %lf, %lf, %lf, %lf, --> %lf\n",r, thetime[0],thetime[1],thetime[2],thetime[3],thetime[4],thetime[5]);
+ MPI_Finalize();
+}
-----------------------------------------------------------------------
hooks/post-receive
--
Stencil code repository
More information about the commits
mailing list