#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
//
// The logical *global* problem size -- N x N elements; each process
// will own a fraction of the whole.
//
#ifndef N
#define N 10
//#define N 1000
#endif
//
// We'll terminate when the largest difference between corresponding
// elements in adjacent iterations is less than this value of epsilon.
//
#define epsilon .01
//#define epsilon .000001
// START OF PROVIDED ROUTINES (should not need to change)
// ------------------------------------------------------------------------------
//
// This routine is a really lame way of computing a square-ish grid of
// processes, favoring more rows of processors than columns in the
// event that a perfect square is not possible. It returns the results
// via numRows and numCols.
//
void computeGridSize(int numProcs, int* numRows, int* numCols) {
  int guess = (int)sqrt(numProcs);
  while (numProcs % guess != 0) {
    guess--;
  }
  *numRows = numProcs / guess;
  *numCols = guess;
}
//
// This routine calculates a given process's location within a virtual
// numRows x numCols grid, laying them out in row major order.
//
void computeGridPos(int me, int numRows, int numCols, int* myRow, int* myCol) {
  *myRow = me / numCols;
  *myCol = me % numCols;
}
// END OF PROVIDED ROUTINES (should not need to change)
// ------------------------------------------------------------------------------
int main(int argc, char* argv[]) {
  int numProcs, myProcID;
  int numRows, numCols;
  int myRow, myCol;
  //
  // Boilerplate MPI startup -- query # processes/images and my unique ID
  //
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myProcID);
  //
  // Arrange the numProcs processes into a virtual 2D grid (numRows x
  // numCols) and compute my logical position within it (myRow,
  // myCol).
  //
  computeGridSize(numProcs, &numRows, &numCols);
  computeGridPos(myProcID, numRows, numCols, &myRow, &myCol);
  //
  // Sanity check that we're up and running correctly. Feel free to
  // disable this once you get things running.
  //
  printf("Process %d of %d checking in\n"
         "I am at (%d, %d) of %d x %d processes\n\n", myProcID, numProcs,
         myRow, myCol, numRows, numCols);
  /* TODO (step 1): Using your block distribution (or a
     corrected/improved/evolved version of it) from assignment #1,
     compute the portion of the global N x N array that this task
     owns, using a block x block distribution. */
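  /*
   * A minimal sketch of one such distribution (illustrative names, not
   * a required interface): process (myRow, myCol) owns global rows
   * [rowLo, rowHi) and columns [colLo, colHi). The floor-division
   * formulas spread any remainder rows/columns evenly across the grid.
   */
  int rowLo = (myRow * N) / numRows;
  int rowHi = ((myRow + 1) * N) / numRows;
  int colLo = (myCol * N) / numCols;
  int colHi = ((myCol + 1) * N) / numCols;
  int myNumRows = rowHi - rowLo;
  int myNumCols = colHi - colLo;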
  /* TODO (step 2): Allocate arrays corresponding to the local portion
     of data owned by this process -- in particular, don't allocate an
     O(N**2) array on each process, only the portion it owns.
     Allocate an extra row/column of data as a halo around the array
     to store global boundary conditions and/or overlap regions/ghost
     cells for caching neighboring processors' values, similar to what
     was shown for the 1D 3-point stencil in class, simply in 2D. */
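  /*
   * Sketch, continuing the names above: allocate only the local
   * (myNumRows x myNumCols) tile plus a one-cell halo on every side,
   * stored row-major in a single buffer. IDX() maps local (row, col)
   * coordinates, halo included, to a buffer offset. Two buffers
   * support the usual read-one/write-the-other iteration scheme.
   */
  int ldim = myNumCols + 2;  /* leading dimension, halo included */
  #define IDX(i, j) ((i) * ldim + (j))
  double* A = (double*)malloc((myNumRows + 2) * ldim * sizeof(double));
  double* Anew = (double*)malloc((myNumRows + 2) * ldim * sizeof(double));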
  /* TODO (step 3): Initialize the arrays to zero. */
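  /* Sketch: zero both buffers, halo cells included, so the global
     boundary condition is in place from the start. */
  for (int i = 0; i < (myNumRows + 2) * ldim; i++) {
    A[i] = 0.0;
    Anew[i] = 0.0;
  }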
  /* TODO (step 4): Initialize the arrays to contain four +/-1.0
     values, as in assignment #5. Note that you will need to do a
     global -> local index calculation to determine (a) which
     process(es) own the points and (b) which array value the points
     correspond to. */
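  /*
   * Sketch of the global -> local test for one point; assignment #5's
   * actual four coordinates and signs go here (the (gi, gj) below is a
   * placeholder, not the assignment's real value). A global point is
   * mine iff it falls in [rowLo, rowHi) x [colLo, colHi); the +1
   * accounts for the halo offset.
   */
  int gi = N / 4, gj = N / 4;  /* placeholder coordinate */
  if (gi >= rowLo && gi < rowHi && gj >= colLo && gj < colHi) {
    A[IDX(gi - rowLo + 1, gj - colLo + 1)] = 1.0;
  }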
  /* TODO (step 5): Implement a routine to sequentially print out the
     distributed array to the console in a coordinated manner such
     that it appears as a global whole, as we logically think of it.
     In other words, the output of this routine should be identical to
     that of printArr() in assignment #5, in spite of the fact that
     the array is decomposed across a number of processes. Use
     Send/Recv calls to coordinate between the processes. Use this
     routine to verify that your initialization is correct. */
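  /*
   * Sketch of one simple (serial, unoptimized) coordination scheme:
   * for each global row, the processes owning a segment of it send
   * that segment to rank 0, which receives the segments in column
   * order and prints the assembled row. Rank 0 recomputes every
   * process's extents from the step-1 formulas, so no extra metadata
   * needs to be exchanged. Names follow the earlier sketches.
   */
  double* rowBuf = (double*)malloc(N * sizeof(double));
  for (int g = 0; g < N; g++) {
    if (myProcID != 0 && g >= rowLo && g < rowHi) {
      MPI_Send(&A[IDX(g - rowLo + 1, 1)], myNumCols, MPI_DOUBLE, 0, g,
               MPI_COMM_WORLD);
    }
    if (myProcID == 0) {
      int pr = (numRows * (g + 1) - 1) / N;  /* process row owning g */
      for (int pc = 0; pc < numCols; pc++) {
        int cLo = (pc * N) / numCols;
        int cHi = ((pc + 1) * N) / numCols;
        int src = pr * numCols + pc;
        if (src == 0) {  /* rank 0's own segment: copy locally */
          for (int j = cLo; j < cHi; j++)
            rowBuf[j] = A[IDX(g - rowLo + 1, j - colLo + 1)];
        } else {
          MPI_Recv(&rowBuf[cLo], cHi - cLo, MPI_DOUBLE, src, g,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
      }
      for (int j = 0; j < N; j++)
        printf("%7.3f", rowBuf[j]);
      printf("\n");
    }
  }
  free(rowBuf);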
  /* TODO (step 6): Implement the 9-point stencil using MPI_Isend/
     MPI_Irecv and Wait routines. Use the non-blocking routines in
     order to get all the communication up and running in a safe
     manner. While it is possible to compute on the innermost elements
     of the array before the communication completes, there is no
     reason to do so -- simply use the non-blocking calls as a means
     of getting a number of communications up and running without
     waiting for others to complete. */
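  /*
   * Sketch of one iteration's halo exchange, assuming the names
   * sketched above; wrap it in assignment #5's iterate-until-converged
   * loop, swapping A and Anew each pass. Missing neighbors become
   * MPI_PROC_NULL, so edge processes need no special casing. A 9-point
   * stencil also needs the four corner halo cells, handled here with
   * four extra single-element messages to the diagonal neighbors.
   */
  int north = (myRow > 0) ? myProcID - numCols : MPI_PROC_NULL;
  int south = (myRow < numRows - 1) ? myProcID + numCols : MPI_PROC_NULL;
  int west = (myCol > 0) ? myProcID - 1 : MPI_PROC_NULL;
  int east = (myCol < numCols - 1) ? myProcID + 1 : MPI_PROC_NULL;
  int nw = (north != MPI_PROC_NULL && west != MPI_PROC_NULL) ? north - 1 : MPI_PROC_NULL;
  int ne = (north != MPI_PROC_NULL && east != MPI_PROC_NULL) ? north + 1 : MPI_PROC_NULL;
  int sw = (south != MPI_PROC_NULL && west != MPI_PROC_NULL) ? south - 1 : MPI_PROC_NULL;
  int se = (south != MPI_PROC_NULL && east != MPI_PROC_NULL) ? south + 1 : MPI_PROC_NULL;
  MPI_Datatype colType;  /* one interior column, strided by ldim */
  MPI_Type_vector(myNumRows, 1, ldim, MPI_DOUBLE, &colType);
  MPI_Type_commit(&colType);
  MPI_Request reqs[16];
  /* receives land in the halo; sends come from the boundary interior */
  MPI_Irecv(&A[IDX(0, 1)], myNumCols, MPI_DOUBLE, north, 0, MPI_COMM_WORLD, &reqs[0]);
  MPI_Irecv(&A[IDX(myNumRows + 1, 1)], myNumCols, MPI_DOUBLE, south, 1, MPI_COMM_WORLD, &reqs[1]);
  MPI_Irecv(&A[IDX(1, 0)], 1, colType, west, 2, MPI_COMM_WORLD, &reqs[2]);
  MPI_Irecv(&A[IDX(1, myNumCols + 1)], 1, colType, east, 3, MPI_COMM_WORLD, &reqs[3]);
  MPI_Irecv(&A[IDX(0, 0)], 1, MPI_DOUBLE, nw, 4, MPI_COMM_WORLD, &reqs[4]);
  MPI_Irecv(&A[IDX(0, myNumCols + 1)], 1, MPI_DOUBLE, ne, 5, MPI_COMM_WORLD, &reqs[5]);
  MPI_Irecv(&A[IDX(myNumRows + 1, 0)], 1, MPI_DOUBLE, sw, 6, MPI_COMM_WORLD, &reqs[6]);
  MPI_Irecv(&A[IDX(myNumRows + 1, myNumCols + 1)], 1, MPI_DOUBLE, se, 7, MPI_COMM_WORLD, &reqs[7]);
  MPI_Isend(&A[IDX(1, 1)], myNumCols, MPI_DOUBLE, north, 1, MPI_COMM_WORLD, &reqs[8]);
  MPI_Isend(&A[IDX(myNumRows, 1)], myNumCols, MPI_DOUBLE, south, 0, MPI_COMM_WORLD, &reqs[9]);
  MPI_Isend(&A[IDX(1, 1)], 1, colType, west, 3, MPI_COMM_WORLD, &reqs[10]);
  MPI_Isend(&A[IDX(1, myNumCols)], 1, colType, east, 2, MPI_COMM_WORLD, &reqs[11]);
  MPI_Isend(&A[IDX(1, 1)], 1, MPI_DOUBLE, nw, 7, MPI_COMM_WORLD, &reqs[12]);
  MPI_Isend(&A[IDX(1, myNumCols)], 1, MPI_DOUBLE, ne, 6, MPI_COMM_WORLD, &reqs[13]);
  MPI_Isend(&A[IDX(myNumRows, 1)], 1, MPI_DOUBLE, sw, 5, MPI_COMM_WORLD, &reqs[14]);
  MPI_Isend(&A[IDX(myNumRows, myNumCols)], 1, MPI_DOUBLE, se, 4, MPI_COMM_WORLD, &reqs[15]);
  MPI_Waitall(16, reqs, MPI_STATUSES_IGNORE);
  /* Update step (equal weights shown for illustration; use assignment
     #5's actual stencil coefficients): */
  for (int i = 1; i <= myNumRows; i++) {
    for (int j = 1; j <= myNumCols; j++) {
      Anew[IDX(i, j)] =
          (A[IDX(i - 1, j - 1)] + A[IDX(i - 1, j)] + A[IDX(i - 1, j + 1)] +
           A[IDX(i, j - 1)]     + A[IDX(i, j)]     + A[IDX(i, j + 1)] +
           A[IDX(i + 1, j - 1)] + A[IDX(i + 1, j)] + A[IDX(i + 1, j + 1)]) / 9.0;
    }
  }
  MPI_Type_free(&colType);  /* in a real loop, create once and reuse */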
  /* TODO (step 7): Verify that the stencil seems to be progressing
     correctly, as in assignment #5. */
  /* TODO (step 8): Use an MPI reduction to compute the termination
     condition of the routine, as in assignment #5. */
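  /*
   * Sketch of the termination test: each process computes its local
   * maximum absolute change between iterations, and an MPI_Allreduce
   * with MPI_MAX gives every process the global maximum, so all ranks
   * agree on when to stop (e.g., loop while globalDelta > epsilon,
   * swapping A and Anew each pass).
   */
  double localDelta = 0.0, globalDelta;
  for (int i = 1; i <= myNumRows; i++) {
    for (int j = 1; j <= myNumCols; j++) {
      double d = fabs(Anew[IDX(i, j)] - A[IDX(i, j)]);
      if (d > localDelta) localDelta = d;
    }
  }
  MPI_Allreduce(&localDelta, &globalDelta, 1, MPI_DOUBLE, MPI_MAX,
                MPI_COMM_WORLD);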
  /* TODO (step 9): Verify that the results of the computation (output
     array, number of iterations) are the same as assignment #5 for a
     few different problem sizes and numbers of processors; be sure to
     test a case in which there are interior processes (e.g., 9, 12,
     16, ... processes). */
  MPI_Finalize();
  return 0;
}