#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
//
// The logical *global* problem size -- N x N elements; each process
// will own a fraction of the whole.
//
#ifndef N
#define N 10
//#define N 1000
#endif
//
// We'll terminate when the largest difference between corresponding
// elements in adjacent iterations is less than this value of epsilon.
//
#define epsilon .01
//#define epsilon .000001
// START OF PROVIDED ROUTINES (should not need to change)
// ------------------------------------------------------------------------------
//
// This routine is a really lame way of computing a square-ish grid of
// processes, favoring more rows of processors than columns in the
// event that a perfect square is not possible. It returns the results
// via numRows and numCols.
//
void computeGridSize(int numProcs, int* numRows, int* numCols) {
  int guess = (int)sqrt(numProcs);
  while (numProcs % guess != 0) {
    guess--;
  }
  *numRows = numProcs / guess;
  *numCols = guess;
}
//
// This routine calculates a given process's location within a virtual
// numRows x numCols grid, laying them out in row major order.
//
void computeGridPos(int me, int numRows, int numCols, int* myRow, int* myCol) {
  *myRow = me / numCols;
  *myCol = me % numCols;
}
// END OF PROVIDED ROUTINES (should not need to change)
// ------------------------------------------------------------------------------
int main(int argc, char* argv[]) {
  int numProcs, myProcID;
  int numRows, numCols;
  int myRow, myCol;
  //
  // Boilerplate MPI startup -- query # processes/images and my unique ID
  //
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myProcID);
  //
  // Arrange the numProcs processes into a virtual 2D grid (numRows x
  // numCols) and compute my logical position within it (myRow,
  // myCol).
  //
  computeGridSize(numProcs, &numRows, &numCols);
  computeGridPos(myProcID, numRows, numCols, &myRow, &myCol);
  //
  // Sanity check that we're up and running correctly. Feel free to
  // disable this once you get things running.
  //
  printf("Process %d of %d checking in\n"
         "I am at (%d, %d) of %d x %d processes\n\n", myProcID, numProcs,
         myRow, myCol, numRows, numCols);
  /* TODO (step 1): Using your block distribution (or a
     corrected/improved/evolved version of it) from assignment #1,
     compute the portion of the global N x N array that this task
     owns, using a block x block distribution. */
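  /*
   * A minimal sketch of one such distribution (illustrative names, not
   * a required interface): process (myRow, myCol) owns global rows
   * [rowLo, rowHi) and columns [colLo, colHi). The floor-division
   * formulas spread any remainder rows/columns evenly across the grid.
   */
  int rowLo = (myRow * N) / numRows;
  int rowHi = ((myRow + 1) * N) / numRows;
  int colLo = (myCol * N) / numCols;
  int colHi = ((myCol + 1) * N) / numCols;
  int myNumRows = rowHi - rowLo;
  int myNumCols = colHi - colLo;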
  /* TODO (step 2): Allocate arrays corresponding to the local portion
     of data owned by this process -- in particular, don't allocate an
     O(N**2) array on each process, only the portion it owns.
     Allocate an extra row/column of data as a halo around the array
     to store global boundary conditions and/or overlap regions/ghost
     cells for caching neighboring processors' values, similar to what
     was shown for the 1D 3-point stencil in class, simply in 2D. */
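  /*
   * Sketch, continuing the names above: allocate only the local
   * (myNumRows x myNumCols) tile plus a one-cell halo on every side,
   * stored row-major in a single buffer. IDX() maps local (row, col)
   * coordinates, halo included, to a buffer offset. Two buffers
   * support the usual read-one/write-the-other iteration scheme.
   */
  int ldim = myNumCols + 2;  /* leading dimension, halo included */
  #define IDX(i, j) ((i) * ldim + (j))
  double* A = (double*)malloc((myNumRows + 2) * ldim * sizeof(double));
  double* Anew = (double*)malloc((myNumRows + 2) * ldim * sizeof(double));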
  /* TODO (step 3): Initialize the arrays to zero. */
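  /* Sketch: zero both buffers, halo cells included, so the global
     boundary condition is in place from the start. */
  for (int i = 0; i < (myNumRows + 2) * ldim; i++) {
    A[i] = 0.0;
    Anew[i] = 0.0;
  }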
  /* TODO (step 4): Initialize the arrays to contain four +/-1.0
     values, as in assignment #5. Note that you will need to do a
     global -> local index calculation to determine (a) which
     process(es) own the points and (b) which array value the points
     correspond to. */
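  /*
   * Sketch of the global -> local test for one point; assignment #5's
   * actual four coordinates and signs go here (the (gi, gj) below is a
   * placeholder, not the assignment's real value). A global point is
   * mine iff it falls in [rowLo, rowHi) x [colLo, colHi); the +1
   * accounts for the halo offset.
   */
  int gi = N / 4, gj = N / 4;  /* placeholder coordinate */
  if (gi >= rowLo && gi < rowHi && gj >= colLo && gj < colHi) {
    A[IDX(gi - rowLo + 1, gj - colLo + 1)] = 1.0;
  }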
  /* TODO (step 5): Implement a routine to sequentially print out the
     distributed array to the console in a coordinated manner such
     that it appears as a global whole, as we logically think of it.
     In other words, the output of this routine should be identical to
     that of printArr() in assignment #5, in spite of the fact that
     the array is decomposed across a number of processes. Use
     Send/Recv calls to coordinate between the processes. Use this
     routine to verify that your initialization is correct. */
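  /*
   * Sketch of one simple (serial, unoptimized) coordination scheme:
   * for each global row, the processes owning a segment of it send
   * that segment to rank 0, which receives the segments in column
   * order and prints the assembled row. Rank 0 recomputes every
   * process's extents from the step-1 formulas, so no extra metadata
   * needs to be exchanged. Names follow the earlier sketches.
   */
  double* rowBuf = (double*)malloc(N * sizeof(double));
  for (int g = 0; g < N; g++) {
    if (myProcID != 0 && g >= rowLo && g < rowHi) {
      MPI_Send(&A[IDX(g - rowLo + 1, 1)], myNumCols, MPI_DOUBLE, 0, g,
               MPI_COMM_WORLD);
    }
    if (myProcID == 0) {
      int pr = (numRows * (g + 1) - 1) / N;  /* process row owning g */
      for (int pc = 0; pc < numCols; pc++) {
        int cLo = (pc * N) / numCols;
        int cHi = ((pc + 1) * N) / numCols;
        int src = pr * numCols + pc;
        if (src == 0) {  /* rank 0's own segment: copy locally */
          for (int j = cLo; j < cHi; j++)
            rowBuf[j] = A[IDX(g - rowLo + 1, j - colLo + 1)];
        } else {
          MPI_Recv(&rowBuf[cLo], cHi - cLo, MPI_DOUBLE, src, g,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
      }
      for (int j = 0; j < N; j++)
        printf("%7.3f", rowBuf[j]);
      printf("\n");
    }
  }
  free(rowBuf);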
  /* TODO (step 6): Implement the 9-point stencil using MPI_Isend/
     MPI_Irecv and Wait routines. Use the non-blocking routines in
     order to get all the communication up and running in a safe
     manner. While it is possible to compute on the innermost elements
     of the array before the communication completes, there is no
     reason to do so -- simply use the non-blocking calls as a means
     of getting a number of communications up and running without
     waiting for others to complete. */
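  /*
   * Sketch of one iteration's halo exchange, assuming the names
   * sketched above; wrap it in assignment #5's iterate-until-converged
   * loop, swapping A and Anew each pass. Missing neighbors become
   * MPI_PROC_NULL, so edge processes need no special casing. A 9-point
   * stencil also needs the four corner halo cells, handled here with
   * four extra single-element messages to the diagonal neighbors.
   */
  int north = (myRow > 0) ? myProcID - numCols : MPI_PROC_NULL;
  int south = (myRow < numRows - 1) ? myProcID + numCols : MPI_PROC_NULL;
  int west = (myCol > 0) ? myProcID - 1 : MPI_PROC_NULL;
  int east = (myCol < numCols - 1) ? myProcID + 1 : MPI_PROC_NULL;
  int nw = (north != MPI_PROC_NULL && west != MPI_PROC_NULL) ? north - 1 : MPI_PROC_NULL;
  int ne = (north != MPI_PROC_NULL && east != MPI_PROC_NULL) ? north + 1 : MPI_PROC_NULL;
  int sw = (south != MPI_PROC_NULL && west != MPI_PROC_NULL) ? south - 1 : MPI_PROC_NULL;
  int se = (south != MPI_PROC_NULL && east != MPI_PROC_NULL) ? south + 1 : MPI_PROC_NULL;
  MPI_Datatype colType;  /* one interior column, strided by ldim */
  MPI_Type_vector(myNumRows, 1, ldim, MPI_DOUBLE, &colType);
  MPI_Type_commit(&colType);
  MPI_Request reqs[16];
  /* receives land in the halo; sends come from the boundary interior */
  MPI_Irecv(&A[IDX(0, 1)], myNumCols, MPI_DOUBLE, north, 0, MPI_COMM_WORLD, &reqs[0]);
  MPI_Irecv(&A[IDX(myNumRows + 1, 1)], myNumCols, MPI_DOUBLE, south, 1, MPI_COMM_WORLD, &reqs[1]);
  MPI_Irecv(&A[IDX(1, 0)], 1, colType, west, 2, MPI_COMM_WORLD, &reqs[2]);
  MPI_Irecv(&A[IDX(1, myNumCols + 1)], 1, colType, east, 3, MPI_COMM_WORLD, &reqs[3]);
  MPI_Irecv(&A[IDX(0, 0)], 1, MPI_DOUBLE, nw, 4, MPI_COMM_WORLD, &reqs[4]);
  MPI_Irecv(&A[IDX(0, myNumCols + 1)], 1, MPI_DOUBLE, ne, 5, MPI_COMM_WORLD, &reqs[5]);
  MPI_Irecv(&A[IDX(myNumRows + 1, 0)], 1, MPI_DOUBLE, sw, 6, MPI_COMM_WORLD, &reqs[6]);
  MPI_Irecv(&A[IDX(myNumRows + 1, myNumCols + 1)], 1, MPI_DOUBLE, se, 7, MPI_COMM_WORLD, &reqs[7]);
  MPI_Isend(&A[IDX(1, 1)], myNumCols, MPI_DOUBLE, north, 1, MPI_COMM_WORLD, &reqs[8]);
  MPI_Isend(&A[IDX(myNumRows, 1)], myNumCols, MPI_DOUBLE, south, 0, MPI_COMM_WORLD, &reqs[9]);
  MPI_Isend(&A[IDX(1, 1)], 1, colType, west, 3, MPI_COMM_WORLD, &reqs[10]);
  MPI_Isend(&A[IDX(1, myNumCols)], 1, colType, east, 2, MPI_COMM_WORLD, &reqs[11]);
  MPI_Isend(&A[IDX(1, 1)], 1, MPI_DOUBLE, nw, 7, MPI_COMM_WORLD, &reqs[12]);
  MPI_Isend(&A[IDX(1, myNumCols)], 1, MPI_DOUBLE, ne, 6, MPI_COMM_WORLD, &reqs[13]);
  MPI_Isend(&A[IDX(myNumRows, 1)], 1, MPI_DOUBLE, sw, 5, MPI_COMM_WORLD, &reqs[14]);
  MPI_Isend(&A[IDX(myNumRows, myNumCols)], 1, MPI_DOUBLE, se, 4, MPI_COMM_WORLD, &reqs[15]);
  MPI_Waitall(16, reqs, MPI_STATUSES_IGNORE);
  /* Update step (equal weights shown for illustration; use assignment
     #5's actual stencil coefficients): */
  for (int i = 1; i <= myNumRows; i++) {
    for (int j = 1; j <= myNumCols; j++) {
      Anew[IDX(i, j)] =
          (A[IDX(i - 1, j - 1)] + A[IDX(i - 1, j)] + A[IDX(i - 1, j + 1)] +
           A[IDX(i, j - 1)]     + A[IDX(i, j)]     + A[IDX(i, j + 1)] +
           A[IDX(i + 1, j - 1)] + A[IDX(i + 1, j)] + A[IDX(i + 1, j + 1)]) / 9.0;
    }
  }
  MPI_Type_free(&colType);  /* in a real loop, create once and reuse */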
  /* TODO (step 7): Verify that the stencil seems to be progressing
     correctly, as in assignment #5. */
  /* TODO (step 8): Use an MPI reduction to compute the termination
     condition of the routine, as in assignment #5. */
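  /*
   * Sketch of the termination test: each process computes its local
   * maximum absolute change between iterations, and an MPI_Allreduce
   * with MPI_MAX gives every process the global maximum, so all ranks
   * agree on when to stop (e.g., loop while globalDelta > epsilon,
   * swapping A and Anew each pass).
   */
  double localDelta = 0.0, globalDelta;
  for (int i = 1; i <= myNumRows; i++) {
    for (int j = 1; j <= myNumCols; j++) {
      double d = fabs(Anew[IDX(i, j)] - A[IDX(i, j)]);
      if (d > localDelta) localDelta = d;
    }
  }
  MPI_Allreduce(&localDelta, &globalDelta, 1, MPI_DOUBLE, MPI_MAX,
                MPI_COMM_WORLD);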
  /* TODO (step 9): Verify that the results of the computation (output
     array, number of iterations) are the same as assignment #5 for a
     few different problem sizes and numbers of processors; be sure to
     test a case in which there are interior processes (e.g., 9, 12,
     16, ... processes). */
  MPI_Finalize();
  return 0;
}