#include #include #include #include #define GLOBALX 1000 #define GLOBALY 100 #define LB(p,P,n) (p*n/P) #define UB(p,P,n) (LB((p+1),P,n)-1) double** allocTemp(int x, int y) { // allocate 2 dimensional arrays for A, B and C double **temp = malloc(y * sizeof(double *)); if (temp == NULL) { printf("Malloc failure at allocTemp"); exit(0); } temp[0] = malloc(x*y*sizeof(double)); if (temp == NULL) { printf("Malloc failure at allocTemp"); exit(0); } for (int i = 1; i < x; i++) { temp[i] = temp[0] + (y * i); } return temp; } void sequential( ) { int numCols = GLOBALX+2; int numRows = GLOBALY+2; double** a = allocTemp(numRows, numCols); for (int i = 0; i < numRows; i++) { for (int j = 0; j < numCols; j++) { a[i][j] = 0.0; } } for (int i = 0; i < numCols; i++) { a[0][i] = 100.0; } for (int i = 0; i < numRows; i++) { a[i][0] = -40.0; a[i][numCols-1] = -40.0; } double t = MPI_Wtime( ); double currentAvg = 1000.0; double lastAvg = 0.0; while (fabs(currentAvg - lastAvg) > 0.0001) { // do an update for (int i = 1; i < numRows - 1; i++) { for (int j = 1; j < numCols - 1; j++) { a[i][j] = (a[i][j] + a[i-1][j] + a[i+1][j] + a[i][j+1] + a[i][j-1]) / 5.0; } } } t = MPI_Wtime( ) - t; printf("sequential time is %f, average is %f\n", t, currentAvg); for (int i = 0; i < numRows; i++) { free(a[i]); } free(a); } void initTemp(double** temp, int numRows, int numCols, int pid, int numP) { if (pid == 0) { // top part of the structure, initialize top row to 100 for (int i = 0; i < numCols; i++) { temp[0][i] = 100.0; } } else { // initialize with standard interior initial values of 0.0 for (int i = 0; i < numCols; i++) { temp[numRows-1][i] = 0.0; } } if (pid == numP-1) { // top part of the structure, initialize top row to 100 for (int i = 0; i < numCols; i++) { temp[0][i] = 0.0; } } else { // initialize with standard interior initial values of 0.0 for (int i = 0; i < numCols; i++) { temp[numRows-1][i] = 100.0; } } for (int i = 1; i < numCols-1; i++) { for (int j = 0; j < numRows; j++) { if ((j==1) || (j == numRows)) { temp[i][j] = -40.0; } else { temp[i][j] = 0.0; } } } } // sums the non-halo elemtns of the array float sumElements(double** a, int rows, int cols) { float tot = 0.0; for (int i = 1; i < rows-2; i++) { for (int j = 1; j < cols-2; j++) { tot += a[i][j]; } } return tot; } int main(int argc, char* argv[ ]) { // MPI book keeping variables int numP; int pid; MPI_Request sendUpR, sendDownR, recvUpR, recvDownR; MPI_Status sendUpS, sendDownS, recvUpS, recvDownS; // let's get a sequential time first // if (pid==0) { // sequential( ); // } // convergence checking variables double tot; double lastAvg = 1000.0; double currentAvg = 0; // MPI Initialization MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numP); MPI_Comm_rank(MPI_COMM_WORLD, &pid); int up, down; if (pid == 0) { up = -1; down = 1; } else if (pid == numP-1) { up = numP-2; down = -1; } else { up = pid-1; down = pid+1; } // compute the local arrays sizes int rowLB = LB(pid, numP, GLOBALX); int rowUB = UB(pid, numP, GLOBALX); int numRows = rowUB - rowLB + 3; // + 2 for the halo rows int colLB = 0; int colUB = GLOBALY-1; int numCols = colUB + 1; // we need an old and a new array double** oldTemp = allocTemp(numRows, numCols); double** newTemp = allocTemp(numRows, numCols); double** swpTemp; // initialize the arrays initTemp(oldTemp, numRows, numCols, pid, numP); initTemp(newTemp, numRows, numCols, pid, numP); double t = MPI_Wtime( ); while (fabs(currentAvg - lastAvg) > 0.0001) { if (pid == 0) { printf("average = %f\n", currentAvg); fflush(stdout); } // do an update for (int i = 1; i < numRows - 1; i++) { for (int j = 1; j < numCols - 1; j++) { newTemp[i][j] = (newTemp[i][j] + oldTemp[i-1][j] + oldTemp[i+1][j] + oldTemp[i][j+1] + oldTemp[i][j-1]) / 5.0; } } if (pid != 0) { // send the just computed non-halo top row up MPI_Isend(newTemp[1], numCols, MPI_DOUBLE, up, 0, MPI_COMM_WORLD, &sendUpR); } if (pid != numP-1) { // send the just computed non-halo bootom row down MPI_Isend(newTemp[numRows-2], numCols, MPI_DOUBLE, down, 0, MPI_COMM_WORLD, &sendDownR); } if (pid != 0) { // recv from the processor above into the bottom halo row MPI_Irecv(newTemp[numRows-1], numCols, MPI_DOUBLE, up, 0, MPI_COMM_WORLD, &recvUpR); } if (pid != numP-1) { // recv from the processor into the to halo row MPI_Irecv(newTemp[0], numCols, MPI_DOUBLE, down, 0, MPI_COMM_WORLD, &recvDownR); } // Wait for the send and recv operations to finish if (pid != 0) { MPI_Wait(&sendUpR, &sendUpS); MPI_Wait(&recvUpR, &recvUpS); } if (pid != numP-1) { MPI_Wait(&sendDownR, &sendDownS); MPI_Wait(&recvDownR, &recvDownS); } tot = sumElements(newTemp, numRows, numCols); lastAvg = currentAvg; MPI_Allreduce(&tot, ¤tAvg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); currentAvg = currentAvg / ((numRows-2)*(numCols-2)); } t = MPI_Wtime( ) - t; }