double run(int rank, int *params);
void write_output(int nprocs, int *params, double *times);

int main(int argc, char **argv)
{
    /* ... */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double *times = (double *)calloc(nprocs, sizeof(double));
    /* ... */
        printf("[Error] Input file argument is needed\n");
    /* ... */
    FILE *inpfile = fopen(argv[1], "r");
    if (inpfile == NULL) {
        printf("[Error] Input file not found\n");
    /* ... */
    if (nprocs != dm*dn*dl) {
        printf("[Error] Partitioning number mismatch\n");
        printf("%d processors, but %d partitions in total\n", nprocs, dm*dn*dl);
    /* ... */
    printf("[Main] 3D hexahedral example starts...\n");
    /* ... */
    MPI_Bcast(&err, 1, MPI_INT, root, MPI_COMM_WORLD);
    if (err == 1) return -1;
    /* ... */
    MPI_Bcast(&params, 7, MPI_INT, root, MPI_COMM_WORLD);
    /* ... */
    avtimei = run(rank, params);
    /* ... */
    MPI_Gather(&avtimei, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    /* ... */
    printf("[Main] Run finished. Writing result...\n");
    /* ... */
double run(int rank, int *params)
{
    const double gamma = 1.4;
    const double rho = gamma;
    const double u = 1.5;
    const double v = 0.0;
    const double w = 0.0;
    const double p = 1.0;
    const double et = p / (gamma - 1) + 0.5*rho*u*u;
    const int bm = params[0]/params[3];
    const int bn = params[1]/params[4];
    const int bl = params[2]/params[5];
    const int neles = bm*bn*bl;
    const int nstep = params[6];
    /* ... */
    if (rank == 0) printf("[Run] Allocating arrays...\n");
    double **upts = (double **)malloc_2d(nvars, neles, sizeof(double));
    double **rhs  = (double **)malloc_2d(nvars, neles, sizeof(double));
    double **dub  = (double **)malloc_2d(nvars, neles, sizeof(double));
    double *diag = (double *)calloc(neles, sizeof(double));
    /* ... */
    double *dt = (double *)calloc(neles, sizeof(double));
    /* ... */
    int *mapping   = (int *)calloc(neles, sizeof(int));
    int *unmapping = (int *)calloc(neles, sizeof(int));
    double *tarr = (double *)malloc(sizeof(double)*nstep);
    /* ... */
    /* Flat views of the 2D/3D arrays for the sweep kernels. */
    double *uptsp = &upts[0][0];
    double *rhsp  = &rhs[0][0];
    double *dubp  = &dub[0][0];
    double *fsprp = &fspr[0][0];
    double *fnp   = &fnorm_vol[0][0];
    double *vfp   = &vec_fnorm[0][0][0];
    int *nep = &nei_ele[0][0];
    /* ... */
    if (rank == 0) printf("[Run] Initializing arrays...\n");
    for (int i = 0; i < neles; i++) {
        /* ... */
        fnorm_vol[j][i] = 1.0;
        /* ... */
        for (int k = 0; k < ndims; k++) {
            vec_fnorm[j][k][i] = 0.0;
        /* ... */
        /* Unit face normals; with dims (x, y, z), faces 0..5 point
           along -y, -x, +z, +x, -z, +y. */
        vec_fnorm[0][1][i] = -1.0;
        vec_fnorm[1][0][i] = -1.0;
        vec_fnorm[2][2][i] = 1.0;
        vec_fnorm[3][0][i] = 1.0;
        vec_fnorm[4][2][i] = -1.0;
        vec_fnorm[5][1][i] = 1.0;
    /* ... */
    if (rank == 0) printf("[Run] Constructing nei_ele array...\n");
    /* ... */
    if (rank == 0) printf("[Run] Processing Reverse Cuthill-McKee...\n");
    /* ... */
    if (rank == 0) printf("[Run] Starting iteration...\n");
    MPI_Barrier(MPI_COMM_WORLD);
    for (int i = 0; i < nstep; i++) {
        /* ... */
        /* LU-SGS sweeps; the call heads are elided in this excerpt, and
           the leading scalar arguments below follow the sweep signatures
           listed at the end of this page. */
        ns_serial_lower_sweep(neles, nvars, nface, ndims,
                              nep, mapping, unmapping, fnp, vfp,
                              uptsp, rhsp, dubp, diag, fsprp);
        ns_serial_upper_sweep(neles, nvars, nface, ndims,
                              nep, mapping, unmapping, fnp, vfp,
                              uptsp, rhsp, dubp, diag, fsprp);
        /* ... */
        MPI_Barrier(MPI_COMM_WORLD);
        tarr[i] = (double) MPI_Wtime() - start;
    }
    /* ... */
    for (int i = 0; i < nstep; i++)
        /* ... */
    avtime = avtime*1000/nstep;   /* mean milliseconds per step */
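Note that run() hands flat pointers such as &upts[0][0] to the sweep kernels, which is only valid if malloc_2d places all rows in one contiguous block. A sketch of an allocator with that property, matching the malloc_2d signature listed below; the real implementation's internal layout is an assumption here:

#include <stdlib.h>

/* Row-pointer table followed by one contiguous payload, so that
 * &mat[0][0] can be treated as a flat rows*cols array.  Payload
 * alignment relies on rows*sizeof(char *) being a multiple of the
 * element alignment, which holds for double on common 64-bit ABIs. */
void **malloc_2d_sketch(const size_t rows, const size_t cols, const size_t T)
{
    char **mat = (char **)malloc(rows * sizeof(char *) + rows * cols * T);
    if (mat == NULL) return NULL;

    char *payload = (char *)(mat + rows);   /* payload starts after the table */
    for (size_t r = 0; r < rows; r++)
        mat[r] = payload + r * cols * T;
    return (void **)mat;
}

With this layout, upts[v][e] and uptsp[v*neles + e] address the same double, and the matching deallocation reduces to a single free(), which is consistent with (though not confirmed by) the dealloc_2d entry below.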
void write_output(int nprocs, int *params, double *times)
{
    /* ... */
    int nstep = params[6];
    /* ... */
    sprintf(filename, "MPIOUTPUT_c_%d_%d.txt", nprocs, neles);
    FILE *outfile = fopen(filename, "w");
    fprintf(outfile, "========= LU-SGS example output =========\n\n");
    fprintf(outfile, "*** Problem setup ***\n");
    fprintf(outfile, "Number of cores: %d\n", nprocs);
    fprintf(outfile, "Number of iteration steps: %d\n", nstep);
    fprintf(outfile, "Number of elements: %d = %d x %d x %d\n", neles, m, n, l);
    fprintf(outfile, "Partitioning with %d: %d x %d x %d\n", dm*dn*dl, dm, dn, dl);
    fprintf(outfile, "Elements per core: %d = %d x %d x %d\n\n",
            neles/nprocs, m/dm, n/dn, l/dl);
    fprintf(outfile, "*** Average runtime for each processor [ms] ***\n");
    for (int i = 0; i < nprocs; i++) {
        /* ... */
        fprintf(outfile, "%lf\n", times[i]);
    }
    fprintf(outfile, "\n*** Average runtime over all processors [ms] ***\n");
    fprintf(outfile, "%lf\n", avtime/nprocs);
Functions from the serial LU-SGS header ("Header file for serial LU-SGS method"):

void **malloc_2d(const size_t rows, const size_t cols, const size_t T)
    Allocate a 2D array.
void dealloc_2d(void **mat)
    Deallocate a 2D array.
void ***malloc_3d(const size_t rows, const size_t cols, const size_t depth, const size_t T)
    Allocate a 3D array.
void dealloc_3d(void ***mat)
    Deallocate a 3D array.
void serial_pre_lusgs(int neles, int nface, double factor, double *fnorm_vol, double *dt, double *diag, double *fspr)
    Computes the diagonal matrix for the LU-SGS method (see the sketch just after this list).
void ns_serial_lower_sweep(int neles, int nfvars, int nface, int ndims, int *nei_ele, int *mapping, int *unmapping, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
    Lower sweep of the LU-SGS method for the Navier-Stokes equations.
void ns_serial_upper_sweep(int neles, int nfvars, int nface, int ndims, int *nei_ele, int *mapping, int *unmapping, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
    Upper sweep of the LU-SGS method for the Navier-Stokes equations.
void serial_update(int neles, int nvars, double *uptsb, double *dub, double *subres)
    Updates the solution array.
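In standard LU-SGS formulations the diagonal has the form D_i = V_i/dt_i + (factor/2) * sum over faces f of lambda_f * S_f, where lambda_f is the face spectral radius and S_f the face area. The sketch below wires that textbook form to this header's argument list; the flattened [face][element] layout, the role of fnorm_vol as the face-area factor, and the unit element volume are all assumptions, not taken from the source:

/* Hedged sketch of a pre-LU-SGS diagonal assembly in the textbook form
 * diag[i] = V_i/dt[i] + (factor/2) * sum_f fspr[f][i] * area[f][i].
 * serial_pre_lusgs() itself may differ in layout and details. */
static void pre_lusgs_sketch(int neles, int nface, double factor,
                             const double *fnorm_vol, const double *dt,
                             double *diag, const double *fspr)
{
    for (int i = 0; i < neles; i++) {
        double d = 1.0 / dt[i];           /* V_i/dt_i with V_i taken as 1 */
        for (int f = 0; f < nface; f++)
            d += 0.5 * factor * fspr[f * neles + i] * fnorm_vol[f * neles + i];
        diag[i] = d;
    }
}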
Functions from the example driver:

int main(int argc, char **argv)
    Entry point: initializes MPI, reads the input file, and gathers per-rank timings.
double run(int rank, int *params)
    Sets up the test problem, runs the LU-SGS iterations, and returns the average step time.
void write_output(int nprocs, int *params, double *times)
    Write the output file in txt format.
void make_reordering(const int nele, const int nface, int **nei_ele, int *mapping, int *unmapping)
    Reverse Cuthill-McKee algorithm using neighbor elements (see the first sketch after this list).
void make_nei_ele(const int m, const int n, const int l, int **nei_ele)
    Computes the neighbor cell elements array (see the second sketch after this list).
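The Reverse Cuthill-McKee pass behind make_reordering can be illustrated independently of the solver. A self-contained sketch of the classic BFS-from-minimum-degree variant on a toy graph; how perm relates to the source's mapping/unmapping pair is an assumption:

#include <stdio.h>
#include <stdlib.h>

#define MAXNBR 4   /* max neighbors per node in this toy graph */

/* Reverse Cuthill-McKee: BFS from a minimum-degree seed, visiting
 * unvisited neighbors in increasing-degree order, then reversing the
 * visit order.  perm[k] = old index of the k-th element in the new
 * ordering.  Assumes a connected graph. */
static void rcm(int n, const int deg[], const int adj[][MAXNBR], int perm[])
{
    int *visited = (int *)calloc(n, sizeof(int));
    int head = 0, tail = 0;

    int seed = 0;
    for (int i = 1; i < n; i++)
        if (deg[i] < deg[seed]) seed = i;
    visited[seed] = 1;
    perm[tail++] = seed;

    while (head < tail) {
        int cur = perm[head++];
        int nbr[MAXNBR], cnt = 0;
        for (int f = 0; f < deg[cur]; f++) {
            int v = adj[cur][f];
            if (!visited[v]) { visited[v] = 1; nbr[cnt++] = v; }
        }
        /* enqueue the new neighbors sorted by increasing degree */
        for (int a = 1; a < cnt; a++) {
            int v = nbr[a], b = a;
            while (b > 0 && deg[nbr[b - 1]] > deg[v]) { nbr[b] = nbr[b - 1]; b--; }
            nbr[b] = v;
        }
        for (int a = 0; a < cnt; a++) perm[tail++] = nbr[a];
    }

    for (int i = 0; i < n / 2; i++) {      /* the "reverse" in RCM */
        int t = perm[i]; perm[i] = perm[n - 1 - i]; perm[n - 1 - i] = t;
    }
    free(visited);
}

int main(void)
{
    /* 2 x 3 grid graph, nodes numbered row-major. */
    enum { N = 6 };
    const int deg[N] = { 2, 3, 2, 2, 3, 2 };
    const int adj[N][MAXNBR] = {
        { 1, 3 }, { 0, 2, 4 }, { 1, 5 },
        { 0, 4 }, { 3, 5, 1 }, { 4, 2 },
    };
    int perm[N];
    rcm(N, deg, adj, perm);
    for (int i = 0; i < N; i++)
        printf("new %d <- old %d\n", i, perm[i]);
    return 0;
}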
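And a sketch of the structured-grid neighbor lookup behind make_nei_ele, using the face order implied by the vec_fnorm assignments in run() (0:-y, 1:-x, 2:+z, 3:+x, 4:-z, 5:+y); the row-major element numbering and the boundary convention (a boundary face pointing back at its own element) are assumptions:

#include <stdio.h>

/* Neighbor indices across the six faces of element ele on an m x n x l
 * structured hex grid with ele = x + m*(y + n*z). */
static void neighbors(int m, int n, int l, int ele, int nei[6])
{
    int x = ele % m, y = (ele / m) % n, z = ele / (m * n);

    nei[0] = (y > 0)     ? ele - m     : ele;  /* -y */
    nei[1] = (x > 0)     ? ele - 1     : ele;  /* -x */
    nei[2] = (z < l - 1) ? ele + m * n : ele;  /* +z */
    nei[3] = (x < m - 1) ? ele + 1     : ele;  /* +x */
    nei[4] = (z > 0)     ? ele - m * n : ele;  /* -z */
    nei[5] = (y < n - 1) ? ele + m     : ele;  /* +y */
}

int main(void)
{
    int nei[6];
    neighbors(4, 3, 2, 5, nei);   /* element (x, y, z) = (1, 1, 0) */
    for (int f = 0; f < 6; f++)
        printf("face %d -> element %d\n", f, nei[f]);
    return 0;
}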