double run(int rank, int *params);
void write_output(int nprocs, int nthreads, int *params, double *times);
int main(int argc, char **argv) {
    int nprocs, nthreads, rank;
    /* ... */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double *times = (double *)calloc(nprocs, sizeof(double));
    /* ... */
    printf("[Error] Input file argument is needed\n");
    /* ... */
    FILE *inpfile = fopen(argv[1], "r");
    if (inpfile == NULL) {
        printf("[Error] Input file not found\n");
        /* ... */
    }
    /* ... */
    if (nprocs != dm*dn*dl) {
        printf("[Error] Partitioning number mismatch:\n");
        printf("%d processors, but total %d partitioning\n", nprocs, dm*dn*dl);
        /* ... */
    }
    /* ... */
    printf("[Main] 3D hexahedral example starts...\n");
    /* ... */
    MPI_Bcast(&err, 1, MPI_INT, root, MPI_COMM_WORLD);
    if (err == 1)
        return -1;
    /* ... */
    MPI_Bcast(&params, 7, MPI_INT, root, MPI_COMM_WORLD);
    /* ... */
    avtimei = run(rank, params);
    /* ... */
    MPI_Gather(&avtimei, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    /* ... */
    printf("[Main] Run finished. Writing result...\n");
    /* ... */
    nthreads = omp_get_num_threads();
    /* ... */
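A note on the omp_get_num_threads() call above: outside a parallel region it simply returns 1, so the example presumably queries the team size inside a parallel region (the surrounding lines are elided in this listing). The following is a minimal, self-contained sketch of that pattern, written independently of the example source:

#include <omp.h>
#include <stdio.h>

int main(void)
{
    int nthreads = 1;                       /* serial-region default */
    #pragma omp parallel
    {
        #pragma omp single                  /* one thread records the team size */
        nthreads = omp_get_num_threads();
    }
    /* omp_get_max_threads() is the usual alternative when querying from serial code */
    printf("threads: %d\n", nthreads);
    return 0;
}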
double run(int rank, int *params) {
    /* initial uniform flow state; with p = 1 and rho = gamma the sound speed
     * sqrt(gamma*p/rho) equals 1, so u = 1.5 is a Mach 1.5 stream */
    const double gamma = 1.4;
    const double rho = gamma;
    const double u = 1.5;
    const double v = 0.0;
    const double w = 0.0;
    const double p = 1.0;
    /* total energy p/(gamma-1) + 0.5*rho*(u*u + v*v + w*w); v = w = 0 here */
    const double et = p / (gamma - 1) + 0.5*rho*u*u;
    /* per-rank block size: global dimensions params[0..2] divided by the
     * partition counts params[3..5]; params[6] is the iteration count */
    const int bm = params[0]/params[3];
    const int bn = params[1]/params[4];
    const int bl = params[2]/params[5];
    const int neles = bm*bn*bl;
    const int nstep = params[6];
    /* ... */
    if (rank == 0) printf("[Run] Allocating arrays...\n");
    double **upts = (double **)malloc_2d(nvars, neles, sizeof(double));
    double **rhs  = (double **)malloc_2d(nvars, neles, sizeof(double));
    double **dub  = (double **)malloc_2d(nvars, neles, sizeof(double));
    double *diag  = (double *)calloc(neles, sizeof(double));
    /* ... */
    double *dt    = (double *)calloc(neles, sizeof(double));
    /* ... */
    int *icolor = (int *)calloc(neles, sizeof(int));
    int *lcolor = (int *)calloc(neles, sizeof(int));
    double *tarr = (double *)malloc(sizeof(double)*nstep);
    /* ... */
    /* flat aliases of the 2-D/3-D arrays, passed to the kernels below
     * (these flat views assume the arrays are stored contiguously) */
    double *uptsp = &upts[0][0];
    double *rhsp  = &rhs[0][0];
    double *dubp  = &dub[0][0];
    double *fsprp = &fspr[0][0];
    double *fnp   = &fnorm_vol[0][0];
    double *vfp   = &vec_fnorm[0][0][0];
    int *nep      = &nei_ele[0][0];
    /* ... */
    if (rank == 0) printf("[Run] Initializing arrays...\n");
    for (int i=0; i < neles; i++) {
        /* ... */
            fnorm_vol[j][i] = 1.0;
            /* ... */
            for (int k=0; k<ndims; k++) {
                vec_fnorm[j][k][i] = 0.0;
            }
        /* ... */
        /* six unit face normals of the hexahedral element, one per +/- coordinate direction */
        vec_fnorm[0][1][i] = -1.0;
        vec_fnorm[1][0][i] = -1.0;
        vec_fnorm[2][2][i] = 1.0;
        vec_fnorm[3][0][i] = 1.0;
        vec_fnorm[4][2][i] = -1.0;
        vec_fnorm[5][1][i] = 1.0;
    }
    /* ... */
    if (rank == 0) printf("[Run] Constructing nei_ele array...\n");
    /* ... */
    if (rank == 0) printf("[Run] Processing multi-coloring algorithm...\n");
    /* ... */
    if (rank == 0) printf("[Run] Starting iteration...\n");
    MPI_Barrier(MPI_COMM_WORLD);
    for (int i=0; i<nstep; i++) {
        start = omp_get_wtime();
        /* the four kernel calls here are truncated in this listing (apparently the
         * lower/upper colored LU-SGS sweeps, judging by the argument lists);
         * each call ends with: */
        /* ... */ nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
        /* ... */ nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
        /* ... */ nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
        /* ... */ nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
        MPI_Barrier(MPI_COMM_WORLD);
        tarr[i] = (double) omp_get_wtime() - start;
    }
    /* ... */
    for (int i=0; i<nstep; i++)
        /* ... */ ;
    avtime = avtime*1000.0/nstep;   /* convert to milliseconds per iteration step */
    /* ... */
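The coloring computed before the time loop is what makes the truncated sweep calls thread-parallel: elements of the same color share no face, so one color at a time can be updated with an ordinary parallel loop while all neighbor data read during the update belongs to other colors. The following is a generic illustration of that pattern only, not the library's ns_parallel_lower_sweep/ns_parallel_upper_sweep (whose signatures carry many more parameters); the nei[f*neles + e] layout, the -1 boundary marker, and the scalar placeholder update are assumptions made for this sketch:

/* Generic colored-sweep sketch: colors in sequence, elements of one color in parallel. */
static void sketch_colored_sweep(int neles, int nface, int ncolors,
                                 const int *nei, const int *icolor,
                                 const double *rhs, const double *diag,
                                 double *du)
{
    for (int c = 0; c < ncolors; c++) {            /* colors stay sequential */
        #pragma omp parallel for schedule(static)
        for (int e = 0; e < neles; e++) {
            if (icolor[e] != c) continue;          /* this color's elements only */
            double acc = rhs[e];
            for (int f = 0; f < nface; f++) {
                int ne = nei[f * neles + e];       /* -1 marks a boundary face */
                if (ne >= 0) acc += du[ne];        /* neighbors carry other colors */
            }
            du[e] = acc / diag[e];                 /* placeholder scalar relaxation */
        }
    }
}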
    /* inside write_output(nprocs, nthreads, params, times) */
    int nstep = params[6];
    /* ... */
    sprintf(filename, "OMPOUTPUT_c_%d_%d.txt", nprocs, neles);
    FILE *outfile = fopen(filename, "w");
    fprintf(outfile, "========= Colored LU-SGS example output =========\n\n");
    fprintf(outfile, "*** Problem setup ***\n");
    fprintf(outfile, "Number of cores: %d\n", nprocs);
    fprintf(outfile, "Number of threads: %d\n", nthreads*nprocs);
    fprintf(outfile, "Number of threads per core: %d\n", nthreads);
    fprintf(outfile, "Number of iteration step: %d\n", nstep);
    fprintf(outfile, "Number of elements: %d = %d x %d x %d\n", neles, m, n, l);
    fprintf(outfile, "Partitioning with %d: %d x %d x %d\n", dm*dn*dl, dm, dn, dl);
    fprintf(outfile, "Elements per core: %d = %d x %d x %d\n\n", neles/nprocs, m/dm, n/dn, l/dl);
    fprintf(outfile, "*** Average runtime for each processor [ms] ***\n");
    for (int i=0; i<nprocs; i++) {
        /* ... */
        fprintf(outfile, "%lf\n", times[i]);
    }
    /* ... */
    fprintf(outfile, "\n*** Average runtime for entire processors [ms] ***\n");
    fprintf(outfile, "%lf\n", avtime/nprocs);  /* avtime is presumably the sum of times[] */
    /* ... */
void dealloc_2d(void **mat)
Deallocate 2D array.
void ** malloc_2d(const size_t rows, const size_t cols, const size_t T)
Allocate 2D array. A hedged sketch of one common contiguous implementation follows this index.
void *** malloc_3d(const size_t rows, const size_t cols, const size_t depth, const size_t T)
Allocate 3D array.
void dealloc_3d(void ***mat)
Deallocate 3D array.
void parallel_update(int neles, int nvars, double *uptsb, double *dub, double *subres)
void parallel_pre_lusgs(int neles, int nface, double factor, double *fnorm_vol, double *dt, double *diag, double *fspr)
Computes Diagonal matrix for Colored LU-SGS method.
void ns_parallel_upper_sweep(int n0, int ne, int neles, int nfvars, int nface, int ndims, int *nei_ele, int *icolor, int *lcolor, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
Upper sweep of Colored LU-SGS method for Navier-Stokes equations.
void ns_parallel_lower_sweep(int n0, int ne, int neles, int nfvars, int nface, int ndims, int *nei_ele, int *icolor, int *lcolor, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
Lower sweep of Colored LU-SGS method for Navier-Stokes equations.
Header file for Colored LU-SGS method.
int main(int argc, char **argv)
void write_output(int nprocs, int nthreads, int *params, double *times)
double run(int rank, int *params)
void make_coloring(const int m, const int n, const int l, int *icolor, int *lcolor)
Coloring algorithm for unstructured grid. See the hedged neighbor/coloring sketch after this index.
void make_nei_ele(const int m, const int n, const int l, int **nei_ele)
Computes neighbor cell elements array. Covered by the same sketch after this index.
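For reference, run() treats the arrays returned by the 2-D allocator as contiguous storage (it passes &upts[0][0] and friends as flat pointers to the kernels). Below is a hedged sketch of one common way to implement such an allocator and its matching deallocator under that contiguity assumption; it is not necessarily the actual malloc_2d/dealloc_2d shown in the index.

#include <stdlib.h>

/* Hypothetical contiguous 2-D allocator: a row-pointer table plus one data
 * block, so &mat[0][0] remains usable as a flat pointer. */
void **sketch_malloc_2d(const size_t rows, const size_t cols, const size_t T)
{
    void **mat = malloc(rows * sizeof(void *));
    char *data = calloc(rows * cols, T);         /* zero-initialized data block */
    if (mat == NULL || data == NULL) { free(mat); free(data); return NULL; }
    for (size_t r = 0; r < rows; r++)
        mat[r] = data + r * cols * T;            /* row r starts r*cols elements in */
    return mat;
}

/* Matching deallocation: free the data block (stored at mat[0]), then the table. */
void sketch_dealloc_2d(void **mat)
{
    if (mat != NULL) { free(mat[0]); free(mat); }
}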
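Likewise, make_nei_ele and make_coloring are only listed above, not shown. As a rough, hypothetical illustration of what they compute for a structured m x n x l hexahedral block, the sketch below builds face-neighbor indices and a two-coloring, assuming lexicographic element numbering e = (k*n + j)*m + i and a nei[face*neles + element] layout with -1 for boundary faces (neither layout is confirmed by this listing). A checkerboard parity coloring is one valid multi-coloring here, since face neighbors always differ in parity.

/* Hypothetical sketch only; not the library's make_nei_ele / make_coloring. */
static void sketch_neighbors_and_colors(int m, int n, int l,
                                        int *nei, int *icolor)
{
    const int neles = m * n * l;
    for (int k = 0; k < l; k++) {
        for (int j = 0; j < n; j++) {
            for (int i = 0; i < m; i++) {
                int e = (k * n + j) * m + i;
                /* six face neighbors; -1 marks a boundary face with no neighbor */
                nei[0 * neles + e] = (i > 0)     ? e - 1     : -1;  /* -x */
                nei[1 * neles + e] = (i < m - 1) ? e + 1     : -1;  /* +x */
                nei[2 * neles + e] = (j > 0)     ? e - m     : -1;  /* -y */
                nei[3 * neles + e] = (j < n - 1) ? e + m     : -1;  /* +y */
                nei[4 * neles + e] = (k > 0)     ? e - m * n : -1;  /* -z */
                nei[5 * neles + e] = (k < l - 1) ? e + m * n : -1;  /* +z */
                /* red-black coloring: any two face neighbors differ in parity,
                 * so each color can be swept in parallel */
                icolor[e] = (i + j + k) % 2;
            }
        }
    }
}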