UCFD_SPARSE  v1.0
Documentation
Loading...
Searching...
No Matches
omp3d.c
Go to the documentation of this file.
1
21#include <math.h>
22#include <time.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <mpi.h>
26#include <omp.h>
27#include "pbutils.h"
28#include "readinput.h"
29#include "arrays.h"
30#include "coloredlusgs.h"
31
32#define nvars 5
33#define ndims 3
34#define ncellface 6
35#define root 0
36
37double run(int rank, int *params);
38void write_output(int nprocs, int nthreads, int *params, double *times);
39
40
41int main(int argc, char **argv){
42 int nprocs, nthreads, rank;
43 int dm, dn, dl;
44 int err = 0;
45 int params[7];
46 double avtimei;
47
48 MPI_Init(&argc, &argv);
49 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
50 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
51
52 double *times = (double *)calloc(nprocs, sizeof(double));
53
54 if (rank == 0) {
55 if (argc < 2) {
56 printf("[Error] Input file argument is needed\n");
57 err = 1;
58 }
59 else {
60 FILE *inpfile = fopen(argv[1], "r");
61 if (inpfile == NULL) {
62 printf("[Error] Input file not found\n");
63 err = 1;
64 }
65 else {
66 input_params(inpfile, params);
67 dm = params[3];
68 dn = params[4];
69 dl = params[5];
70 fclose(inpfile);
71
72 if (nprocs != dm*dn*dl) {
73 printf("[Error] Partitioning number mismatch:\n");
74 printf("%d processors, but total %d partitioning\n", nprocs, dm*dn*dl);
75 err = 1;
76 }
77 else {
78 printf("[Main] 3D hexahedral example starts...\n");
79 }
80 }
81 }
82 }
83
84 MPI_Bcast(&err, 1, MPI_INT, root, MPI_COMM_WORLD);
85 if (err == 1) return -1;
86
87 MPI_Bcast(&params, 7, MPI_INT, root, MPI_COMM_WORLD);
88
89 /* Main function */
90 avtimei = run(rank, params);
91
92 MPI_Gather(&avtimei, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
93
94 if (rank == 0) {
95 printf("[Main] Run finished. Writing result...\n");
96 #pragma omp parallel
97 {
98 #pragma omp master
99 {
100 nthreads = omp_get_num_threads();
101 }
102 }
103 write_output(nprocs, nthreads, params, times);
104 }
105
106 MPI_Finalize();
107 free(times);
108 return 0;
109}
110
111
112double run(int rank, int *params) {
113 // Constants
114 const double gamma = 1.4;
115 const double rho = gamma;
116 const double u = 1.5;
117 const double v = 0.0;
118 const double w = 0.0;
119 const double p = 1.0;
120 const double et = p / (gamma - 1) + 0.5*rho*u*u;
121 const int bm = params[0]/params[3];
122 const int bn = params[1]/params[4];
123 const int bl = params[2]/params[5];
124 const int neles = bm*bn*bl;
125 const int nstep = params[6];
126 double start;
127 double avtime = 0.0;
128
129 // Arrays
130 if (rank == 0) printf("[Run] Allocating arrays...\n");
131 double **upts = (double **) malloc_2d(nvars, neles, sizeof(double));
132 double **rhs = (double **) malloc_2d(nvars, neles, sizeof(double));
133 double **dub = (double **) malloc_2d(nvars, neles, sizeof(double));
134 double *diag = (double *) calloc(neles, sizeof(double));
135 double **fspr = (double **) malloc_2d(ncellface, neles, sizeof(double));
136 double *dt = (double *) calloc(neles, sizeof(double));
137 double **fnorm_vol = (double **) malloc_2d(ncellface, neles, sizeof(double));
138 double ***vec_fnorm = (double ***) malloc_3d(ncellface, ndims, neles, sizeof(double));
139 int **nei_ele = (int **) malloc_2d(ncellface, neles, sizeof(int));
140 int *icolor = (int *)calloc(neles, sizeof(int));
141 int *lcolor = (int *)calloc(neles, sizeof(int));
142 double *tarr = (double *)malloc(sizeof(double)*nstep);
143
144 /* Pointer of each array */
145 double *uptsp = &upts[0][0];
146 double *rhsp = &rhs[0][0];
147 double *dubp = &dub[0][0];
148 double *fsprp = &fspr[0][0];
149 double *fnp = &fnorm_vol[0][0];
150 double *vfp = &vec_fnorm[0][0][0];
151 int *nep = &nei_ele[0][0];
152
153 // Initialize
154 if (rank == 0) printf("[Run] Initializing arrays...\n");
155 for (int i=0; i < neles; i++) {
156 upts[0][i] = rho;
157 upts[1][i] = rho*u;
158 upts[2][i] = rho*v;
159 upts[3][i] = rho*w;
160 upts[4][i] = et;
161
162 rhs[0][i] = rho;
163 rhs[1][i] = rho*u;
164 rhs[2][i] = rho*v;
165 rhs[3][i] = rho*w;
166 rhs[4][i] = et;
167
168 dub[0][i] = rho;
169 dub[1][i] = rho*u;
170 dub[2][i] = rho*v;
171 dub[3][i] = rho*w;
172 dub[4][i] = et;
173
174 dt[i] = 0.1;
175
176 for (int j=0; j<ncellface; j++){
177 fnorm_vol[j][i] = 1.0;
178 fspr[j][i] = 1.0;
179 for (int k=0; k<ndims; k++){
180 vec_fnorm[j][k][i] = 0.0;
181 }
182 }
183
184 vec_fnorm[0][1][i] = -1.0;
185 vec_fnorm[1][0][i] = -1.0;
186 vec_fnorm[2][2][i] = 1.0;
187 vec_fnorm[3][0][i] = 1.0;
188 vec_fnorm[4][2][i] = -1.0;
189 vec_fnorm[5][1][i] = 1.0;
190 }
191
192 if (rank == 0) printf("[Run] Constructing nei_ele array...\n");
193 make_nei_ele(bm, bn, bl, nei_ele);
194
195 if (rank == 0) printf("[Run] Processing multi-coloring algorithm...\n");
196 make_coloring(bm, bn, bl, icolor, lcolor);
197
198 if (rank == 0) printf("[Run] Starting iteration...\n");
199 MPI_Barrier(MPI_COMM_WORLD);
200 for (int i=0; i<nstep; i++) {
201 start = omp_get_wtime();
202 parallel_pre_lusgs(neles, ncellface, 1.0, fnp, dt, diag, fsprp);
203 ns_parallel_lower_sweep(0, (int)neles/2, neles, nvars, ncellface, ndims, \
204 nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
205 ns_parallel_lower_sweep((int)neles/2, neles, neles, nvars, ncellface, ndims, \
206 nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
207 ns_parallel_upper_sweep((int)neles/2, neles, neles, nvars, ncellface, ndims, \
208 nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
209 ns_parallel_upper_sweep(0, (int)neles/2, neles, nvars, ncellface, ndims, \
210 nep, icolor, lcolor, fnp, vfp, uptsp, rhsp, dubp, diag, fsprp);
211 parallel_update(neles, nvars, uptsp, rhsp);
212 MPI_Barrier(MPI_COMM_WORLD);
213 tarr[i] = (double) omp_get_wtime() - start;
214 }
215
216 // Computes average time
217 for (int i=0; i<nstep; i++)
218 avtime += tarr[i];
219 avtime = avtime*1000.0/nstep;
220
221 //deallocate array
222 dealloc_2d((void **) upts);
223 dealloc_2d((void **) rhs);
224 dealloc_2d((void **) dub);
225 dealloc_2d((void **) fspr);
226 dealloc_2d((void **) fnorm_vol);
227 dealloc_2d((void **) nei_ele);
228 dealloc_3d((void ***) vec_fnorm);
229 free(diag);
230 free(dt);
231 free(icolor);
232 free(lcolor);
233 free(tarr);
234
235 return avtime;
236}
237
238
239void write_output(int nprocs, int nthreads, int *params, double *times)
240{
241 int m = params[0];
242 int n = params[1];
243 int l = params[2];
244 int dm = params[3];
245 int dn = params[4];
246 int dl = params[5];
247 int nstep = params[6];
248 int neles = m*n*l;
249 double avtime = 0.0;
250 char filename[100];
251
252 // Make filename
253 sprintf(filename, "OMPOUTPUT_c_%d_%d.txt", nprocs, neles);
254 FILE *outfile = fopen(filename, "w");
255 fprintf(outfile, "========= Colored LU-SGS example output =========\n\n");
256 fprintf(outfile, "*** Problem setup ***\n");
257 fprintf(outfile, "Number of cores: %d\n", nprocs);
258 fprintf(outfile, "Number of threads: %d\n", nthreads*nprocs);
259 fprintf(outfile, "Number of threads per core: %d\n", nthreads);
260 fprintf(outfile, "Number of iteration step: %d\n", nstep);
261 fprintf(outfile, "Number of elements: %d = %d x %d x %d\n", neles, m, n, l);
262 fprintf(outfile, "Partitioning with %d: %d x %d x %d\n", dm*dn*dl, dm, dn, dl);
263 fprintf(outfile, "Elements per core: %d = %d x %d x %d\n\n", neles/nprocs, m/dm, n/dn, l/dl);
264 fprintf(outfile, "*** Average runtime for each processor [ms] ***\n");
265 for (int i=0; i<nprocs; i++) {
266 avtime += times[i];
267 fprintf(outfile, "%lf\n", times[i]);
268 }
269 fprintf(outfile, "\n*** Average runtime for entire processors [ms] ***\n");
270 fprintf(outfile, "%lf\n", avtime/nprocs);
271
272 fclose(outfile);
273}
274
275
void dealloc_2d(void **mat)
Deallocate 2D array.
Definition: arrays.c:93
void ** malloc_2d(const size_t rows, const size_t cols, const size_t T)
Allocate 2D array.
Definition: arrays.c:33
void *** malloc_3d(const size_t rows, const size_t cols, const size_t depth, const size_t T)
Allocate 3D array.
Definition: arrays.c:52
void dealloc_3d(void ***mat)
Deallocate 3D array.
Definition: arrays.c:104
void parallel_update(int neles, int nvars, double *uptsb, double *dub, double *subres)
void parallel_pre_lusgs(int neles, int nface, double factor, double *fnorm_vol, double *dt, double *diag, double *fspr)
Computes Diagonal matrix for Colored LU-SGS method.
Definition: coloredlusgs.c:47
void ns_parallel_upper_sweep(int n0, int ne, int neles, int nfvars, int nface, int ndims, int *nei_ele, int *icolor, int *lcolor, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
Upper sweep of Colored LU-SGS method for Navier-Stokes equations.
Definition: coloredlusgs.c:220
void ns_parallel_lower_sweep(int n0, int ne, int neles, int nfvars, int nface, int ndims, int *nei_ele, int *icolor, int *lcolor, double *fnorm_vol, double *vec_fnorm, double *uptsb, double *rhsb, double *dub, double *diag, double *fspr)
Lower sweep of Colored LU-SGS method for Navier-Stokes equations.
Definition: coloredlusgs.c:82
Header file for Colored LU-SGS method.
#define gamma
Definition: flux.c:24
#define ncellface
Definition: omp3d.c:34
int main(int argc, char **argv)
Definition: omp3d.c:41
#define ndims
Definition: omp3d.c:33
#define nvars
Definition: omp3d.c:32
#define root
Definition: omp3d.c:35
void write_output(int nprocs, int nthreads, int *params, double *times)
Definition: omp3d.c:239
double run(int rank, int *params)
Definition: omp3d.c:112
void make_coloring(const int m, const int n, const int l, int *icolor, int *lcolor)
Coloring algorithm for unstructured grid.
Definition: pbutils.c:153
void make_nei_ele(const int m, const int n, const int l, int **nei_ele)
Computes neighbor cell elements array.
Definition: pbutils.c:36
void input_params(FILE *fp, int *vars)
Definition: readinput.c:27