1from ctypes
import CDLL, c_double, c_void_p, c_int
8from utils.pbutils
import make_nei_ele, read_input, make_coloring
11def run(params, comm, lib):
13 nvars, nface, ndims = 5, 6, 3
15 nx, ny, nz, npx, npy, npz, nstep = params
16 m, n, l = int(nx/npx), int(ny/npy), int(nz/npz)
18 rank = comm.Get_rank()
21 update = lib.parallel_update
22 pre_lusgs = lib.parallel_pre_lusgs
23 lower_sweep = lib.ns_parallel_lower_sweep
24 upper_sweep = lib.ns_parallel_upper_sweep
26 update.argtypes = [c_int, c_int, c_void_p, c_void_p]
27 pre_lusgs.argtypes = [c_int, c_int, c_double, \
28 c_void_p, c_void_p, c_void_p, c_void_p]
29 lower_sweep.argtypes = [c_int, c_int, c_int, c_int, c_int, c_int, \
30 c_void_p, c_void_p, c_void_p, c_void_p, c_void_p, \
31 c_void_p, c_void_p, c_void_p, c_void_p, c_void_p]
32 upper_sweep.argtypes = [c_int, c_int, c_int, c_int, c_int, c_int, \
33 c_void_p, c_void_p, c_void_p, c_void_p, c_void_p, \
34 c_void_p, c_void_p, c_void_p, c_void_p, c_void_p]
37 print(
"[Run] Allocating arrays...")
40 nei_ele = np.empty((nface, nele), dtype=np.int32)
41 icolor = np.empty((nele,), dtype=np.int32)
42 lcolor = np.empty((nele,), dtype=np.int32)
45 rho, u, v, w, p = gamma, 1.5, 0, 0, 1
46 et = p / (gamma - 1) + 0.5*rho*u**2
47 upts = np.empty((nvars, nele), dtype=np.float64)
48 rhs = np.empty_like(upts)
49 dub = np.empty_like(upts)
50 diag = np.ones((nele,), dtype=np.float64)
51 fspr = np.ones((6, nele), dtype=np.float64)
52 dt = np.ones((nele,), dtype=np.float64)*0.1
53 fnorm_vol = np.ones((nface, nele), dtype=np.float64)
54 vfi = np.array([[0, -1, 0], [-1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, -1], [0, 1, 0]], dtype=np.float64)
55 vec_fnorm = np.repeat(vfi[:, :, np.newaxis], nele, axis=2)
59 print(
"[Run] Initializing arrays...")
60 upts[0] = rhs[0] = dub[0] = rho
61 upts[1] = rhs[1] = dub[1] = rho*u
62 upts[2] = rhs[2] = dub[2] = rho*v
63 upts[3] = rhs[3] = dub[3] = rho*w
64 upts[4] = rhs[4] = dub[4] = et
66 make_nei_ele(m, n, l, nei_ele)
67 make_coloring(m, n, l, icolor, lcolor)
71 print(
"[Run] Starting iteration...")
72 tarr = np.zeros(nstep, dtype=np.float64)
74 for i
in range(nstep):
76 pre_lusgs(nele, nface, 1.0, fnorm_vol.ctypes.data, dt.ctypes.data, \
77 diag.ctypes.data, fspr.ctypes.data)
78 lower_sweep(0, nele//2, nele, nvars, nface, ndims, nei_ele.ctypes.data, \
79 icolor.ctypes.data, lcolor.ctypes.data, fnorm_vol.ctypes.data, \
80 vec_fnorm.ctypes.data, upts.ctypes.data, rhs.ctypes.data, \
81 dub.ctypes.data, diag.ctypes.data, fspr.ctypes.data)
82 lower_sweep(nele//2, nele, nele, nvars, nface, ndims, nei_ele.ctypes.data, \
83 icolor.ctypes.data, lcolor.ctypes.data, fnorm_vol.ctypes.data, \
84 vec_fnorm.ctypes.data, upts.ctypes.data, rhs.ctypes.data, \
85 dub.ctypes.data, diag.ctypes.data, fspr.ctypes.data)
86 upper_sweep(nele//2, nele, nele, nvars, nface, ndims, nei_ele.ctypes.data, \
87 icolor.ctypes.data, lcolor.ctypes.data, fnorm_vol.ctypes.data, \
88 vec_fnorm.ctypes.data, upts.ctypes.data, rhs.ctypes.data, \
89 dub.ctypes.data, diag.ctypes.data, fspr.ctypes.data)
90 upper_sweep(0, nele//2, nele, nvars, nface, ndims, nei_ele.ctypes.data, \
91 icolor.ctypes.data, lcolor.ctypes.data, fnorm_vol.ctypes.data, \
92 vec_fnorm.ctypes.data, upts.ctypes.data, rhs.ctypes.data, \
93 dub.ctypes.data, diag.ctypes.data, fspr.ctypes.data)
94 update(nele, nvars, upts.ctypes.data, rhs.ctypes.data)
96 tarr[i] = time.time() - start
99 return tarr[15:].mean()*1000
101 return tarr.mean()*1000
105 m, n, l, dm, dn, dl, nstep = params
108 f = open(
"OMPOUTPUT_py_{}_{}.txt".format(nb.get_num_threads()*nprocs, neles),
'w')
109 f.write(
"========= LU-SGS example output =========\n\n")
110 f.write(
"*** Problem setup ***\n")
111 f.write(
"Number of cores: {}\n".format(nprocs))
112 f.write(
"Number of threads: {}\n".format(nb.get_num_threads()*nprocs))
113 f.write(
"Number of threads per core: {}\n".format(nb.get_num_threads()))
114 f.write(
"Number of iteration step: {}\n".format(nstep))
115 f.write(
"Number of elements: {} = {} x {} x {}\n".format(neles, m, n, l))
116 f.write(
"Partitioning with {}: {} x {} x {}\n".format(dm*dn*dl, dm, dn, dl))
117 f.write(
"Elements per core: {} = {} x {} x {}\n\n".format(neles//nprocs, m//dm, n//dn, l//dl))
118 f.write(
"*** Average runtime for each processor [ms] ***\n")
119 for i
in range(nprocs):
120 f.write(
"{:.6f}\n".format(times[i]))
121 f.write(
"\n*** Average runtime for entire processors [ms] ***\n")
122 f.write(
"{:.6f}\n".format(sum(times)/nprocs))
126if __name__ ==
"__main__":
128 comm = MPI.COMM_WORLD
129 rank = comm.Get_rank()
130 nproc = comm.Get_size()
133 if (len(sys.argv)) < 3:
134 print(
"[Error] Input file, dynamic library argument is needed")
137 inputname = sys.argv[1]
138 params = read_input(inputname)
146 if nproc != dm*dn*dl:
147 print(
"[Error] Partitioning number mismatch")
148 print(
"{} processors, but total {} partitioning".format(nproc, dm*dn*dl))
151 print(
"[Main] 3D hexahedral example starts...")
155 params = np.empty(7, dtype=np.int64)
157 err = comm.bcast(err, root=0)
161 if len(sys.argv) > 2:
162 nthreads = eval(sys.argv[3].split(
'=')[-1])
163 nb.set_num_threads(nthreads)
165 params = comm.bcast(params, root=0)
169 lib = CDLL(sys.argv[2])
170 except FileNotFoundError:
171 print(
"[Error] Dynamic library file not found")
174 avtimei =
run(params, comm, lib)
175 tbuf = comm.gather(avtimei)
178 print(
"[Main] Run finished. Writing result...")
def write_output(nprocs, params, times)
def run(params, comm, lib)