UCFD_SPARSE  v1.0
Documentation
Loading...
Searching...
No Matches
gmres.c
Go to the documentation of this file.
1
2// Doxygen comments
3
4
5
6
7#include <stdio.h>
8#include <math.h>
9#include "mkl.h"
10#include "gmres.h"
11#include "precon.h"
12
13
14
15ucfd_status_t serial_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, \
16 const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
17{
18 int it, i, j, err;
19 double beta, tmp, c, s, h1, h2, rr;
20 sparse_status_t status;
21
22 // Sparse matrix description for system matrix A(op)
23 struct matrix_descr descr;
24 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
25 descr.mode = 0;
26 descr.diag = 0;
27
28 // Computes residual : r := rhsb - A @ dub
29 cblas_dcopy(n, rhsb, 1, r, 1);
30 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
31 if (status != SPARSE_STATUS_SUCCESS) printf("Residual computation error\n");
32
33 for (it=0; it<itmax; it++) {
34 beta = cblas_dnrm2(n, r, 1);
35 printf("%d iteration, %.10f residual\n", it, beta);
36 if (beta < tol) return UCFD_STATUS_CONVERGED;
37
38 if (precon_type == ILU0) {
39 ilu0_sweep_bsr_mkl(precon, r);
40 beta = cblas_dnrm2(n, r, 1);
41 }
42 else if (precon_type == LUSGS) {
43 // TODO : LU-SGS preconditioner function
44 beta = cblas_dnrm2(n, r, 1);
45 }
46
47 y[0] = beta;
48
49 // V = r/beta
50 cblas_dcopy(n, r, 1, V, 1);
51 cblas_dscal(n, 1/beta, V, 1);
52
53 for (j=0; j<m; j++) {
54 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
55 if (status != SPARSE_STATUS_SUCCESS) return -1;
56
57 if (precon_type == ILU0) {
58 ilu0_sweep_bsr_mkl(precon, w);
59 }
60 else if (precon_type == LUSGS) {
61 // TODO : LU-SGS preconditioner function
62 }
63
64 // Arnoldi iteration
65 for (i=0; i<(j+1); i++) {
66 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
67 H[j+m*i] = tmp;
68 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
69 }
70
71 tmp = cblas_dnrm2(n, w, 1);
72 H[j+(j+1)*m] = tmp;
73 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
74 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
75
76 // Givens Rotation
77 for (i=0; i<j; i++) {
78 c = g[i*2]; // g[i, 0]
79 s = g[i*2+1]; // g[i, 1]
80 h1 = H[j+i*m];
81 h2 = H[j+(i+1)*m];
82 H[j+i*m] = c*h1 - s*h2;
83 H[j+(i+1)*m] = s*h1 + c*h2;
84 }
85
86 h1 = H[j*(m+1)];
87 h2 = H[j+(j+1)*m];
88 rr = sqrt(h1*h1 + h2*h2);
89 c = h1/rr;
90 s = -h2/rr;
91 H[j*(m+1)] = c*h1 - s*h2;
92 g[j*2] = c;
93 g[j*2+1] = s;
94
95 // Modify e1 vector
96 y[j+1] = y[j];
97 y[j] *= c;
98 y[j+1] *= s;
99 }
100
101 // Back substitution
102 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
103
104 // Update
105 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
106
107 // Computes next iteration residual
108 cblas_dcopy(n, rhsb, 1, r, 1);
109 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
110 if (status != SPARSE_STATUS_SUCCESS) printf("Residual computation error\n");
111 }
112
113 if (it == itmax){
114 printf("Not converged in %d iteration, residual=%.5f", it, beta);
115 return it;
116 }
117
118 return UCFD_STATUS_ERROR;
119}
120
121
122ucfd_status_t serial_gmres2(sparse_matrix_t op, ucfd_precon_type_t precon_type, const int neles, const int nvars, const int m, \
123 const int *row_ptr, const int *col_ind, const int *diag_ind, double *pre_nnz_data, \
124 const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
125{
126 int it, i, j, err;
127 const int n = neles * nvars;
128 double beta, tmp, c, s, h1, h2, rr;
129 sparse_status_t status;
130
131 // Sparse matrix description for system matrix A(op)
132 struct matrix_descr descr;
133 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
134 descr.mode = 0;
135 descr.diag = 0;
136
137 // Computes residual : r := rhsb - A @ dub
138 cblas_dcopy(n, rhsb, 1, r, 1);
139 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
140 if (status != SPARSE_STATUS_SUCCESS) printf("Residual computation error\n");
141
142 for (it=0; it<itmax; it++) {
143 beta = cblas_dnrm2(n, r, 1);
144 // printf("%d iteration, %.10f residual\n", it, beta);
145 if (beta < tol) return UCFD_STATUS_CONVERGED;
146
147 if (precon_type == ILU0) {
148 ilu0_sweep_bsr(neles, nvars, row_ptr, col_ind, diag_ind, pre_nnz_data, r);
149 beta = cblas_dnrm2(n, r, 1);
150 }
151 else if (precon_type == LUSGS) {
152 // TODO : LU-SGS preconditioner function
153 beta = cblas_dnrm2(n, r, 1);
154 }
155
156 y[0] = beta;
157
158 // V = r/beta
159 cblas_dcopy(n, r, 1, V, 1);
160 cblas_dscal(n, 1/beta, V, 1);
161
162 for (j=0; j<m; j++) {
163 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
164 if (status != SPARSE_STATUS_SUCCESS) return -1;
165
166 if (precon_type == ILU0) {
167 ilu0_sweep_bsr(neles, nvars, row_ptr, col_ind, diag_ind, pre_nnz_data, w);
168 }
169 else if (precon_type == LUSGS) {
170 // TODO : LU-SGS preconditioner function
171 }
172
173 // Arnoldi iteration
174 for (i=0; i<(j+1); i++) {
175 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
176 H[j+m*i] = tmp;
177 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
178 }
179
180 tmp = cblas_dnrm2(n, w, 1);
181 H[j+(j+1)*m] = tmp;
182 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
183 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
184
185 // Givens Rotation
186 for (i=0; i<j; i++) {
187 c = g[i*2]; // g[i, 0]
188 s = g[i*2+1]; // g[i, 1]
189 h1 = H[j+i*m];
190 h2 = H[j+(i+1)*m];
191 H[j+i*m] = c*h1 - s*h2;
192 H[j+(i+1)*m] = s*h1 + c*h2;
193 }
194
195 h1 = H[j*(m+1)];
196 h2 = H[j+(j+1)*m];
197 rr = sqrt(h1*h1 + h2*h2);
198 c = h1/rr;
199 s = -h2/rr;
200 H[j*(m+1)] = c*h1 - s*h2;
201 g[j*2] = c;
202 g[j*2+1] = s;
203
204 // Modify e1 vector
205 y[j+1] = y[j];
206 y[j] *= c;
207 y[j+1] *= s;
208 }
209
210 // Back substitution
211 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
212
213 // Update
214 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
215
216 // Computes next iteration residual
217 cblas_dcopy(n, rhsb, 1, r, 1);
218 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
219 if (status != SPARSE_STATUS_SUCCESS) printf("Residual computation error\n");
220 }
221
222 if (it == itmax){
223 printf("Not converged in %d iteration, residual=%.5f", it, beta);
224 return it;
225 }
226
227 return UCFD_STATUS_ERROR;
228}
229
230
231ucfd_status_t single_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, \
232 const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
233{
234 int it, i, j, err;
235 double beta, tmp, c, s, h1, h2, rr;
236 sparse_status_t status;
237
238 // Sparse matrix description for system matrix A(op)
239 struct matrix_descr descr;
240 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
241 descr.mode = 0;
242 descr.diag = 0;
243
244 // Computes residual : r := rhsb - A @ dub
245 cblas_dcopy(n, rhsb, 1, r, 1);
246 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
247 if (status != SPARSE_STATUS_SUCCESS) printf("Residual computation error\n");
248
249 beta = cblas_dnrm2(n, r, 1);
250
251 if (precon_type == ILU0) {
252 ilu0_sweep_bsr_mkl(precon, r);
253 beta = cblas_dnrm2(n, r, 1);
254 }
255 else if (precon_type == LUSGS) {
256 // TODO : LU-SGS preconditioner function
257 beta = cblas_dnrm2(n, r, 1);
258 }
259
260 // beta = cblas_dnrm2(n, r, 1);
261 y[0] = beta;
262
263 // V = r/beta
264 cblas_dcopy(n, r, 1, V, 1);
265 cblas_dscal(n, 1/beta, V, 1);
266
267 for (j=0; j<m; j++) {
268 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
269 if (status != SPARSE_STATUS_SUCCESS)
270 return -1;
271
272 if (precon_type == ILU0) {
273 ilu0_sweep_bsr_mkl(precon, w);
274 }
275 else if (precon_type == LUSGS) {
276 // TODO : LU-SGS preconditioner function
277 }
278
279 // Arnoldi iteration
280 for (i=0; i<j+1; i++) {
281 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
282 H[j+m*i] = tmp;
283 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
284 }
285
286 tmp = cblas_dnrm2(n, w, 1);
287 H[j+(j+1)*m] = tmp;
288 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
289 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
290
291 // Givens Rotation
292 for (i=0; i<j; i++) {
293 c = g[i*2];
294 s = g[i*2+1];
295 h1 = H[j+i*m];
296 h2 = H[j+(i+1)*m];
297 H[j+i*m] = c*h1 - s*h2;
298 H[j+(i+1)*m] = s*h1 + c*h2;
299 }
300
301 h1 = H[j*(m+1)];
302 h2 = H[j+(j+1)*m];
303 rr = sqrt(h1*h1 + h2*h2);
304 c = h1/rr;
305 s = -h2/rr;
306 H[j*(m+1)] = c*h1 - s*h2;
307 g[j*2] = c;
308 g[j*2+1] = s;
309
310 // Modify e1 vector
311 y[j+1] = y[j];
312 y[j] *= c;
313 y[j+1] *= s;
314 }
315
316 // Back substitution
317 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
318
319 // Update
320 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
321
322 return UCFD_STATUS_SUCCESS;
323}
ucfd_status_t serial_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
Definition: gmres.c:15
ucfd_status_t single_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
Definition: gmres.c:231
ucfd_status_t serial_gmres2(sparse_matrix_t op, ucfd_precon_type_t precon_type, const int neles, const int nvars, const int m, const int *row_ptr, const int *col_ind, const int *diag_ind, double *pre_nnz_data, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
Definition: gmres.c:122
#define nvars
Definition: mpi3d.c:31
ucfd_precon_status_t ilu0_sweep_bsr_mkl(sparse_matrix_t op, double *b)
Definition: precon.c:70
ucfd_precon_status_t ilu0_sweep_bsr(const int neles, const int nvars, const int *row_ptr, const int *col_ind, const int *diag_ind, double *nnz_data, double *b)
Definition: precon.c:96
ucfd_status_t
Definition: types.h:5
@ UCFD_STATUS_CONVERGED
Definition: types.h:7
@ UCFD_STATUS_ERROR
Definition: types.h:9
@ UCFD_STATUS_SUCCESS
Definition: types.h:8
ucfd_precon_type_t
Definition: types.h:14
@ ILU0
Definition: types.h:16
@ LUSGS
Definition: types.h:17