UCFD_SPARSE  v1.1
Documentation
Loading...
Searching...
No Matches
krylov.c
Go to the documentation of this file.
1
34#include "krylov.h"
35#include "precon.h"
36#include <math.h>
37#include <omp.h>
38
39
46ucfd_status_t serial_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, int bn, int block, int m, int *iter, double tol,
47 int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data,
48 double *x, double *b, double *H, double *V, double *g, double *y, double *w, double *r)
49{
50 int it, i, j, itmax;
51 const int n = bn * BLOCK;
52 double beta, tmp, c, s, h1, h2, rr;
53 ucfd_precon_solve psolve;
54 sparse_status_t mklstat;
55
56 /* --------------------------------
57 * Set variables
58 -------------------------------- */
59 // Sparse matrix description for system matrix A(op)
60 struct matrix_descr descr;
61 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
62 descr.mode = 0;
63 descr.diag = 0;
64
65 // Set preconditioner solver
66 if (precon_type == BILU) psolve = bilu_psolve;
67 else if (precon_type == LUSGS) psolve = lusgs_psolve;
68 else psolve = none_psolve;
69
70 // Maximum iteration
71 itmax = *iter;
72 it = 0;
73
74 /* --------------------------------
75 * Initial residual
76 1) r := -A @ x
77 2) r := b -A @ x (r += rhs)
78 -------------------------------- */
79 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, x, 0.0, r);
80 if (mklstat != SPARSE_STATUS_SUCCESS) {
81 *iter = mklstat;
82 return UCFD_MKL_FAILED;
83 }
84 cblas_daxpy(n, 1.0, b, 1, r, 1);
85
86 /* --------------------------------
87 * Outer iteration
88 -------------------------------- */
89 while (it < itmax)
90 {
91 beta = cblas_dnrm2(n, r, 1);
92 if (beta < tol) {
93 *iter = it;
95 }
96
97 // Left-preconditioning
98 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, r);
99
100 beta = cblas_dnrm2(n, r, 1);
101 y[0] = beta;
102
103 // V = r/beta
104 #pragma omp parallel for
105 for (i=0; i<n; i++) {
106 V[i] = r[i]/beta;
107 }
108
109 /* --------------------------------
110 * Inner iteration (Restart)
111 -------------------------------- */
112 for (j = 0; j < m; j++)
113 {
114 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j * n], 0.0, w);
115 if (mklstat != SPARSE_STATUS_SUCCESS) {
116 *iter = mklstat;
117 return UCFD_MKL_FAILED;
118 }
119 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, w);
120
121 // Arnoldi iteration
122 for (i = 0; i < (j + 1); i++) {
123 tmp = cblas_ddot(n, w, 1, &V[i * n], 1);
124 H[j + m * i] = tmp;
125 cblas_daxpy(n, -tmp, &V[i * n], 1, w, 1);
126 }
127
128 tmp = cblas_dnrm2(n, w, 1);
129 H[j + (j + 1) * m] = tmp;
130
131 #pragma omp parallel for
132 for (i=0; i<n; i++) {
133 V[(j+1)*n+i] = w[i]/tmp;
134 }
135
136 // Givens Rotation
137 for (i = 0; i < j; i++)
138 {
139 c = g[i * 2]; // g[i, 0]
140 s = g[i * 2 + 1]; // g[i, 1]
141 h1 = H[j + i * m];
142 h2 = H[j + (i + 1) * m];
143 H[j + i * m] = c * h1 - s * h2;
144 H[j + (i + 1) * m] = s * h1 + c * h2;
145 }
146
147 h1 = H[j * (m + 1)];
148 h2 = H[j + (j + 1) * m];
149 rr = sqrt(h1 * h1 + h2 * h2);
150 c = h1 / rr;
151 s = -h2 / rr;
152 H[j * (m + 1)] = c * h1 - s * h2;
153 g[j * 2] = c;
154 g[j * 2 + 1] = s;
155
156 // Modify e1 vector
157 y[j + 1] = y[j];
158 y[j] *= c;
159 y[j + 1] *= s;
160 }
161
162 // Back substitution
163 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
164
165 // Update
166 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, x, 1);
167
168 // Computes next iteration residual
169 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, x, 0.0, r);
170 if (mklstat != SPARSE_STATUS_SUCCESS) {
171 *iter = mklstat;
172 return UCFD_MKL_FAILED;
173 }
174 cblas_daxpy(n, 1.0, b, 1, r, 1);
175
176 ++it;
177 }
178 return UCFD_MAX_ITER;
179}
180
185ucfd_status_t step_gmres(sparse_matrix_t op, ucfd_precon_solve psolve, const struct matrix_descr descr,
186 int bn, int m, int *flag,
187 int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data,
188 double *x, double *b, double *H, double *V, double *g, double *y, double *w, double *r)
189{
190 int i, j;
191 const int n = bn * BLOCK;
192 double beta, tmp, c, s, h1, h2, rr;
193 sparse_status_t mklstat;
194 *flag = 0;
195
196 // Apply preconditioner
197 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, r);
198
199 beta = cblas_dnrm2(n, r, 1);
200 y[0] = beta;
201
202 // V = r/beta
203 #pragma omp parallel for
204 for (i=0; i<n; i++) {
205 V[i] = r[i]/beta;
206 }
207
208 /* --------------------------------
209 * Inner iteration (Restart)
210 -------------------------------- */
211 for (j = 0; j < m; j++)
212 {
213 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j * n], 0.0, w);
214 if (mklstat != SPARSE_STATUS_SUCCESS) {
215 *flag = mklstat;
216 return UCFD_MKL_FAILED;
217 }
218
219 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, w);
220
221 // Arnoldi iteration
222 for (i = 0; i < (j + 1); i++)
223 {
224 tmp = cblas_ddot(n, w, 1, &V[i * n], 1);
225 H[j + m * i] = tmp;
226 cblas_daxpy(n, -tmp, &V[i * n], 1, w, 1);
227 }
228
229 tmp = cblas_dnrm2(n, w, 1);
230 H[j + (j + 1) * m] = tmp;
231
232 #pragma omp parallel for
233 for (i=0; i<n; i++) {
234 V[(j+1)*n+i] = w[i]/tmp;
235 }
236
237 // Givens Rotation
238 for (i = 0; i < j; i++)
239 {
240 c = g[i * 2]; // g[i, 0]
241 s = g[i * 2 + 1]; // g[i, 1]
242 h1 = H[j + i * m];
243 h2 = H[j + (i + 1) * m];
244 H[j + i * m] = c * h1 - s * h2;
245 H[j + (i + 1) * m] = s * h1 + c * h2;
246 }
247
248 h1 = H[j * (m + 1)];
249 h2 = H[j + (j + 1) * m];
250 rr = sqrt(h1 * h1 + h2 * h2);
251 c = h1 / rr;
252 s = -h2 / rr;
253 H[j * (m + 1)] = c * h1 - s * h2;
254 g[j * 2] = c;
255 g[j * 2 + 1] = s;
256
257 // Modify e1 vector
258 y[j + 1] = y[j];
259 y[j] *= c;
260 y[j + 1] *= s;
261 }
262
263 // Back substitution
264 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
265
266 // Update
267 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, x, 1);
268
269 // Computes next iteration residual
270 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, x, 0.0, r);
271 if (mklstat != SPARSE_STATUS_SUCCESS) {
272 *flag = mklstat;
273 return UCFD_MKL_FAILED;
274 }
275 cblas_daxpy(n, 1.0, b, 1, r, 1);
276
277 return UCFD_STATUS_SUCCESS;
278}
279
286ucfd_status_t serial_bicgstab(sparse_matrix_t op, ucfd_precon_type_t precon_type, int bn, int *iter, double tol,
287 int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data,
288 double *x, double *b, double *r, double *p, double *v, double *s, double *t)
289{
290 int it, itmax;
291 const int n = bn * BLOCK;
292 double rho, rhoprev, alpha, beta, omega, resid;
293 double rv, ts, tt;
294 ucfd_precon_solve psolve;
295 sparse_status_t mklstat;
296
297 // Sparse matrix description for system matrix A(op)
298 struct matrix_descr descr;
299 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
300 descr.mode = 0;
301 descr.diag = 0;
302
303 // Set preconditioner solver
304 if (precon_type == BILU) psolve = bilu_psolve;
305 else if (precon_type == LUSGS) psolve = lusgs_psolve;
306 else psolve = none_psolve;
307
308 itmax = *iter;
309 it = 0;
310
311 // Computes residual : r := b - A @ x
312 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, x, 0.0, r);
313 if (mklstat != SPARSE_STATUS_SUCCESS) {
314 *iter = mklstat;
315 return UCFD_MKL_FAILED;
316 }
317 cblas_daxpy(n, 1.0, b, 1, r, 1);
318
319 // Choose r\tilde as r
320 /* r[:n] = r
321 r[n:] = r\tilde */
322 cblas_dcopy(n, r, 1, &r[n], 1);
323
324 // Outer iteration
325 while (it < itmax)
326 {
327 resid = cblas_dnrm2(n, r, 1);
328 if (resid < tol)
330
331 rho = cblas_ddot(n, r, 1, &r[n], 1);
332
333 // Rho breakdown
334 if (fabs(rho) < eps)
336
337 if (it == 0)
338 cblas_dcopy(n, r, 1, p, 1);
339 else
340 {
341 beta = (rho / rhoprev) * (alpha / omega);
342
343 // p = r + beta*(p - omega*v)
344 cblas_daxpy(n, -omega, v, 1, p, 1);
345 cblas_dscal(n, beta, p, 1);
346 cblas_daxpy(n, 1.0, r, 1, p, 1);
347 }
348
349 // phat = inv(M) @ p
350 cblas_dcopy(n, p, 1, &p[n], 1);
351 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, &p[n]);
352
353 // v = A @ phat
354 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &p[0], 0.0, v);
355 if (mklstat != SPARSE_STATUS_SUCCESS)
356 {
357 *iter = mklstat;
358 return UCFD_MKL_FAILED;
359 }
360
361 rv = cblas_ddot(n, v, 1, &r[n], 1);
362 alpha = rho / rv;
363
364 // s = r - alpha*v
365 // 1) r := r - alpha*v (r = s)
366 cblas_daxpy(n, -alpha, v, 1, r, 1);
367 // 2) s := r
368 cblas_dcopy(n, r, 1, s, 1);
369
370 // x := x + alpha*phat
371 cblas_daxpy(n, alpha, &p[n], 1, x, 1);
372 resid = cblas_dnrm2(n, s, 1);
373 if (resid < tol)
375
376 // shat = inv(M) @ s
377 cblas_dcopy(n, s, 1, &s[n], 1);
378 psolve(bn, row_ptr, col_ind, diag_ind, precon_nnz_data, &s[n]);
379
380 mklstat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &s[0], 0.0, t);
381 if (mklstat != SPARSE_STATUS_SUCCESS)
382 {
383 *iter = mklstat;
384 return UCFD_MKL_FAILED;
385 }
386
387 ts = cblas_ddot(n, t, 1, s, 1);
388 tt = cblas_ddot(n, t, 1, t, 1);
389 omega = ts / tt;
390
391 // Update solution
392 cblas_daxpy(n, omega, &s[0], 1, x, 1);
393
394 // r = s - omega*t
395 cblas_daxpy(n, -omega, t, 1, r, 1);
396
397 // Update rho
398 rhoprev = rho;
399
400 ++it;
401 }
402
403 return UCFD_MAX_ITER;
404}
ucfd_status_t serial_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, int bn, int block, int m, int *iter, double tol, int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data, double *x, double *b, double *H, double *V, double *g, double *y, double *w, double *r)
Serial GMRES routine.
Definition: krylov.c:46
ucfd_status_t step_gmres(sparse_matrix_t op, ucfd_precon_solve psolve, const struct matrix_descr descr, int bn, int m, int *flag, int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data, double *x, double *b, double *H, double *V, double *g, double *y, double *w, double *r)
Single GMRES iteration routine.
Definition: krylov.c:185
ucfd_status_t serial_bicgstab(sparse_matrix_t op, ucfd_precon_type_t precon_type, int bn, int *iter, double tol, int *row_ptr, int *col_ind, int *diag_ind, double *precon_nnz_data, double *x, double *b, double *r, double *p, double *v, double *s, double *t)
Serial BiCGstab routine.
Definition: krylov.c:286
Header file for Krylov subspace methods.
#define eps
Definition: krylov.h:12
Header file for preconditioners for Krylov subspace methods.
void none_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Unpreconditioned solver.
Definition: precon.c:255
void lusgs_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Solver function for LU-SGS preconditioner.
Definition: precon.c:187
void bilu_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Solver function for BILU preconditioner.
Definition: precon.c:100
void(* ucfd_precon_solve)(int, int *, int *, int *, double *, double *)
Definition: ucfd_types.h:28
ucfd_status_t
Definition: ucfd_types.h:10
@ UCFD_MAX_ITER
Definition: ucfd_types.h:17
@ UCFD_MKL_FAILED
Definition: ucfd_types.h:14
@ UCFD_STATUS_CONVERGED
Definition: ucfd_types.h:15
@ UCFD_STATUS_SUCCESS
Definition: ucfd_types.h:12
@ UCFD_STATUS_RHO_BREAKDOWN
Definition: ucfd_types.h:11
ucfd_precon_type_t
Definition: ucfd_types.h:22
@ LUSGS
Definition: ucfd_types.h:25
@ BILU
Definition: ucfd_types.h:24