16 const double tol,
const double itmax,
double *dub,
double *rhsb,
double *H,
double *V,
double *g,
double *y,
double *w,
double *r)
19 double beta, tmp, c, s, h1, h2, rr;
20 sparse_status_t status;
23 struct matrix_descr descr;
24 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
29 cblas_dcopy(n, rhsb, 1, r, 1);
30 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
31 if (status != SPARSE_STATUS_SUCCESS) printf(
"Residual computation error\n");
33 for (it=0; it<itmax; it++) {
34 beta = cblas_dnrm2(n, r, 1);
35 printf(
"%d iteration, %.10f residual\n", it, beta);
38 if (precon_type ==
ILU0) {
40 beta = cblas_dnrm2(n, r, 1);
42 else if (precon_type ==
LUSGS) {
44 beta = cblas_dnrm2(n, r, 1);
50 cblas_dcopy(n, r, 1, V, 1);
51 cblas_dscal(n, 1/beta, V, 1);
54 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
55 if (status != SPARSE_STATUS_SUCCESS)
return -1;
57 if (precon_type ==
ILU0) {
60 else if (precon_type ==
LUSGS) {
65 for (i=0; i<(j+1); i++) {
66 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
68 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
71 tmp = cblas_dnrm2(n, w, 1);
73 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
74 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
82 H[j+i*m] = c*h1 - s*h2;
83 H[j+(i+1)*m] = s*h1 + c*h2;
88 rr = sqrt(h1*h1 + h2*h2);
91 H[j*(m+1)] = c*h1 - s*h2;
102 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
105 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
108 cblas_dcopy(n, rhsb, 1, r, 1);
109 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
110 if (status != SPARSE_STATUS_SUCCESS) printf(
"Residual computation error\n");
114 printf(
"Not converged in %d iteration, residual=%.5f", it, beta);
123 const int *row_ptr,
const int *col_ind,
const int *diag_ind,
double *pre_nnz_data, \
124 const double tol,
const double itmax,
double *dub,
double *rhsb,
double *H,
double *V,
double *g,
double *y,
double *w,
double *r)
127 const int n = neles *
nvars;
128 double beta, tmp, c, s, h1, h2, rr;
129 sparse_status_t status;
132 struct matrix_descr descr;
133 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138 cblas_dcopy(n, rhsb, 1, r, 1);
139 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
140 if (status != SPARSE_STATUS_SUCCESS) printf(
"Residual computation error\n");
142 for (it=0; it<itmax; it++) {
143 beta = cblas_dnrm2(n, r, 1);
147 if (precon_type ==
ILU0) {
149 beta = cblas_dnrm2(n, r, 1);
151 else if (precon_type ==
LUSGS) {
153 beta = cblas_dnrm2(n, r, 1);
159 cblas_dcopy(n, r, 1, V, 1);
160 cblas_dscal(n, 1/beta, V, 1);
162 for (j=0; j<m; j++) {
163 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
164 if (status != SPARSE_STATUS_SUCCESS)
return -1;
166 if (precon_type ==
ILU0) {
169 else if (precon_type ==
LUSGS) {
174 for (i=0; i<(j+1); i++) {
175 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
177 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
180 tmp = cblas_dnrm2(n, w, 1);
182 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
183 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
186 for (i=0; i<j; i++) {
191 H[j+i*m] = c*h1 - s*h2;
192 H[j+(i+1)*m] = s*h1 + c*h2;
197 rr = sqrt(h1*h1 + h2*h2);
200 H[j*(m+1)] = c*h1 - s*h2;
211 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
214 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
217 cblas_dcopy(n, rhsb, 1, r, 1);
218 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
219 if (status != SPARSE_STATUS_SUCCESS) printf(
"Residual computation error\n");
223 printf(
"Not converged in %d iteration, residual=%.5f", it, beta);
232 const double tol,
const double itmax,
double *dub,
double *rhsb,
double *H,
double *V,
double *g,
double *y,
double *w,
double *r)
235 double beta, tmp, c, s, h1, h2, rr;
236 sparse_status_t status;
239 struct matrix_descr descr;
240 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
245 cblas_dcopy(n, rhsb, 1, r, 1);
246 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.0, op, descr, dub, 1.0, r);
247 if (status != SPARSE_STATUS_SUCCESS) printf(
"Residual computation error\n");
249 beta = cblas_dnrm2(n, r, 1);
251 if (precon_type ==
ILU0) {
253 beta = cblas_dnrm2(n, r, 1);
255 else if (precon_type ==
LUSGS) {
257 beta = cblas_dnrm2(n, r, 1);
264 cblas_dcopy(n, r, 1, V, 1);
265 cblas_dscal(n, 1/beta, V, 1);
267 for (j=0; j<m; j++) {
268 status = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, op, descr, &V[j*n], 0.0, w);
269 if (status != SPARSE_STATUS_SUCCESS)
272 if (precon_type ==
ILU0) {
275 else if (precon_type ==
LUSGS) {
280 for (i=0; i<j+1; i++) {
281 tmp = cblas_ddot(n, w, 1, &V[i*n], 1);
283 cblas_daxpy(n, -tmp, &V[i*n], 1, w, 1);
286 tmp = cblas_dnrm2(n, w, 1);
288 cblas_dcopy(n, w, 1, &V[(j+1)*n], 1);
289 cblas_dscal(n, 1/tmp, &V[(j+1)*n], 1);
292 for (i=0; i<j; i++) {
297 H[j+i*m] = c*h1 - s*h2;
298 H[j+(i+1)*m] = s*h1 + c*h2;
303 rr = sqrt(h1*h1 + h2*h2);
306 H[j*(m+1)] = c*h1 - s*h2;
317 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit, m, H, m, y, 1);
320 cblas_dgemv(CblasRowMajor, CblasTrans, m, n, 1.0, V, n, y, 1, 1.0, dub, 1);
ucfd_status_t serial_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
ucfd_status_t single_gmres(sparse_matrix_t op, ucfd_precon_type_t precon_type, sparse_matrix_t precon, const int n, const int m, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
ucfd_status_t serial_gmres2(sparse_matrix_t op, ucfd_precon_type_t precon_type, const int neles, const int nvars, const int m, const int *row_ptr, const int *col_ind, const int *diag_ind, double *pre_nnz_data, const double tol, const double itmax, double *dub, double *rhsb, double *H, double *V, double *g, double *y, double *w, double *r)
ucfd_precon_status_t ilu0_sweep_bsr_mkl(sparse_matrix_t op, double *b)
ucfd_precon_status_t ilu0_sweep_bsr(const int neles, const int nvars, const int *row_ptr, const int *col_ind, const int *diag_ind, double *nnz_data, double *b)