30#define blkdim BLOCK*BLOCK
36 int *row_ptr,
int *col_ind,
int *diag_ind,
double *nnz_data)
38 int idx, kdx, ck, row, col, ele;
39 int st, ed, jed, kk, kst, ked, jj, iwj;
40 double v, Aik[BLOCK][BLOCK];
42 for (idx = 0; idx < bn; idx++)
50 for (kdx = st; kdx < ed; kdx++)
60 for (row=0; row<BLOCK; row++) {
61 for (col=0; col<BLOCK; col++)
62 Aik[row][col] = nnz_data[kdx*
blkdim+row*BLOCK+col];
66 for (jj=kst; jj<ked; jj++) iw[col_ind[jj]] = jj;
69 for (jj=kdx+1; jj<jed; jj++) {
70 iwj = iw[col_ind[jj]];
74 for (row=0; row<BLOCK; row++) {
75 for (col=0; col<BLOCK; col++) {
77 for (ele=0; ele<BLOCK; ele++)
78 v += Aik[row][ele] * nnz_data[iwj*
blkdim+ele*BLOCK+col];
79 nnz_data[jj*
blkdim+row*BLOCK+col] -= v;
86 for (jj=kst; jj<ked; jj++) iw[col_ind[jj]] = 0;
101 int *col_ind,
int *diag_ind,
double *nnz_data,
double *b)
103 int idx, jdx, kdx, row, col;
104 int dd, st, ed, cind;
105 double v, arr[BLOCK];
108 for (idx = 0; idx < bn; idx++)
114 for (kdx = 0; kdx < BLOCK; kdx++)
115 arr[kdx] = b[kdx + idx * BLOCK];
117 for (jdx = st; jdx < dd; jdx++)
121 for (row = 0; row < BLOCK; row++)
124 for (col = 0; col < BLOCK; col++)
125 v += nnz_data[col + row * BLOCK + jdx *
blkdim] * b[col + cind * BLOCK];
130 for (kdx = 0; kdx < BLOCK; kdx++)
131 b[kdx + idx * BLOCK] = arr[kdx];
135 for (idx = bn - 1; idx > -1; idx--)
138 ed = row_ptr[idx + 1];
141 for (kdx = 0; kdx < BLOCK; kdx++)
142 arr[kdx] = b[kdx + idx * BLOCK];
144 for (jdx = dd + 1; jdx < ed; jdx++)
148 for (row = 0; row < BLOCK; row++)
151 for (col = 0; col < BLOCK; col++)
152 v += nnz_data[col + row * BLOCK + jdx *
blkdim] * b[col + cind * BLOCK];
159 for (row=0; row<BLOCK; row++) b[idx*BLOCK+row] = arr[row];
173 #pragma omp parallel for private(didx)
174 for (idx = 0; idx < bn; idx++)
176 didx = diag_ind[idx];
188 int *col_ind,
int *diag_ind,
double *nnz_data,
double *b)
190 int idx, jdx, kdx, row, col;
191 int dd, st, ed, cind;
192 double v, arr[BLOCK];
195 for (idx = 0; idx < bn; idx++)
201 for (kdx = 0; kdx < BLOCK; kdx++)
202 arr[kdx] = b[kdx + idx * BLOCK];
205 for (jdx = st; jdx < dd; jdx++)
208 for (row = 0; row < BLOCK; row++)
211 for (col = 0; col < BLOCK; col++)
212 v += nnz_data[col + row * BLOCK + jdx *
blkdim] * b[col + cind * BLOCK];
219 for (kdx = 0; kdx < BLOCK; kdx++)
220 b[kdx + idx * BLOCK] = arr[kdx];
224 for (idx = bn - 1; idx > -1; idx--)
227 ed = row_ptr[idx + 1];
230 for (kdx = 0; kdx < BLOCK; kdx++)
234 for (jdx = dd + 1; jdx < ed; jdx++)
237 for (row = 0; row < BLOCK; row++)
240 for (col = 0; col < BLOCK; col++)
241 v += nnz_data[col + row * BLOCK + jdx *
blkdim] * b[col + cind * BLOCK];
250 for (kdx = 0; kdx < BLOCK; kdx++)
251 b[kdx + idx * BLOCK] -= arr[kdx];
256 int *col_ind,
int *diag_ind,
double *nnz_data,
double *b) {};
void lusub(UCFD_FLOAT *LU, UCFD_FLOAT *b)
Forward/Backward substitution function.
void lusubmattrans(UCFD_FLOAT *LU, UCFD_FLOAT *B)
Forward/Backward substitution for transposed matrix.
void ludcmp(UCFD_FLOAT *A)
LU Decomposition function.
ucfd_status_t bilu_prepare(int bn, int *iw, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data)
Block fill-in Incomplete LU preconditioner for BSR matrix format.
void none_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Unpreconditioned solver.
void lusgs_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Solver function for LU-SGS preconditioner.
ucfd_status_t lusgs_prepare(int bn, int *diag_ind, double *nnz_data)
LU-SGS preconditioner for BSR matrix format.
void bilu_psolve(int bn, int *row_ptr, int *col_ind, int *diag_ind, double *nnz_data, double *b)
Solver function for BILU preconditioner.
Header file for preconditioners for Krylov subspace methods.