Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: /*
7: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10: */
11: #if 0
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: PetscFunctionBegin;
22: /* pick initial row */
23: best = -1;
24: for (i = 0; i < n; i++) {
25: future = 0.0;
26: for (j = ai[i]; j < ai[i + 1]; j++) {
27: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28: else past = PetscAbsScalar(aa[j]);
29: }
30: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31: if (past / future > best) {
32: best = past / future;
33: current = i;
34: }
35: }
37: PetscCall(PetscMalloc1(n, &done));
38: PetscCall(PetscArrayzero(done, n));
39: PetscCall(PetscMalloc1(n, &order));
40: order[0] = current;
41: for (i = 0; i < n - 1; i++) {
42: done[current] = PETSC_TRUE;
43: best = -1;
44: /* loop over all neighbors of current pivot */
45: for (j = ai[current]; j < ai[current + 1]; j++) {
46: jj = aj[j];
47: if (done[jj]) continue;
48: /* loop over columns of potential next row computing weights for below and above diagonal */
49: past = future = 0.0;
50: for (k = ai[jj]; k < ai[jj + 1]; k++) {
51: kk = aj[k];
52: if (done[kk]) past += PetscAbsScalar(aa[k]);
53: else if (kk != jj) future += PetscAbsScalar(aa[k]);
54: }
55: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56: if (past / future > best) {
57: best = past / future;
58: newcurrent = jj;
59: }
60: }
61: if (best == -1) { /* no neighbors to select from so select best of all that remain */
62: best = -1;
63: for (k = 0; k < n; k++) {
64: if (done[k]) continue;
65: future = 0.0;
66: past = 0.0;
67: for (j = ai[k]; j < ai[k + 1]; j++) {
68: kk = aj[j];
69: if (done[kk]) past += PetscAbsScalar(aa[j]);
70: else if (kk != k) future += PetscAbsScalar(aa[j]);
71: }
72: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73: if (past / future > best) {
74: best = past / future;
75: newcurrent = k;
76: }
77: }
78: }
79: PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80: current = newcurrent;
81: order[i + 1] = current;
82: }
83: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84: *icol = *irow;
85: PetscCall(PetscObjectReference((PetscObject)*irow));
86: PetscCall(PetscFree(done));
87: PetscCall(PetscFree(order));
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #endif
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93: {
94: PetscFunctionBegin;
95: *type = MATSOLVERPETSC;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101: PetscInt n = A->rmap->n;
103: PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105: PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
106: #endif
107: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
108: PetscCall(MatSetSizes(*B, n, n, n, n));
109: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
110: PetscCall(MatSetType(*B, MATSEQAIJ));
112: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
113: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
115: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
116: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
117: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
118: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
119: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
120: PetscCall(MatSetType(*B, MATSEQSBAIJ));
121: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
123: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
124: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
125: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
126: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
127: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
128: (*B)->factortype = ftype;
130: PetscCall(PetscFree((*B)->solvertype));
131: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
132: (*B)->canuseordering = PETSC_TRUE;
133: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: #if 0
138: // currently unused
139: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
140: {
141: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
142: IS isicol;
143: const PetscInt *r, *ic;
144: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
145: PetscInt *bi, *bj, *ajtmp;
146: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
147: PetscReal f;
148: PetscInt nlnk, *lnk, k, **bi_ptr;
149: PetscFreeSpaceList free_space = NULL, current_space = NULL;
150: PetscBT lnkbt;
151: PetscBool missing;
153: PetscFunctionBegin;
154: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
155: PetscCall(MatMissingDiagonal(A, &missing, &i));
156: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
158: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
159: PetscCall(ISGetIndices(isrow, &r));
160: PetscCall(ISGetIndices(isicol, &ic));
162: /* get new row pointers */
163: PetscCall(PetscMalloc1(n + 1, &bi));
164: bi[0] = 0;
166: /* bdiag is location of diagonal in factor */
167: PetscCall(PetscMalloc1(n + 1, &bdiag));
168: bdiag[0] = 0;
170: /* linked list for storing column indices of the active row */
171: nlnk = n + 1;
172: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
174: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
176: /* initial FreeSpace size is f*(ai[n]+1) */
177: f = info->fill;
178: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
179: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
180: current_space = free_space;
182: for (i = 0; i < n; i++) {
183: /* copy previous fill into linked list */
184: nzi = 0;
185: nnz = ai[r[i] + 1] - ai[r[i]];
186: ajtmp = aj + ai[r[i]];
187: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
188: nzi += nlnk;
190: /* add pivot rows into linked list */
191: row = lnk[n];
192: while (row < i) {
193: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
194: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
195: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
196: nzi += nlnk;
197: row = lnk[row];
198: }
199: bi[i + 1] = bi[i] + nzi;
200: im[i] = nzi;
202: /* mark bdiag */
203: nzbd = 0;
204: nnz = nzi;
205: k = lnk[n];
206: while (nnz-- && k < i) {
207: nzbd++;
208: k = lnk[k];
209: }
210: bdiag[i] = bi[i] + nzbd;
212: /* if free space is not available, make more free space */
213: if (current_space->local_remaining < nzi) {
214: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
215: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
216: reallocs++;
217: }
219: /* copy data into free space, then initialize lnk */
220: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
222: bi_ptr[i] = current_space->array;
223: current_space->array += nzi;
224: current_space->local_used += nzi;
225: current_space->local_remaining -= nzi;
226: }
227: #if defined(PETSC_USE_INFO)
228: if (ai[n] != 0) {
229: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
230: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
231: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
232: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
233: PetscCall(PetscInfo(A, "for best performance.\n"));
234: } else {
235: PetscCall(PetscInfo(A, "Empty matrix\n"));
236: }
237: #endif
239: PetscCall(ISRestoreIndices(isrow, &r));
240: PetscCall(ISRestoreIndices(isicol, &ic));
242: /* destroy list of free space and other temporary array(s) */
243: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
244: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
245: PetscCall(PetscLLDestroy(lnk, lnkbt));
246: PetscCall(PetscFree2(bi_ptr, im));
248: /* put together the new matrix */
249: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
250: b = (Mat_SeqAIJ *)(B)->data;
252: b->free_a = PETSC_TRUE;
253: b->free_ij = PETSC_TRUE;
254: b->singlemalloc = PETSC_FALSE;
256: PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
257: b->j = bj;
258: b->i = bi;
259: b->diag = bdiag;
260: b->ilen = NULL;
261: b->imax = NULL;
262: b->row = isrow;
263: b->col = iscol;
264: PetscCall(PetscObjectReference((PetscObject)isrow));
265: PetscCall(PetscObjectReference((PetscObject)iscol));
266: b->icol = isicol;
267: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
269: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
270: b->maxnz = b->nz = bi[n];
272: (B)->factortype = MAT_FACTOR_LU;
273: (B)->info.factor_mallocs = reallocs;
274: (B)->info.fill_ratio_given = f;
276: if (ai[n]) {
277: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
278: } else {
279: (B)->info.fill_ratio_needed = 0.0;
280: }
281: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
282: if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283: PetscFunctionReturn(PETSC_SUCCESS);
284: }
285: #endif
287: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
288: {
289: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
290: IS isicol;
291: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
292: PetscInt i, n = A->rmap->n;
293: PetscInt *bi, *bj;
294: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
295: PetscReal f;
296: PetscInt nlnk, *lnk, k, **bi_ptr;
297: PetscFreeSpaceList free_space = NULL, current_space = NULL;
298: PetscBT lnkbt;
299: PetscBool missing;
301: PetscFunctionBegin;
302: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
303: PetscCall(MatMissingDiagonal(A, &missing, &i));
304: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
306: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
307: PetscCall(ISGetIndices(isrow, &r));
308: PetscCall(ISGetIndices(isicol, &ic));
310: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311: PetscCall(PetscMalloc1(n + 1, &bi));
312: PetscCall(PetscMalloc1(n + 1, &bdiag));
313: bi[0] = bdiag[0] = 0;
315: /* linked list for storing column indices of the active row */
316: nlnk = n + 1;
317: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
319: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
321: /* initial FreeSpace size is f*(ai[n]+1) */
322: f = info->fill;
323: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
324: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
325: current_space = free_space;
327: for (i = 0; i < n; i++) {
328: /* copy previous fill into linked list */
329: nzi = 0;
330: nnz = ai[r[i] + 1] - ai[r[i]];
331: ajtmp = aj + ai[r[i]];
332: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
333: nzi += nlnk;
335: /* add pivot rows into linked list */
336: row = lnk[n];
337: while (row < i) {
338: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
339: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
341: nzi += nlnk;
342: row = lnk[row];
343: }
344: bi[i + 1] = bi[i] + nzi;
345: im[i] = nzi;
347: /* mark bdiag */
348: nzbd = 0;
349: nnz = nzi;
350: k = lnk[n];
351: while (nnz-- && k < i) {
352: nzbd++;
353: k = lnk[k];
354: }
355: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357: /* if free space is not available, make more free space */
358: if (current_space->local_remaining < nzi) {
359: /* estimated additional space needed */
360: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
361: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
362: reallocs++;
363: }
365: /* copy data into free space, then initialize lnk */
366: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
368: bi_ptr[i] = current_space->array;
369: current_space->array += nzi;
370: current_space->local_used += nzi;
371: current_space->local_remaining -= nzi;
372: }
374: PetscCall(ISRestoreIndices(isrow, &r));
375: PetscCall(ISRestoreIndices(isicol, &ic));
377: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
378: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
379: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
380: PetscCall(PetscLLDestroy(lnk, lnkbt));
381: PetscCall(PetscFree2(bi_ptr, im));
383: /* put together the new matrix */
384: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
385: b = (Mat_SeqAIJ *)(B)->data;
387: b->free_a = PETSC_TRUE;
388: b->free_ij = PETSC_TRUE;
389: b->singlemalloc = PETSC_FALSE;
391: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
393: b->j = bj;
394: b->i = bi;
395: b->diag = bdiag;
396: b->ilen = NULL;
397: b->imax = NULL;
398: b->row = isrow;
399: b->col = iscol;
400: PetscCall(PetscObjectReference((PetscObject)isrow));
401: PetscCall(PetscObjectReference((PetscObject)iscol));
402: b->icol = isicol;
403: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
405: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
406: b->maxnz = b->nz = bdiag[0] + 1;
408: B->factortype = MAT_FACTOR_LU;
409: B->info.factor_mallocs = reallocs;
410: B->info.fill_ratio_given = f;
412: if (ai[n]) {
413: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
414: } else {
415: B->info.fill_ratio_needed = 0.0;
416: }
417: #if defined(PETSC_USE_INFO)
418: if (ai[n] != 0) {
419: PetscReal af = B->info.fill_ratio_needed;
420: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
421: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
422: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
423: PetscCall(PetscInfo(A, "for best performance.\n"));
424: } else {
425: PetscCall(PetscInfo(A, "Empty matrix\n"));
426: }
427: #endif
428: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
430: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
431: PetscFunctionReturn(PETSC_SUCCESS);
432: }
434: /*
435: Trouble in factorization, should we dump the original matrix?
436: */
437: PetscErrorCode MatFactorDumpMatrix(Mat A)
438: {
439: PetscBool flg = PETSC_FALSE;
441: PetscFunctionBegin;
442: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
443: if (flg) {
444: PetscViewer viewer;
445: char filename[PETSC_MAX_PATH_LEN];
447: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
448: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
449: PetscCall(MatView(A, viewer));
450: PetscCall(PetscViewerDestroy(&viewer));
451: }
452: PetscFunctionReturn(PETSC_SUCCESS);
453: }
455: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
456: {
457: Mat C = B;
458: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
459: IS isrow = b->row, isicol = b->icol;
460: const PetscInt *r, *ic, *ics;
461: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
462: PetscInt i, j, k, nz, nzL, row, *pj;
463: const PetscInt *ajtmp, *bjtmp;
464: MatScalar *rtmp, *pc, multiplier, *pv;
465: const MatScalar *aa = a->a, *v;
466: PetscBool row_identity, col_identity;
467: FactorShiftCtx sctx;
468: const PetscInt *ddiag;
469: PetscReal rs;
470: MatScalar d;
472: PetscFunctionBegin;
473: /* MatPivotSetUp(): initialize shift context sctx */
474: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
476: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477: ddiag = a->diag;
478: sctx.shift_top = info->zeropivot;
479: for (i = 0; i < n; i++) {
480: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481: d = (aa)[ddiag[i]];
482: rs = -PetscAbsScalar(d) - PetscRealPart(d);
483: v = aa + ai[i];
484: nz = ai[i + 1] - ai[i];
485: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486: if (rs > sctx.shift_top) sctx.shift_top = rs;
487: }
488: sctx.shift_top *= 1.1;
489: sctx.nshift_max = 5;
490: sctx.shift_lo = 0.;
491: sctx.shift_hi = 1.;
492: }
494: PetscCall(ISGetIndices(isrow, &r));
495: PetscCall(ISGetIndices(isicol, &ic));
496: PetscCall(PetscMalloc1(n + 1, &rtmp));
497: ics = ic;
499: do {
500: sctx.newshift = PETSC_FALSE;
501: for (i = 0; i < n; i++) {
502: /* zero rtmp */
503: /* L part */
504: nz = bi[i + 1] - bi[i];
505: bjtmp = bj + bi[i];
506: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
508: /* U part */
509: nz = bdiag[i] - bdiag[i + 1];
510: bjtmp = bj + bdiag[i + 1] + 1;
511: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
513: /* load in initial (unfactored row) */
514: nz = ai[r[i] + 1] - ai[r[i]];
515: ajtmp = aj + ai[r[i]];
516: v = aa + ai[r[i]];
517: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518: /* ZeropivotApply() */
519: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
521: /* elimination */
522: bjtmp = bj + bi[i];
523: row = *bjtmp++;
524: nzL = bi[i + 1] - bi[i];
525: for (k = 0; k < nzL; k++) {
526: pc = rtmp + row;
527: if (*pc != 0.0) {
528: pv = b->a + bdiag[row];
529: multiplier = *pc * (*pv);
530: *pc = multiplier;
532: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533: pv = b->a + bdiag[row + 1] + 1;
534: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
536: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537: PetscCall(PetscLogFlops(1 + 2.0 * nz));
538: }
539: row = *bjtmp++;
540: }
542: /* finished row so stick it into b->a */
543: rs = 0.0;
544: /* L part */
545: pv = b->a + bi[i];
546: pj = b->j + bi[i];
547: nz = bi[i + 1] - bi[i];
548: for (j = 0; j < nz; j++) {
549: pv[j] = rtmp[pj[j]];
550: rs += PetscAbsScalar(pv[j]);
551: }
553: /* U part */
554: pv = b->a + bdiag[i + 1] + 1;
555: pj = b->j + bdiag[i + 1] + 1;
556: nz = bdiag[i] - bdiag[i + 1] - 1;
557: for (j = 0; j < nz; j++) {
558: pv[j] = rtmp[pj[j]];
559: rs += PetscAbsScalar(pv[j]);
560: }
562: sctx.rs = rs;
563: sctx.pv = rtmp[i];
564: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565: if (sctx.newshift) break; /* break for-loop */
566: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
568: /* Mark diagonal and invert diagonal for simpler triangular solves */
569: pv = b->a + bdiag[i];
570: *pv = 1.0 / rtmp[i];
572: } /* endof for (i=0; i<n; i++) { */
574: /* MatPivotRefine() */
575: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576: /*
577: * if no shift in this attempt & shifting & started shifting & can refine,
578: * then try lower shift
579: */
580: sctx.shift_hi = sctx.shift_fraction;
581: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
583: sctx.newshift = PETSC_TRUE;
584: sctx.nshift++;
585: }
586: } while (sctx.newshift);
588: PetscCall(PetscFree(rtmp));
589: PetscCall(ISRestoreIndices(isicol, &ic));
590: PetscCall(ISRestoreIndices(isrow, &r));
592: PetscCall(ISIdentity(isrow, &row_identity));
593: PetscCall(ISIdentity(isicol, &col_identity));
594: if (b->inode.size) {
595: C->ops->solve = MatSolve_SeqAIJ_Inode;
596: } else if (row_identity && col_identity) {
597: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
598: } else {
599: C->ops->solve = MatSolve_SeqAIJ;
600: }
601: C->ops->solveadd = MatSolveAdd_SeqAIJ;
602: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
603: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
604: C->ops->matsolve = MatMatSolve_SeqAIJ;
605: C->assembled = PETSC_TRUE;
606: C->preallocated = PETSC_TRUE;
608: PetscCall(PetscLogFlops(C->cmap->n));
610: /* MatShiftView(A,info,&sctx) */
611: if (sctx.nshift) {
612: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
614: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
616: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
617: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
618: }
619: }
620: PetscFunctionReturn(PETSC_SUCCESS);
621: }
623: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
624: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
625: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
627: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
628: {
629: Mat C = B;
630: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
631: IS isrow = b->row, isicol = b->icol;
632: const PetscInt *r, *ic, *ics;
633: PetscInt nz, row, i, j, n = A->rmap->n, diag;
634: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
635: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
636: MatScalar *pv, *rtmp, *pc, multiplier, d;
637: const MatScalar *v, *aa = a->a;
638: PetscReal rs = 0.0;
639: FactorShiftCtx sctx;
640: const PetscInt *ddiag;
641: PetscBool row_identity, col_identity;
643: PetscFunctionBegin;
644: /* MatPivotSetUp(): initialize shift context sctx */
645: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
647: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648: ddiag = a->diag;
649: sctx.shift_top = info->zeropivot;
650: for (i = 0; i < n; i++) {
651: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652: d = (aa)[ddiag[i]];
653: rs = -PetscAbsScalar(d) - PetscRealPart(d);
654: v = aa + ai[i];
655: nz = ai[i + 1] - ai[i];
656: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
657: if (rs > sctx.shift_top) sctx.shift_top = rs;
658: }
659: sctx.shift_top *= 1.1;
660: sctx.nshift_max = 5;
661: sctx.shift_lo = 0.;
662: sctx.shift_hi = 1.;
663: }
665: PetscCall(ISGetIndices(isrow, &r));
666: PetscCall(ISGetIndices(isicol, &ic));
667: PetscCall(PetscMalloc1(n + 1, &rtmp));
668: ics = ic;
670: do {
671: sctx.newshift = PETSC_FALSE;
672: for (i = 0; i < n; i++) {
673: nz = bi[i + 1] - bi[i];
674: bjtmp = bj + bi[i];
675: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
677: /* load in initial (unfactored row) */
678: nz = ai[r[i] + 1] - ai[r[i]];
679: ajtmp = aj + ai[r[i]];
680: v = aa + ai[r[i]];
681: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
682: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
684: row = *bjtmp++;
685: while (row < i) {
686: pc = rtmp + row;
687: if (*pc != 0.0) {
688: pv = b->a + diag_offset[row];
689: pj = b->j + diag_offset[row] + 1;
690: multiplier = *pc / *pv++;
691: *pc = multiplier;
692: nz = bi[row + 1] - diag_offset[row] - 1;
693: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
694: PetscCall(PetscLogFlops(1 + 2.0 * nz));
695: }
696: row = *bjtmp++;
697: }
698: /* finished row so stick it into b->a */
699: pv = b->a + bi[i];
700: pj = b->j + bi[i];
701: nz = bi[i + 1] - bi[i];
702: diag = diag_offset[i] - bi[i];
703: rs = 0.0;
704: for (j = 0; j < nz; j++) {
705: pv[j] = rtmp[pj[j]];
706: rs += PetscAbsScalar(pv[j]);
707: }
708: rs -= PetscAbsScalar(pv[diag]);
710: sctx.rs = rs;
711: sctx.pv = pv[diag];
712: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
713: if (sctx.newshift) break;
714: pv[diag] = sctx.pv;
715: }
717: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
718: /*
719: * if no shift in this attempt & shifting & started shifting & can refine,
720: * then try lower shift
721: */
722: sctx.shift_hi = sctx.shift_fraction;
723: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
724: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
725: sctx.newshift = PETSC_TRUE;
726: sctx.nshift++;
727: }
728: } while (sctx.newshift);
730: /* invert diagonal entries for simpler triangular solves */
731: for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
732: PetscCall(PetscFree(rtmp));
733: PetscCall(ISRestoreIndices(isicol, &ic));
734: PetscCall(ISRestoreIndices(isrow, &r));
736: PetscCall(ISIdentity(isrow, &row_identity));
737: PetscCall(ISIdentity(isicol, &col_identity));
738: if (row_identity && col_identity) {
739: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
740: } else {
741: C->ops->solve = MatSolve_SeqAIJ_inplace;
742: }
743: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
744: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
745: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
746: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
748: C->assembled = PETSC_TRUE;
749: C->preallocated = PETSC_TRUE;
751: PetscCall(PetscLogFlops(C->cmap->n));
752: if (sctx.nshift) {
753: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
754: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
755: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
756: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
757: }
758: }
759: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
760: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
762: PetscCall(MatSeqAIJCheckInode(C));
763: PetscFunctionReturn(PETSC_SUCCESS);
764: }
766: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
768: /*
769: This routine implements inplace ILU(0) with row or/and column permutations.
770: Input:
771: A - original matrix
772: Output;
773: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774: a->j (col index) is permuted by the inverse of colperm, then sorted
775: a->a reordered accordingly with a->j
776: a->diag (ptr to diagonal elements) is updated.
777: */
778: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
779: {
780: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
781: IS isrow = a->row, isicol = a->icol;
782: const PetscInt *r, *ic, *ics;
783: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
784: PetscInt *ajtmp, nz, row;
785: PetscInt *diag = a->diag, nbdiag, *pj;
786: PetscScalar *rtmp, *pc, multiplier, d;
787: MatScalar *pv, *v;
788: PetscReal rs;
789: FactorShiftCtx sctx;
790: const MatScalar *aa = a->a, *vtmp;
792: PetscFunctionBegin;
793: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
795: /* MatPivotSetUp(): initialize shift context sctx */
796: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
798: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
799: const PetscInt *ddiag = a->diag;
800: sctx.shift_top = info->zeropivot;
801: for (i = 0; i < n; i++) {
802: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
803: d = (aa)[ddiag[i]];
804: rs = -PetscAbsScalar(d) - PetscRealPart(d);
805: vtmp = aa + ai[i];
806: nz = ai[i + 1] - ai[i];
807: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
808: if (rs > sctx.shift_top) sctx.shift_top = rs;
809: }
810: sctx.shift_top *= 1.1;
811: sctx.nshift_max = 5;
812: sctx.shift_lo = 0.;
813: sctx.shift_hi = 1.;
814: }
816: PetscCall(ISGetIndices(isrow, &r));
817: PetscCall(ISGetIndices(isicol, &ic));
818: PetscCall(PetscMalloc1(n + 1, &rtmp));
819: PetscCall(PetscArrayzero(rtmp, n + 1));
820: ics = ic;
822: #if defined(MV)
823: sctx.shift_top = 0.;
824: sctx.nshift_max = 0;
825: sctx.shift_lo = 0.;
826: sctx.shift_hi = 0.;
827: sctx.shift_fraction = 0.;
829: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
830: sctx.shift_top = 0.;
831: for (i = 0; i < n; i++) {
832: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
833: d = (a->a)[diag[i]];
834: rs = -PetscAbsScalar(d) - PetscRealPart(d);
835: v = a->a + ai[i];
836: nz = ai[i + 1] - ai[i];
837: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
838: if (rs > sctx.shift_top) sctx.shift_top = rs;
839: }
840: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
841: sctx.shift_top *= 1.1;
842: sctx.nshift_max = 5;
843: sctx.shift_lo = 0.;
844: sctx.shift_hi = 1.;
845: }
847: sctx.shift_amount = 0.;
848: sctx.nshift = 0;
849: #endif
851: do {
852: sctx.newshift = PETSC_FALSE;
853: for (i = 0; i < n; i++) {
854: /* load in initial unfactored row */
855: nz = ai[r[i] + 1] - ai[r[i]];
856: ajtmp = aj + ai[r[i]];
857: v = a->a + ai[r[i]];
858: /* sort permuted ajtmp and values v accordingly */
859: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
860: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
862: diag[r[i]] = ai[r[i]];
863: for (j = 0; j < nz; j++) {
864: rtmp[ajtmp[j]] = v[j];
865: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
866: }
867: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
869: row = *ajtmp++;
870: while (row < i) {
871: pc = rtmp + row;
872: if (*pc != 0.0) {
873: pv = a->a + diag[r[row]];
874: pj = aj + diag[r[row]] + 1;
876: multiplier = *pc / *pv++;
877: *pc = multiplier;
878: nz = ai[r[row] + 1] - diag[r[row]] - 1;
879: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
880: PetscCall(PetscLogFlops(1 + 2.0 * nz));
881: }
882: row = *ajtmp++;
883: }
884: /* finished row so overwrite it onto a->a */
885: pv = a->a + ai[r[i]];
886: pj = aj + ai[r[i]];
887: nz = ai[r[i] + 1] - ai[r[i]];
888: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
890: rs = 0.0;
891: for (j = 0; j < nz; j++) {
892: pv[j] = rtmp[pj[j]];
893: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
894: }
896: sctx.rs = rs;
897: sctx.pv = pv[nbdiag];
898: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
899: if (sctx.newshift) break;
900: pv[nbdiag] = sctx.pv;
901: }
903: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
904: /*
905: * if no shift in this attempt & shifting & started shifting & can refine,
906: * then try lower shift
907: */
908: sctx.shift_hi = sctx.shift_fraction;
909: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
910: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
911: sctx.newshift = PETSC_TRUE;
912: sctx.nshift++;
913: }
914: } while (sctx.newshift);
916: /* invert diagonal entries for simpler triangular solves */
917: for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
919: PetscCall(PetscFree(rtmp));
920: PetscCall(ISRestoreIndices(isicol, &ic));
921: PetscCall(ISRestoreIndices(isrow, &r));
923: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
924: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
925: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
926: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
928: A->assembled = PETSC_TRUE;
929: A->preallocated = PETSC_TRUE;
931: PetscCall(PetscLogFlops(A->cmap->n));
932: if (sctx.nshift) {
933: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
934: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
935: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
936: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
937: }
938: }
939: PetscFunctionReturn(PETSC_SUCCESS);
940: }
942: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
943: {
944: Mat C;
946: PetscFunctionBegin;
947: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
948: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
949: PetscCall(MatLUFactorNumeric(C, A, info));
951: A->ops->solve = C->ops->solve;
952: A->ops->solvetranspose = C->ops->solvetranspose;
954: PetscCall(MatHeaderMerge(A, &C));
955: PetscFunctionReturn(PETSC_SUCCESS);
956: }
958: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
959: {
960: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
961: IS iscol = a->col, isrow = a->row;
962: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
963: PetscInt nz;
964: const PetscInt *rout, *cout, *r, *c;
965: PetscScalar *x, *tmp, *tmps, sum;
966: const PetscScalar *b;
967: const MatScalar *aa = a->a, *v;
969: PetscFunctionBegin;
970: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
972: PetscCall(VecGetArrayRead(bb, &b));
973: PetscCall(VecGetArrayWrite(xx, &x));
974: tmp = a->solve_work;
976: PetscCall(ISGetIndices(isrow, &rout));
977: r = rout;
978: PetscCall(ISGetIndices(iscol, &cout));
979: c = cout + (n - 1);
981: /* forward solve the lower triangular */
982: tmp[0] = b[*r++];
983: tmps = tmp;
984: for (i = 1; i < n; i++) {
985: v = aa + ai[i];
986: vi = aj + ai[i];
987: nz = a->diag[i] - ai[i];
988: sum = b[*r++];
989: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
990: tmp[i] = sum;
991: }
993: /* backward solve the upper triangular */
994: for (i = n - 1; i >= 0; i--) {
995: v = aa + a->diag[i] + 1;
996: vi = aj + a->diag[i] + 1;
997: nz = ai[i + 1] - a->diag[i] - 1;
998: sum = tmp[i];
999: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1000: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1001: }
1003: PetscCall(ISRestoreIndices(isrow, &rout));
1004: PetscCall(ISRestoreIndices(iscol, &cout));
1005: PetscCall(VecRestoreArrayRead(bb, &b));
1006: PetscCall(VecRestoreArrayWrite(xx, &x));
1007: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1008: PetscFunctionReturn(PETSC_SUCCESS);
1009: }
1011: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1012: {
1013: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1014: IS iscol = a->col, isrow = a->row;
1015: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1016: PetscInt nz, neq, ldb, ldx;
1017: const PetscInt *rout, *cout, *r, *c;
1018: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1019: const PetscScalar *b, *aa = a->a, *v;
1020: PetscBool isdense;
1022: PetscFunctionBegin;
1023: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1024: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1025: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1026: if (X != B) {
1027: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1028: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1029: }
1030: PetscCall(MatDenseGetArrayRead(B, &b));
1031: PetscCall(MatDenseGetLDA(B, &ldb));
1032: PetscCall(MatDenseGetArray(X, &x));
1033: PetscCall(MatDenseGetLDA(X, &ldx));
1034: PetscCall(ISGetIndices(isrow, &rout));
1035: r = rout;
1036: PetscCall(ISGetIndices(iscol, &cout));
1037: c = cout;
1038: for (neq = 0; neq < B->cmap->n; neq++) {
1039: /* forward solve the lower triangular */
1040: tmp[0] = b[r[0]];
1041: tmps = tmp;
1042: for (i = 1; i < n; i++) {
1043: v = aa + ai[i];
1044: vi = aj + ai[i];
1045: nz = a->diag[i] - ai[i];
1046: sum = b[r[i]];
1047: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1048: tmp[i] = sum;
1049: }
1050: /* backward solve the upper triangular */
1051: for (i = n - 1; i >= 0; i--) {
1052: v = aa + a->diag[i] + 1;
1053: vi = aj + a->diag[i] + 1;
1054: nz = ai[i + 1] - a->diag[i] - 1;
1055: sum = tmp[i];
1056: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1057: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1058: }
1059: b += ldb;
1060: x += ldx;
1061: }
1062: PetscCall(ISRestoreIndices(isrow, &rout));
1063: PetscCall(ISRestoreIndices(iscol, &cout));
1064: PetscCall(MatDenseRestoreArrayRead(B, &b));
1065: PetscCall(MatDenseRestoreArray(X, &x));
1066: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1067: PetscFunctionReturn(PETSC_SUCCESS);
1068: }
1070: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1071: {
1072: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1073: IS iscol = a->col, isrow = a->row;
1074: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1075: PetscInt nz, neq, ldb, ldx;
1076: const PetscInt *rout, *cout, *r, *c;
1077: PetscScalar *x, *tmp = a->solve_work, sum;
1078: const PetscScalar *b, *aa = a->a, *v;
1079: PetscBool isdense;
1081: PetscFunctionBegin;
1082: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1083: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1084: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1085: if (X != B) {
1086: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1087: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1088: }
1089: PetscCall(MatDenseGetArrayRead(B, &b));
1090: PetscCall(MatDenseGetLDA(B, &ldb));
1091: PetscCall(MatDenseGetArray(X, &x));
1092: PetscCall(MatDenseGetLDA(X, &ldx));
1093: PetscCall(ISGetIndices(isrow, &rout));
1094: r = rout;
1095: PetscCall(ISGetIndices(iscol, &cout));
1096: c = cout;
1097: for (neq = 0; neq < B->cmap->n; neq++) {
1098: /* forward solve the lower triangular */
1099: tmp[0] = b[r[0]];
1100: v = aa;
1101: vi = aj;
1102: for (i = 1; i < n; i++) {
1103: nz = ai[i + 1] - ai[i];
1104: sum = b[r[i]];
1105: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1106: tmp[i] = sum;
1107: v += nz;
1108: vi += nz;
1109: }
1110: /* backward solve the upper triangular */
1111: for (i = n - 1; i >= 0; i--) {
1112: v = aa + adiag[i + 1] + 1;
1113: vi = aj + adiag[i + 1] + 1;
1114: nz = adiag[i] - adiag[i + 1] - 1;
1115: sum = tmp[i];
1116: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1117: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1118: }
1119: b += ldb;
1120: x += ldx;
1121: }
1122: PetscCall(ISRestoreIndices(isrow, &rout));
1123: PetscCall(ISRestoreIndices(iscol, &cout));
1124: PetscCall(MatDenseRestoreArrayRead(B, &b));
1125: PetscCall(MatDenseRestoreArray(X, &x));
1126: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1127: PetscFunctionReturn(PETSC_SUCCESS);
1128: }
1130: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1131: {
1132: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1133: IS iscol = a->col, isrow = a->row;
1134: const PetscInt *r, *c, *rout, *cout;
1135: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1136: PetscInt nz, row;
1137: PetscScalar *x, *tmp, *tmps, sum;
1138: const PetscScalar *b;
1139: const MatScalar *aa = a->a, *v;
1141: PetscFunctionBegin;
1142: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1144: PetscCall(VecGetArrayRead(bb, &b));
1145: PetscCall(VecGetArrayWrite(xx, &x));
1146: tmp = a->solve_work;
1148: PetscCall(ISGetIndices(isrow, &rout));
1149: r = rout;
1150: PetscCall(ISGetIndices(iscol, &cout));
1151: c = cout + (n - 1);
1153: /* forward solve the lower triangular */
1154: tmp[0] = b[*r++];
1155: tmps = tmp;
1156: for (row = 1; row < n; row++) {
1157: i = rout[row]; /* permuted row */
1158: v = aa + ai[i];
1159: vi = aj + ai[i];
1160: nz = a->diag[i] - ai[i];
1161: sum = b[*r++];
1162: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1163: tmp[row] = sum;
1164: }
1166: /* backward solve the upper triangular */
1167: for (row = n - 1; row >= 0; row--) {
1168: i = rout[row]; /* permuted row */
1169: v = aa + a->diag[i] + 1;
1170: vi = aj + a->diag[i] + 1;
1171: nz = ai[i + 1] - a->diag[i] - 1;
1172: sum = tmp[row];
1173: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1174: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1175: }
1177: PetscCall(ISRestoreIndices(isrow, &rout));
1178: PetscCall(ISRestoreIndices(iscol, &cout));
1179: PetscCall(VecRestoreArrayRead(bb, &b));
1180: PetscCall(VecRestoreArrayWrite(xx, &x));
1181: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1182: PetscFunctionReturn(PETSC_SUCCESS);
1183: }
1185: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1186: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1187: {
1188: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1189: PetscInt n = A->rmap->n;
1190: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1191: PetscScalar *x;
1192: const PetscScalar *b;
1193: const MatScalar *aa = a->a;
1194: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1195: PetscInt adiag_i, i, nz, ai_i;
1196: const PetscInt *vi;
1197: const MatScalar *v;
1198: PetscScalar sum;
1199: #endif
1201: PetscFunctionBegin;
1202: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1204: PetscCall(VecGetArrayRead(bb, &b));
1205: PetscCall(VecGetArrayWrite(xx, &x));
1207: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1209: #else
1210: /* forward solve the lower triangular */
1211: x[0] = b[0];
1212: for (i = 1; i < n; i++) {
1213: ai_i = ai[i];
1214: v = aa + ai_i;
1215: vi = aj + ai_i;
1216: nz = adiag[i] - ai_i;
1217: sum = b[i];
1218: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1219: x[i] = sum;
1220: }
1222: /* backward solve the upper triangular */
1223: for (i = n - 1; i >= 0; i--) {
1224: adiag_i = adiag[i];
1225: v = aa + adiag_i + 1;
1226: vi = aj + adiag_i + 1;
1227: nz = ai[i + 1] - adiag_i - 1;
1228: sum = x[i];
1229: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1230: x[i] = sum * aa[adiag_i];
1231: }
1232: #endif
1233: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1234: PetscCall(VecRestoreArrayRead(bb, &b));
1235: PetscCall(VecRestoreArrayWrite(xx, &x));
1236: PetscFunctionReturn(PETSC_SUCCESS);
1237: }
1239: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1240: {
1241: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1242: IS iscol = a->col, isrow = a->row;
1243: PetscInt i, n = A->rmap->n, j;
1244: PetscInt nz;
1245: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1246: PetscScalar *x, *tmp, sum;
1247: const PetscScalar *b;
1248: const MatScalar *aa = a->a, *v;
1250: PetscFunctionBegin;
1251: if (yy != xx) PetscCall(VecCopy(yy, xx));
1253: PetscCall(VecGetArrayRead(bb, &b));
1254: PetscCall(VecGetArray(xx, &x));
1255: tmp = a->solve_work;
1257: PetscCall(ISGetIndices(isrow, &rout));
1258: r = rout;
1259: PetscCall(ISGetIndices(iscol, &cout));
1260: c = cout + (n - 1);
1262: /* forward solve the lower triangular */
1263: tmp[0] = b[*r++];
1264: for (i = 1; i < n; i++) {
1265: v = aa + ai[i];
1266: vi = aj + ai[i];
1267: nz = a->diag[i] - ai[i];
1268: sum = b[*r++];
1269: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1270: tmp[i] = sum;
1271: }
1273: /* backward solve the upper triangular */
1274: for (i = n - 1; i >= 0; i--) {
1275: v = aa + a->diag[i] + 1;
1276: vi = aj + a->diag[i] + 1;
1277: nz = ai[i + 1] - a->diag[i] - 1;
1278: sum = tmp[i];
1279: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1280: tmp[i] = sum * aa[a->diag[i]];
1281: x[*c--] += tmp[i];
1282: }
1284: PetscCall(ISRestoreIndices(isrow, &rout));
1285: PetscCall(ISRestoreIndices(iscol, &cout));
1286: PetscCall(VecRestoreArrayRead(bb, &b));
1287: PetscCall(VecRestoreArray(xx, &x));
1288: PetscCall(PetscLogFlops(2.0 * a->nz));
1289: PetscFunctionReturn(PETSC_SUCCESS);
1290: }
1292: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1293: {
1294: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1295: IS iscol = a->col, isrow = a->row;
1296: PetscInt i, n = A->rmap->n, j;
1297: PetscInt nz;
1298: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1299: PetscScalar *x, *tmp, sum;
1300: const PetscScalar *b;
1301: const MatScalar *aa = a->a, *v;
1303: PetscFunctionBegin;
1304: if (yy != xx) PetscCall(VecCopy(yy, xx));
1306: PetscCall(VecGetArrayRead(bb, &b));
1307: PetscCall(VecGetArray(xx, &x));
1308: tmp = a->solve_work;
1310: PetscCall(ISGetIndices(isrow, &rout));
1311: r = rout;
1312: PetscCall(ISGetIndices(iscol, &cout));
1313: c = cout;
1315: /* forward solve the lower triangular */
1316: tmp[0] = b[r[0]];
1317: v = aa;
1318: vi = aj;
1319: for (i = 1; i < n; i++) {
1320: nz = ai[i + 1] - ai[i];
1321: sum = b[r[i]];
1322: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1323: tmp[i] = sum;
1324: v += nz;
1325: vi += nz;
1326: }
1328: /* backward solve the upper triangular */
1329: v = aa + adiag[n - 1];
1330: vi = aj + adiag[n - 1];
1331: for (i = n - 1; i >= 0; i--) {
1332: nz = adiag[i] - adiag[i + 1] - 1;
1333: sum = tmp[i];
1334: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1335: tmp[i] = sum * v[nz];
1336: x[c[i]] += tmp[i];
1337: v += nz + 1;
1338: vi += nz + 1;
1339: }
1341: PetscCall(ISRestoreIndices(isrow, &rout));
1342: PetscCall(ISRestoreIndices(iscol, &cout));
1343: PetscCall(VecRestoreArrayRead(bb, &b));
1344: PetscCall(VecRestoreArray(xx, &x));
1345: PetscCall(PetscLogFlops(2.0 * a->nz));
1346: PetscFunctionReturn(PETSC_SUCCESS);
1347: }
1349: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1350: {
1351: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1352: IS iscol = a->col, isrow = a->row;
1353: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1354: PetscInt i, n = A->rmap->n, j;
1355: PetscInt nz;
1356: PetscScalar *x, *tmp, s1;
1357: const MatScalar *aa = a->a, *v;
1358: const PetscScalar *b;
1360: PetscFunctionBegin;
1361: PetscCall(VecGetArrayRead(bb, &b));
1362: PetscCall(VecGetArrayWrite(xx, &x));
1363: tmp = a->solve_work;
1365: PetscCall(ISGetIndices(isrow, &rout));
1366: r = rout;
1367: PetscCall(ISGetIndices(iscol, &cout));
1368: c = cout;
1370: /* copy the b into temp work space according to permutation */
1371: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1373: /* forward solve the U^T */
1374: for (i = 0; i < n; i++) {
1375: v = aa + diag[i];
1376: vi = aj + diag[i] + 1;
1377: nz = ai[i + 1] - diag[i] - 1;
1378: s1 = tmp[i];
1379: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1380: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1381: tmp[i] = s1;
1382: }
1384: /* backward solve the L^T */
1385: for (i = n - 1; i >= 0; i--) {
1386: v = aa + diag[i] - 1;
1387: vi = aj + diag[i] - 1;
1388: nz = diag[i] - ai[i];
1389: s1 = tmp[i];
1390: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1391: }
1393: /* copy tmp into x according to permutation */
1394: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1396: PetscCall(ISRestoreIndices(isrow, &rout));
1397: PetscCall(ISRestoreIndices(iscol, &cout));
1398: PetscCall(VecRestoreArrayRead(bb, &b));
1399: PetscCall(VecRestoreArrayWrite(xx, &x));
1401: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1402: PetscFunctionReturn(PETSC_SUCCESS);
1403: }
1405: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1406: {
1407: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1408: IS iscol = a->col, isrow = a->row;
1409: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1410: PetscInt i, n = A->rmap->n, j;
1411: PetscInt nz;
1412: PetscScalar *x, *tmp, s1;
1413: const MatScalar *aa = a->a, *v;
1414: const PetscScalar *b;
1416: PetscFunctionBegin;
1417: PetscCall(VecGetArrayRead(bb, &b));
1418: PetscCall(VecGetArrayWrite(xx, &x));
1419: tmp = a->solve_work;
1421: PetscCall(ISGetIndices(isrow, &rout));
1422: r = rout;
1423: PetscCall(ISGetIndices(iscol, &cout));
1424: c = cout;
1426: /* copy the b into temp work space according to permutation */
1427: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1429: /* forward solve the U^T */
1430: for (i = 0; i < n; i++) {
1431: v = aa + adiag[i + 1] + 1;
1432: vi = aj + adiag[i + 1] + 1;
1433: nz = adiag[i] - adiag[i + 1] - 1;
1434: s1 = tmp[i];
1435: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1436: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1437: tmp[i] = s1;
1438: }
1440: /* backward solve the L^T */
1441: for (i = n - 1; i >= 0; i--) {
1442: v = aa + ai[i];
1443: vi = aj + ai[i];
1444: nz = ai[i + 1] - ai[i];
1445: s1 = tmp[i];
1446: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1447: }
1449: /* copy tmp into x according to permutation */
1450: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1452: PetscCall(ISRestoreIndices(isrow, &rout));
1453: PetscCall(ISRestoreIndices(iscol, &cout));
1454: PetscCall(VecRestoreArrayRead(bb, &b));
1455: PetscCall(VecRestoreArrayWrite(xx, &x));
1457: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1458: PetscFunctionReturn(PETSC_SUCCESS);
1459: }
1461: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1462: {
1463: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1464: IS iscol = a->col, isrow = a->row;
1465: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1466: PetscInt i, n = A->rmap->n, j;
1467: PetscInt nz;
1468: PetscScalar *x, *tmp, s1;
1469: const MatScalar *aa = a->a, *v;
1470: const PetscScalar *b;
1472: PetscFunctionBegin;
1473: if (zz != xx) PetscCall(VecCopy(zz, xx));
1474: PetscCall(VecGetArrayRead(bb, &b));
1475: PetscCall(VecGetArray(xx, &x));
1476: tmp = a->solve_work;
1478: PetscCall(ISGetIndices(isrow, &rout));
1479: r = rout;
1480: PetscCall(ISGetIndices(iscol, &cout));
1481: c = cout;
1483: /* copy the b into temp work space according to permutation */
1484: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1486: /* forward solve the U^T */
1487: for (i = 0; i < n; i++) {
1488: v = aa + diag[i];
1489: vi = aj + diag[i] + 1;
1490: nz = ai[i + 1] - diag[i] - 1;
1491: s1 = tmp[i];
1492: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1493: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1494: tmp[i] = s1;
1495: }
1497: /* backward solve the L^T */
1498: for (i = n - 1; i >= 0; i--) {
1499: v = aa + diag[i] - 1;
1500: vi = aj + diag[i] - 1;
1501: nz = diag[i] - ai[i];
1502: s1 = tmp[i];
1503: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1504: }
1506: /* copy tmp into x according to permutation */
1507: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1509: PetscCall(ISRestoreIndices(isrow, &rout));
1510: PetscCall(ISRestoreIndices(iscol, &cout));
1511: PetscCall(VecRestoreArrayRead(bb, &b));
1512: PetscCall(VecRestoreArray(xx, &x));
1514: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1515: PetscFunctionReturn(PETSC_SUCCESS);
1516: }
1518: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1519: {
1520: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1521: IS iscol = a->col, isrow = a->row;
1522: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1523: PetscInt i, n = A->rmap->n, j;
1524: PetscInt nz;
1525: PetscScalar *x, *tmp, s1;
1526: const MatScalar *aa = a->a, *v;
1527: const PetscScalar *b;
1529: PetscFunctionBegin;
1530: if (zz != xx) PetscCall(VecCopy(zz, xx));
1531: PetscCall(VecGetArrayRead(bb, &b));
1532: PetscCall(VecGetArray(xx, &x));
1533: tmp = a->solve_work;
1535: PetscCall(ISGetIndices(isrow, &rout));
1536: r = rout;
1537: PetscCall(ISGetIndices(iscol, &cout));
1538: c = cout;
1540: /* copy the b into temp work space according to permutation */
1541: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1543: /* forward solve the U^T */
1544: for (i = 0; i < n; i++) {
1545: v = aa + adiag[i + 1] + 1;
1546: vi = aj + adiag[i + 1] + 1;
1547: nz = adiag[i] - adiag[i + 1] - 1;
1548: s1 = tmp[i];
1549: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1550: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1551: tmp[i] = s1;
1552: }
1554: /* backward solve the L^T */
1555: for (i = n - 1; i >= 0; i--) {
1556: v = aa + ai[i];
1557: vi = aj + ai[i];
1558: nz = ai[i + 1] - ai[i];
1559: s1 = tmp[i];
1560: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1561: }
1563: /* copy tmp into x according to permutation */
1564: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1566: PetscCall(ISRestoreIndices(isrow, &rout));
1567: PetscCall(ISRestoreIndices(iscol, &cout));
1568: PetscCall(VecRestoreArrayRead(bb, &b));
1569: PetscCall(VecRestoreArray(xx, &x));
1571: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1572: PetscFunctionReturn(PETSC_SUCCESS);
1573: }
1575: /*
1576: ilu() under revised new data structure.
1577: Factored arrays bj and ba are stored as
1578: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1580: bi=fact->i is an array of size n+1, in which
1581: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1582: bi[n]: points to L(n-1,n-1)+1
1584: bdiag=fact->diag is an array of size n+1,in which
1585: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1586: bdiag[n]: points to entry of U(n-1,0)-1
1588: U(i,:) contains bdiag[i] as its last entry, i.e.,
1589: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1590: */
1591: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1592: {
1593: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1594: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1595: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1596: IS isicol;
1598: PetscFunctionBegin;
1599: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1600: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1601: b = (Mat_SeqAIJ *)(fact)->data;
1603: /* allocate matrix arrays for new data structure */
1604: PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));
1606: b->singlemalloc = PETSC_TRUE;
1607: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1608: bdiag = b->diag;
1610: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1612: /* set bi and bj with new data structure */
1613: bi = b->i;
1614: bj = b->j;
1616: /* L part */
1617: bi[0] = 0;
1618: for (i = 0; i < n; i++) {
1619: nz = adiag[i] - ai[i];
1620: bi[i + 1] = bi[i] + nz;
1621: aj = a->j + ai[i];
1622: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1623: }
1625: /* U part */
1626: bdiag[n] = bi[n] - 1;
1627: for (i = n - 1; i >= 0; i--) {
1628: nz = ai[i + 1] - adiag[i] - 1;
1629: aj = a->j + adiag[i] + 1;
1630: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1631: /* diag[i] */
1632: bj[k++] = i;
1633: bdiag[i] = bdiag[i + 1] + nz + 1;
1634: }
1636: fact->factortype = MAT_FACTOR_ILU;
1637: fact->info.factor_mallocs = 0;
1638: fact->info.fill_ratio_given = info->fill;
1639: fact->info.fill_ratio_needed = 1.0;
1640: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1641: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1643: b = (Mat_SeqAIJ *)(fact)->data;
1644: b->row = isrow;
1645: b->col = iscol;
1646: b->icol = isicol;
1647: PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1648: PetscCall(PetscObjectReference((PetscObject)isrow));
1649: PetscCall(PetscObjectReference((PetscObject)iscol));
1650: PetscFunctionReturn(PETSC_SUCCESS);
1651: }
1653: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1654: {
1655: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1656: IS isicol;
1657: const PetscInt *r, *ic;
1658: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1659: PetscInt *bi, *cols, nnz, *cols_lvl;
1660: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1661: PetscInt i, levels, diagonal_fill;
1662: PetscBool col_identity, row_identity, missing;
1663: PetscReal f;
1664: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1665: PetscBT lnkbt;
1666: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1667: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1668: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1670: PetscFunctionBegin;
1671: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1672: PetscCall(MatMissingDiagonal(A, &missing, &i));
1673: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1675: levels = (PetscInt)info->levels;
1676: PetscCall(ISIdentity(isrow, &row_identity));
1677: PetscCall(ISIdentity(iscol, &col_identity));
1678: if (!levels && row_identity && col_identity) {
1679: /* special case: ilu(0) with natural ordering */
1680: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1681: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1682: PetscFunctionReturn(PETSC_SUCCESS);
1683: }
1685: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1686: PetscCall(ISGetIndices(isrow, &r));
1687: PetscCall(ISGetIndices(isicol, &ic));
1689: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1690: PetscCall(PetscMalloc1(n + 1, &bi));
1691: PetscCall(PetscMalloc1(n + 1, &bdiag));
1692: bi[0] = bdiag[0] = 0;
1693: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1695: /* create a linked list for storing column indices of the active row */
1696: nlnk = n + 1;
1697: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1699: /* initial FreeSpace size is f*(ai[n]+1) */
1700: f = info->fill;
1701: diagonal_fill = (PetscInt)info->diagonal_fill;
1702: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1703: current_space = free_space;
1704: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1705: current_space_lvl = free_space_lvl;
1706: for (i = 0; i < n; i++) {
1707: nzi = 0;
1708: /* copy current row into linked list */
1709: nnz = ai[r[i] + 1] - ai[r[i]];
1710: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1711: cols = aj + ai[r[i]];
1712: lnk[i] = -1; /* marker to indicate if diagonal exists */
1713: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1714: nzi += nlnk;
1716: /* make sure diagonal entry is included */
1717: if (diagonal_fill && lnk[i] == -1) {
1718: fm = n;
1719: while (lnk[fm] < i) fm = lnk[fm];
1720: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1721: lnk[fm] = i;
1722: lnk_lvl[i] = 0;
1723: nzi++;
1724: dcount++;
1725: }
1727: /* add pivot rows into the active row */
1728: nzbd = 0;
1729: prow = lnk[n];
1730: while (prow < i) {
1731: nnz = bdiag[prow];
1732: cols = bj_ptr[prow] + nnz + 1;
1733: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1734: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1735: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1736: nzi += nlnk;
1737: prow = lnk[prow];
1738: nzbd++;
1739: }
1740: bdiag[i] = nzbd;
1741: bi[i + 1] = bi[i] + nzi;
1742: /* if free space is not available, make more free space */
1743: if (current_space->local_remaining < nzi) {
1744: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1745: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1746: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1747: reallocs++;
1748: }
1750: /* copy data into free_space and free_space_lvl, then initialize lnk */
1751: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1752: bj_ptr[i] = current_space->array;
1753: bjlvl_ptr[i] = current_space_lvl->array;
1755: /* make sure the active row i has diagonal entry */
1756: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1758: current_space->array += nzi;
1759: current_space->local_used += nzi;
1760: current_space->local_remaining -= nzi;
1761: current_space_lvl->array += nzi;
1762: current_space_lvl->local_used += nzi;
1763: current_space_lvl->local_remaining -= nzi;
1764: }
1766: PetscCall(ISRestoreIndices(isrow, &r));
1767: PetscCall(ISRestoreIndices(isicol, &ic));
1768: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1769: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1770: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1772: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1773: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1774: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1776: #if defined(PETSC_USE_INFO)
1777: {
1778: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1779: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1780: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1781: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1782: PetscCall(PetscInfo(A, "for best performance.\n"));
1783: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1784: }
1785: #endif
1786: /* put together the new matrix */
1787: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1788: b = (Mat_SeqAIJ *)(fact)->data;
1790: b->free_a = PETSC_TRUE;
1791: b->free_ij = PETSC_TRUE;
1792: b->singlemalloc = PETSC_FALSE;
1794: PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));
1796: b->j = bj;
1797: b->i = bi;
1798: b->diag = bdiag;
1799: b->ilen = NULL;
1800: b->imax = NULL;
1801: b->row = isrow;
1802: b->col = iscol;
1803: PetscCall(PetscObjectReference((PetscObject)isrow));
1804: PetscCall(PetscObjectReference((PetscObject)iscol));
1805: b->icol = isicol;
1807: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1808: /* In b structure: Free imax, ilen, old a, old j.
1809: Allocate bdiag, solve_work, new a, new j */
1810: b->maxnz = b->nz = bdiag[0] + 1;
1812: (fact)->info.factor_mallocs = reallocs;
1813: (fact)->info.fill_ratio_given = f;
1814: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1815: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1816: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1817: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1818: PetscFunctionReturn(PETSC_SUCCESS);
1819: }
1821: #if 0
1822: // unused
1823: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1824: {
1825: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1826: IS isicol;
1827: const PetscInt *r, *ic;
1828: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1829: PetscInt *bi, *cols, nnz, *cols_lvl;
1830: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1831: PetscInt i, levels, diagonal_fill;
1832: PetscBool col_identity, row_identity;
1833: PetscReal f;
1834: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1835: PetscBT lnkbt;
1836: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1837: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1838: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1839: PetscBool missing;
1841: PetscFunctionBegin;
1842: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1843: PetscCall(MatMissingDiagonal(A, &missing, &i));
1844: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1846: f = info->fill;
1847: levels = (PetscInt)info->levels;
1848: diagonal_fill = (PetscInt)info->diagonal_fill;
1850: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1852: PetscCall(ISIdentity(isrow, &row_identity));
1853: PetscCall(ISIdentity(iscol, &col_identity));
1854: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1855: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1857: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1858: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1859: fact->factortype = MAT_FACTOR_ILU;
1860: (fact)->info.factor_mallocs = 0;
1861: (fact)->info.fill_ratio_given = info->fill;
1862: (fact)->info.fill_ratio_needed = 1.0;
1864: b = (Mat_SeqAIJ *)(fact)->data;
1865: b->row = isrow;
1866: b->col = iscol;
1867: b->icol = isicol;
1868: PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1869: PetscCall(PetscObjectReference((PetscObject)isrow));
1870: PetscCall(PetscObjectReference((PetscObject)iscol));
1871: PetscFunctionReturn(PETSC_SUCCESS);
1872: }
1874: PetscCall(ISGetIndices(isrow, &r));
1875: PetscCall(ISGetIndices(isicol, &ic));
1877: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1878: PetscCall(PetscMalloc1(n + 1, &bi));
1879: PetscCall(PetscMalloc1(n + 1, &bdiag));
1880: bi[0] = bdiag[0] = 0;
1882: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1884: /* create a linked list for storing column indices of the active row */
1885: nlnk = n + 1;
1886: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1888: /* initial FreeSpace size is f*(ai[n]+1) */
1889: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1890: current_space = free_space;
1891: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1892: current_space_lvl = free_space_lvl;
1894: for (i = 0; i < n; i++) {
1895: nzi = 0;
1896: /* copy current row into linked list */
1897: nnz = ai[r[i] + 1] - ai[r[i]];
1898: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1899: cols = aj + ai[r[i]];
1900: lnk[i] = -1; /* marker to indicate if diagonal exists */
1901: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1902: nzi += nlnk;
1904: /* make sure diagonal entry is included */
1905: if (diagonal_fill && lnk[i] == -1) {
1906: fm = n;
1907: while (lnk[fm] < i) fm = lnk[fm];
1908: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1909: lnk[fm] = i;
1910: lnk_lvl[i] = 0;
1911: nzi++;
1912: dcount++;
1913: }
1915: /* add pivot rows into the active row */
1916: nzbd = 0;
1917: prow = lnk[n];
1918: while (prow < i) {
1919: nnz = bdiag[prow];
1920: cols = bj_ptr[prow] + nnz + 1;
1921: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1922: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1923: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1924: nzi += nlnk;
1925: prow = lnk[prow];
1926: nzbd++;
1927: }
1928: bdiag[i] = nzbd;
1929: bi[i + 1] = bi[i] + nzi;
1931: /* if free space is not available, make more free space */
1932: if (current_space->local_remaining < nzi) {
1933: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1934: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1935: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1936: reallocs++;
1937: }
1939: /* copy data into free_space and free_space_lvl, then initialize lnk */
1940: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1941: bj_ptr[i] = current_space->array;
1942: bjlvl_ptr[i] = current_space_lvl->array;
1944: /* make sure the active row i has diagonal entry */
1945: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1947: current_space->array += nzi;
1948: current_space->local_used += nzi;
1949: current_space->local_remaining -= nzi;
1950: current_space_lvl->array += nzi;
1951: current_space_lvl->local_used += nzi;
1952: current_space_lvl->local_remaining -= nzi;
1953: }
1955: PetscCall(ISRestoreIndices(isrow, &r));
1956: PetscCall(ISRestoreIndices(isicol, &ic));
1958: /* destroy list of free space and other temporary arrays */
1959: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1960: PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1961: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1962: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1963: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1965: #if defined(PETSC_USE_INFO)
1966: {
1967: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1968: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1969: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1970: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1971: PetscCall(PetscInfo(A, "for best performance.\n"));
1972: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1973: }
1974: #endif
1976: /* put together the new matrix */
1977: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1978: b = (Mat_SeqAIJ *)(fact)->data;
1980: b->free_a = PETSC_TRUE;
1981: b->free_ij = PETSC_TRUE;
1982: b->singlemalloc = PETSC_FALSE;
1984: PetscCall(PetscMalloc1(bi[n], &b->a));
1985: b->j = bj;
1986: b->i = bi;
1987: for (i = 0; i < n; i++) bdiag[i] += bi[i];
1988: b->diag = bdiag;
1989: b->ilen = NULL;
1990: b->imax = NULL;
1991: b->row = isrow;
1992: b->col = iscol;
1993: PetscCall(PetscObjectReference((PetscObject)isrow));
1994: PetscCall(PetscObjectReference((PetscObject)iscol));
1995: b->icol = isicol;
1996: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1997: /* In b structure: Free imax, ilen, old a, old j.
1998: Allocate bdiag, solve_work, new a, new j */
1999: b->maxnz = b->nz = bi[n];
2001: (fact)->info.factor_mallocs = reallocs;
2002: (fact)->info.fill_ratio_given = f;
2003: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2004: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2005: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2006: PetscFunctionReturn(PETSC_SUCCESS);
2007: }
2008: #endif
2010: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2011: {
2012: Mat C = B;
2013: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2014: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2015: IS ip = b->row, iip = b->icol;
2016: const PetscInt *rip, *riip;
2017: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2018: PetscInt *ai = a->i, *aj = a->j;
2019: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2020: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2021: PetscBool perm_identity;
2022: FactorShiftCtx sctx;
2023: PetscReal rs;
2024: MatScalar d, *v;
2026: PetscFunctionBegin;
2027: /* MatPivotSetUp(): initialize shift context sctx */
2028: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2030: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2031: sctx.shift_top = info->zeropivot;
2032: for (i = 0; i < mbs; i++) {
2033: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2034: d = (aa)[a->diag[i]];
2035: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2036: v = aa + ai[i];
2037: nz = ai[i + 1] - ai[i];
2038: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2039: if (rs > sctx.shift_top) sctx.shift_top = rs;
2040: }
2041: sctx.shift_top *= 1.1;
2042: sctx.nshift_max = 5;
2043: sctx.shift_lo = 0.;
2044: sctx.shift_hi = 1.;
2045: }
2047: PetscCall(ISGetIndices(ip, &rip));
2048: PetscCall(ISGetIndices(iip, &riip));
2050: /* allocate working arrays
2051: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2052: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2053: */
2054: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2056: do {
2057: sctx.newshift = PETSC_FALSE;
2059: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2060: if (mbs) il[0] = 0;
2062: for (k = 0; k < mbs; k++) {
2063: /* zero rtmp */
2064: nz = bi[k + 1] - bi[k];
2065: bjtmp = bj + bi[k];
2066: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2068: /* load in initial unfactored row */
2069: bval = ba + bi[k];
2070: jmin = ai[rip[k]];
2071: jmax = ai[rip[k] + 1];
2072: for (j = jmin; j < jmax; j++) {
2073: col = riip[aj[j]];
2074: if (col >= k) { /* only take upper triangular entry */
2075: rtmp[col] = aa[j];
2076: *bval++ = 0.0; /* for in-place factorization */
2077: }
2078: }
2079: /* shift the diagonal of the matrix: ZeropivotApply() */
2080: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2082: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2083: dk = rtmp[k];
2084: i = c2r[k]; /* first row to be added to k_th row */
2086: while (i < k) {
2087: nexti = c2r[i]; /* next row to be added to k_th row */
2089: /* compute multiplier, update diag(k) and U(i,k) */
2090: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2091: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2092: dk += uikdi * ba[ili]; /* update diag[k] */
2093: ba[ili] = uikdi; /* -U(i,k) */
2095: /* add multiple of row i to k-th row */
2096: jmin = ili + 1;
2097: jmax = bi[i + 1];
2098: if (jmin < jmax) {
2099: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2100: /* update il and c2r for row i */
2101: il[i] = jmin;
2102: j = bj[jmin];
2103: c2r[i] = c2r[j];
2104: c2r[j] = i;
2105: }
2106: i = nexti;
2107: }
2109: /* copy data into U(k,:) */
2110: rs = 0.0;
2111: jmin = bi[k];
2112: jmax = bi[k + 1] - 1;
2113: if (jmin < jmax) {
2114: for (j = jmin; j < jmax; j++) {
2115: col = bj[j];
2116: ba[j] = rtmp[col];
2117: rs += PetscAbsScalar(ba[j]);
2118: }
2119: /* add the k-th row into il and c2r */
2120: il[k] = jmin;
2121: i = bj[jmin];
2122: c2r[k] = c2r[i];
2123: c2r[i] = k;
2124: }
2126: /* MatPivotCheck() */
2127: sctx.rs = rs;
2128: sctx.pv = dk;
2129: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2130: if (sctx.newshift) break;
2131: dk = sctx.pv;
2133: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2134: }
2135: } while (sctx.newshift);
2137: PetscCall(PetscFree3(rtmp, il, c2r));
2138: PetscCall(ISRestoreIndices(ip, &rip));
2139: PetscCall(ISRestoreIndices(iip, &riip));
2141: PetscCall(ISIdentity(ip, &perm_identity));
2142: if (perm_identity) {
2143: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2144: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2145: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2146: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2147: } else {
2148: B->ops->solve = MatSolve_SeqSBAIJ_1;
2149: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2150: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2151: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2152: }
2154: C->assembled = PETSC_TRUE;
2155: C->preallocated = PETSC_TRUE;
2157: PetscCall(PetscLogFlops(C->rmap->n));
2159: /* MatPivotView() */
2160: if (sctx.nshift) {
2161: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2162: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2163: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2164: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2165: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2166: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2167: }
2168: }
2169: PetscFunctionReturn(PETSC_SUCCESS);
2170: }
2172: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2173: {
2174: Mat C = B;
2175: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2176: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2177: IS ip = b->row, iip = b->icol;
2178: const PetscInt *rip, *riip;
2179: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2180: PetscInt *ai = a->i, *aj = a->j;
2181: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2182: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2183: PetscBool perm_identity;
2184: FactorShiftCtx sctx;
2185: PetscReal rs;
2186: MatScalar d, *v;
2188: PetscFunctionBegin;
2189: /* MatPivotSetUp(): initialize shift context sctx */
2190: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2192: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2193: sctx.shift_top = info->zeropivot;
2194: for (i = 0; i < mbs; i++) {
2195: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2196: d = (aa)[a->diag[i]];
2197: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2198: v = aa + ai[i];
2199: nz = ai[i + 1] - ai[i];
2200: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2201: if (rs > sctx.shift_top) sctx.shift_top = rs;
2202: }
2203: sctx.shift_top *= 1.1;
2204: sctx.nshift_max = 5;
2205: sctx.shift_lo = 0.;
2206: sctx.shift_hi = 1.;
2207: }
2209: PetscCall(ISGetIndices(ip, &rip));
2210: PetscCall(ISGetIndices(iip, &riip));
2212: /* initialization */
2213: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2215: do {
2216: sctx.newshift = PETSC_FALSE;
2218: for (i = 0; i < mbs; i++) jl[i] = mbs;
2219: il[0] = 0;
2221: for (k = 0; k < mbs; k++) {
2222: /* zero rtmp */
2223: nz = bi[k + 1] - bi[k];
2224: bjtmp = bj + bi[k];
2225: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2227: bval = ba + bi[k];
2228: /* initialize k-th row by the perm[k]-th row of A */
2229: jmin = ai[rip[k]];
2230: jmax = ai[rip[k] + 1];
2231: for (j = jmin; j < jmax; j++) {
2232: col = riip[aj[j]];
2233: if (col >= k) { /* only take upper triangular entry */
2234: rtmp[col] = aa[j];
2235: *bval++ = 0.0; /* for in-place factorization */
2236: }
2237: }
2238: /* shift the diagonal of the matrix */
2239: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2241: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2242: dk = rtmp[k];
2243: i = jl[k]; /* first row to be added to k_th row */
2245: while (i < k) {
2246: nexti = jl[i]; /* next row to be added to k_th row */
2248: /* compute multiplier, update diag(k) and U(i,k) */
2249: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2250: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2251: dk += uikdi * ba[ili];
2252: ba[ili] = uikdi; /* -U(i,k) */
2254: /* add multiple of row i to k-th row */
2255: jmin = ili + 1;
2256: jmax = bi[i + 1];
2257: if (jmin < jmax) {
2258: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2259: /* update il and jl for row i */
2260: il[i] = jmin;
2261: j = bj[jmin];
2262: jl[i] = jl[j];
2263: jl[j] = i;
2264: }
2265: i = nexti;
2266: }
2268: /* shift the diagonals when zero pivot is detected */
2269: /* compute rs=sum of abs(off-diagonal) */
2270: rs = 0.0;
2271: jmin = bi[k] + 1;
2272: nz = bi[k + 1] - jmin;
2273: bcol = bj + jmin;
2274: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2276: sctx.rs = rs;
2277: sctx.pv = dk;
2278: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2279: if (sctx.newshift) break;
2280: dk = sctx.pv;
2282: /* copy data into U(k,:) */
2283: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2284: jmin = bi[k] + 1;
2285: jmax = bi[k + 1];
2286: if (jmin < jmax) {
2287: for (j = jmin; j < jmax; j++) {
2288: col = bj[j];
2289: ba[j] = rtmp[col];
2290: }
2291: /* add the k-th row into il and jl */
2292: il[k] = jmin;
2293: i = bj[jmin];
2294: jl[k] = jl[i];
2295: jl[i] = k;
2296: }
2297: }
2298: } while (sctx.newshift);
2300: PetscCall(PetscFree3(rtmp, il, jl));
2301: PetscCall(ISRestoreIndices(ip, &rip));
2302: PetscCall(ISRestoreIndices(iip, &riip));
2304: PetscCall(ISIdentity(ip, &perm_identity));
2305: if (perm_identity) {
2306: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2307: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2308: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2309: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2310: } else {
2311: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2312: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2313: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2314: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2315: }
2317: C->assembled = PETSC_TRUE;
2318: C->preallocated = PETSC_TRUE;
2320: PetscCall(PetscLogFlops(C->rmap->n));
2321: if (sctx.nshift) {
2322: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2323: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2324: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2325: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2326: }
2327: }
2328: PetscFunctionReturn(PETSC_SUCCESS);
2329: }
2331: /*
2332: icc() under revised new data structure.
2333: Factored arrays bj and ba are stored as
2334: U(0,:),...,U(i,:),U(n-1,:)
2336: ui=fact->i is an array of size n+1, in which
2337: ui+
2338: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2339: ui[n]: points to U(n-1,n-1)+1
2341: udiag=fact->diag is an array of size n,in which
2342: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2344: U(i,:) contains udiag[i] as its last entry, i.e.,
2345: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2346: */
2348: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2349: {
2350: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2351: Mat_SeqSBAIJ *b;
2352: PetscBool perm_identity, missing;
2353: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2354: const PetscInt *rip, *riip;
2355: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2356: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2357: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2358: PetscReal fill = info->fill, levels = info->levels;
2359: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2360: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2361: PetscBT lnkbt;
2362: IS iperm;
2364: PetscFunctionBegin;
2365: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2366: PetscCall(MatMissingDiagonal(A, &missing, &d));
2367: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2368: PetscCall(ISIdentity(perm, &perm_identity));
2369: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2371: PetscCall(PetscMalloc1(am + 1, &ui));
2372: PetscCall(PetscMalloc1(am + 1, &udiag));
2373: ui[0] = 0;
2375: /* ICC(0) without matrix ordering: simply rearrange column indices */
2376: if (!levels && perm_identity) {
2377: for (i = 0; i < am; i++) {
2378: ncols = ai[i + 1] - a->diag[i];
2379: ui[i + 1] = ui[i] + ncols;
2380: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2381: }
2382: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2383: cols = uj;
2384: for (i = 0; i < am; i++) {
2385: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2386: ncols = ai[i + 1] - a->diag[i] - 1;
2387: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2388: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2389: }
2390: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2391: PetscCall(ISGetIndices(iperm, &riip));
2392: PetscCall(ISGetIndices(perm, &rip));
2394: /* initialization */
2395: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2397: /* jl: linked list for storing indices of the pivot rows
2398: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2399: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2400: for (i = 0; i < am; i++) {
2401: jl[i] = am;
2402: il[i] = 0;
2403: }
2405: /* create and initialize a linked list for storing column indices of the active row k */
2406: nlnk = am + 1;
2407: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2409: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2410: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2411: current_space = free_space;
2412: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2413: current_space_lvl = free_space_lvl;
2415: for (k = 0; k < am; k++) { /* for each active row k */
2416: /* initialize lnk by the column indices of row rip[k] of A */
2417: nzk = 0;
2418: ncols = ai[rip[k] + 1] - ai[rip[k]];
2419: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2420: ncols_upper = 0;
2421: for (j = 0; j < ncols; j++) {
2422: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2423: if (riip[i] >= k) { /* only take upper triangular entry */
2424: ajtmp[ncols_upper] = i;
2425: ncols_upper++;
2426: }
2427: }
2428: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2429: nzk += nlnk;
2431: /* update lnk by computing fill-in for each pivot row to be merged in */
2432: prow = jl[k]; /* 1st pivot row */
2434: while (prow < k) {
2435: nextprow = jl[prow];
2437: /* merge prow into k-th row */
2438: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2439: jmax = ui[prow + 1];
2440: ncols = jmax - jmin;
2441: i = jmin - ui[prow];
2442: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2443: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2444: j = *(uj - 1);
2445: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2446: nzk += nlnk;
2448: /* update il and jl for prow */
2449: if (jmin < jmax) {
2450: il[prow] = jmin;
2451: j = *cols;
2452: jl[prow] = jl[j];
2453: jl[j] = prow;
2454: }
2455: prow = nextprow;
2456: }
2458: /* if free space is not available, make more free space */
2459: if (current_space->local_remaining < nzk) {
2460: i = am - k + 1; /* num of unfactored rows */
2461: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2462: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2463: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2464: reallocs++;
2465: }
2467: /* copy data into free_space and free_space_lvl, then initialize lnk */
2468: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2469: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2471: /* add the k-th row into il and jl */
2472: if (nzk > 1) {
2473: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2474: jl[k] = jl[i];
2475: jl[i] = k;
2476: il[k] = ui[k] + 1;
2477: }
2478: uj_ptr[k] = current_space->array;
2479: uj_lvl_ptr[k] = current_space_lvl->array;
2481: current_space->array += nzk;
2482: current_space->local_used += nzk;
2483: current_space->local_remaining -= nzk;
2485: current_space_lvl->array += nzk;
2486: current_space_lvl->local_used += nzk;
2487: current_space_lvl->local_remaining -= nzk;
2489: ui[k + 1] = ui[k] + nzk;
2490: }
2492: PetscCall(ISRestoreIndices(perm, &rip));
2493: PetscCall(ISRestoreIndices(iperm, &riip));
2494: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2495: PetscCall(PetscFree(ajtmp));
2497: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2498: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2499: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2500: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2501: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2503: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2505: /* put together the new matrix in MATSEQSBAIJ format */
2506: b = (Mat_SeqSBAIJ *)(fact)->data;
2507: b->singlemalloc = PETSC_FALSE;
2509: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2511: b->j = uj;
2512: b->i = ui;
2513: b->diag = udiag;
2514: b->free_diag = PETSC_TRUE;
2515: b->ilen = NULL;
2516: b->imax = NULL;
2517: b->row = perm;
2518: b->col = perm;
2519: PetscCall(PetscObjectReference((PetscObject)perm));
2520: PetscCall(PetscObjectReference((PetscObject)perm));
2521: b->icol = iperm;
2522: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2524: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2526: b->maxnz = b->nz = ui[am];
2527: b->free_a = PETSC_TRUE;
2528: b->free_ij = PETSC_TRUE;
2530: fact->info.factor_mallocs = reallocs;
2531: fact->info.fill_ratio_given = fill;
2532: if (ai[am] != 0) {
2533: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2534: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2535: } else {
2536: fact->info.fill_ratio_needed = 0.0;
2537: }
2538: #if defined(PETSC_USE_INFO)
2539: if (ai[am] != 0) {
2540: PetscReal af = fact->info.fill_ratio_needed;
2541: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2542: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2543: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2544: } else {
2545: PetscCall(PetscInfo(A, "Empty matrix\n"));
2546: }
2547: #endif
2548: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2549: PetscFunctionReturn(PETSC_SUCCESS);
2550: }
2552: #if 0
2553: // unused
2554: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2555: {
2556: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2557: Mat_SeqSBAIJ *b;
2558: PetscBool perm_identity, missing;
2559: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2560: const PetscInt *rip, *riip;
2561: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2562: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2563: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2564: PetscReal fill = info->fill, levels = info->levels;
2565: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2566: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2567: PetscBT lnkbt;
2568: IS iperm;
2570: PetscFunctionBegin;
2571: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2572: PetscCall(MatMissingDiagonal(A, &missing, &d));
2573: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2574: PetscCall(ISIdentity(perm, &perm_identity));
2575: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2577: PetscCall(PetscMalloc1(am + 1, &ui));
2578: PetscCall(PetscMalloc1(am + 1, &udiag));
2579: ui[0] = 0;
2581: /* ICC(0) without matrix ordering: simply copies fill pattern */
2582: if (!levels && perm_identity) {
2583: for (i = 0; i < am; i++) {
2584: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2585: udiag[i] = ui[i];
2586: }
2587: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2588: cols = uj;
2589: for (i = 0; i < am; i++) {
2590: aj = a->j + a->diag[i];
2591: ncols = ui[i + 1] - ui[i];
2592: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2593: }
2594: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2595: PetscCall(ISGetIndices(iperm, &riip));
2596: PetscCall(ISGetIndices(perm, &rip));
2598: /* initialization */
2599: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2601: /* jl: linked list for storing indices of the pivot rows
2602: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2603: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2604: for (i = 0; i < am; i++) {
2605: jl[i] = am;
2606: il[i] = 0;
2607: }
2609: /* create and initialize a linked list for storing column indices of the active row k */
2610: nlnk = am + 1;
2611: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2613: /* initial FreeSpace size is fill*(ai[am]+1) */
2614: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2615: current_space = free_space;
2616: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2617: current_space_lvl = free_space_lvl;
2619: for (k = 0; k < am; k++) { /* for each active row k */
2620: /* initialize lnk by the column indices of row rip[k] of A */
2621: nzk = 0;
2622: ncols = ai[rip[k] + 1] - ai[rip[k]];
2623: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2624: ncols_upper = 0;
2625: for (j = 0; j < ncols; j++) {
2626: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2627: if (riip[i] >= k) { /* only take upper triangular entry */
2628: ajtmp[ncols_upper] = i;
2629: ncols_upper++;
2630: }
2631: }
2632: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2633: nzk += nlnk;
2635: /* update lnk by computing fill-in for each pivot row to be merged in */
2636: prow = jl[k]; /* 1st pivot row */
2638: while (prow < k) {
2639: nextprow = jl[prow];
2641: /* merge prow into k-th row */
2642: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2643: jmax = ui[prow + 1];
2644: ncols = jmax - jmin;
2645: i = jmin - ui[prow];
2646: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2647: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2648: j = *(uj - 1);
2649: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2650: nzk += nlnk;
2652: /* update il and jl for prow */
2653: if (jmin < jmax) {
2654: il[prow] = jmin;
2655: j = *cols;
2656: jl[prow] = jl[j];
2657: jl[j] = prow;
2658: }
2659: prow = nextprow;
2660: }
2662: /* if free space is not available, make more free space */
2663: if (current_space->local_remaining < nzk) {
2664: i = am - k + 1; /* num of unfactored rows */
2665: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2666: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2667: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2668: reallocs++;
2669: }
2671: /* copy data into free_space and free_space_lvl, then initialize lnk */
2672: PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2673: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2675: /* add the k-th row into il and jl */
2676: if (nzk > 1) {
2677: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2678: jl[k] = jl[i];
2679: jl[i] = k;
2680: il[k] = ui[k] + 1;
2681: }
2682: uj_ptr[k] = current_space->array;
2683: uj_lvl_ptr[k] = current_space_lvl->array;
2685: current_space->array += nzk;
2686: current_space->local_used += nzk;
2687: current_space->local_remaining -= nzk;
2689: current_space_lvl->array += nzk;
2690: current_space_lvl->local_used += nzk;
2691: current_space_lvl->local_remaining -= nzk;
2693: ui[k + 1] = ui[k] + nzk;
2694: }
2696: #if defined(PETSC_USE_INFO)
2697: if (ai[am] != 0) {
2698: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2699: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2700: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2701: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2702: } else {
2703: PetscCall(PetscInfo(A, "Empty matrix\n"));
2704: }
2705: #endif
2707: PetscCall(ISRestoreIndices(perm, &rip));
2708: PetscCall(ISRestoreIndices(iperm, &riip));
2709: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2710: PetscCall(PetscFree(ajtmp));
2712: /* destroy list of free space and other temporary array(s) */
2713: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2714: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2715: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2716: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2718: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2720: /* put together the new matrix in MATSEQSBAIJ format */
2722: b = (Mat_SeqSBAIJ *)fact->data;
2723: b->singlemalloc = PETSC_FALSE;
2725: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2727: b->j = uj;
2728: b->i = ui;
2729: b->diag = udiag;
2730: b->free_diag = PETSC_TRUE;
2731: b->ilen = NULL;
2732: b->imax = NULL;
2733: b->row = perm;
2734: b->col = perm;
2736: PetscCall(PetscObjectReference((PetscObject)perm));
2737: PetscCall(PetscObjectReference((PetscObject)perm));
2739: b->icol = iperm;
2740: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2741: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2742: b->maxnz = b->nz = ui[am];
2743: b->free_a = PETSC_TRUE;
2744: b->free_ij = PETSC_TRUE;
2746: fact->info.factor_mallocs = reallocs;
2747: fact->info.fill_ratio_given = fill;
2748: if (ai[am] != 0) {
2749: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2750: } else {
2751: fact->info.fill_ratio_needed = 0.0;
2752: }
2753: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2754: PetscFunctionReturn(PETSC_SUCCESS);
2755: }
2756: #endif
2758: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2759: {
2760: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2761: Mat_SeqSBAIJ *b;
2762: PetscBool perm_identity, missing;
2763: PetscReal fill = info->fill;
2764: const PetscInt *rip, *riip;
2765: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2766: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2767: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2768: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2769: PetscBT lnkbt;
2770: IS iperm;
2772: PetscFunctionBegin;
2773: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2774: PetscCall(MatMissingDiagonal(A, &missing, &i));
2775: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2777: /* check whether perm is the identity mapping */
2778: PetscCall(ISIdentity(perm, &perm_identity));
2779: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2780: PetscCall(ISGetIndices(iperm, &riip));
2781: PetscCall(ISGetIndices(perm, &rip));
2783: /* initialization */
2784: PetscCall(PetscMalloc1(am + 1, &ui));
2785: PetscCall(PetscMalloc1(am + 1, &udiag));
2786: ui[0] = 0;
2788: /* jl: linked list for storing indices of the pivot rows
2789: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2790: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2791: for (i = 0; i < am; i++) {
2792: jl[i] = am;
2793: il[i] = 0;
2794: }
2796: /* create and initialize a linked list for storing column indices of the active row k */
2797: nlnk = am + 1;
2798: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2800: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2801: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2802: current_space = free_space;
2804: for (k = 0; k < am; k++) { /* for each active row k */
2805: /* initialize lnk by the column indices of row rip[k] of A */
2806: nzk = 0;
2807: ncols = ai[rip[k] + 1] - ai[rip[k]];
2808: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2809: ncols_upper = 0;
2810: for (j = 0; j < ncols; j++) {
2811: i = riip[*(aj + ai[rip[k]] + j)];
2812: if (i >= k) { /* only take upper triangular entry */
2813: cols[ncols_upper] = i;
2814: ncols_upper++;
2815: }
2816: }
2817: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2818: nzk += nlnk;
2820: /* update lnk by computing fill-in for each pivot row to be merged in */
2821: prow = jl[k]; /* 1st pivot row */
2823: while (prow < k) {
2824: nextprow = jl[prow];
2825: /* merge prow into k-th row */
2826: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2827: jmax = ui[prow + 1];
2828: ncols = jmax - jmin;
2829: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2830: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2831: nzk += nlnk;
2833: /* update il and jl for prow */
2834: if (jmin < jmax) {
2835: il[prow] = jmin;
2836: j = *uj_ptr;
2837: jl[prow] = jl[j];
2838: jl[j] = prow;
2839: }
2840: prow = nextprow;
2841: }
2843: /* if free space is not available, make more free space */
2844: if (current_space->local_remaining < nzk) {
2845: i = am - k + 1; /* num of unfactored rows */
2846: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2847: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2848: reallocs++;
2849: }
2851: /* copy data into free space, then initialize lnk */
2852: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2854: /* add the k-th row into il and jl */
2855: if (nzk > 1) {
2856: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2857: jl[k] = jl[i];
2858: jl[i] = k;
2859: il[k] = ui[k] + 1;
2860: }
2861: ui_ptr[k] = current_space->array;
2863: current_space->array += nzk;
2864: current_space->local_used += nzk;
2865: current_space->local_remaining -= nzk;
2867: ui[k + 1] = ui[k] + nzk;
2868: }
2870: PetscCall(ISRestoreIndices(perm, &rip));
2871: PetscCall(ISRestoreIndices(iperm, &riip));
2872: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2874: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2875: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2876: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2877: PetscCall(PetscLLDestroy(lnk, lnkbt));
2879: /* put together the new matrix in MATSEQSBAIJ format */
2881: b = (Mat_SeqSBAIJ *)fact->data;
2882: b->singlemalloc = PETSC_FALSE;
2883: b->free_a = PETSC_TRUE;
2884: b->free_ij = PETSC_TRUE;
2886: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
2888: b->j = uj;
2889: b->i = ui;
2890: b->diag = udiag;
2891: b->free_diag = PETSC_TRUE;
2892: b->ilen = NULL;
2893: b->imax = NULL;
2894: b->row = perm;
2895: b->col = perm;
2897: PetscCall(PetscObjectReference((PetscObject)perm));
2898: PetscCall(PetscObjectReference((PetscObject)perm));
2900: b->icol = iperm;
2901: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2903: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2905: b->maxnz = b->nz = ui[am];
2907: fact->info.factor_mallocs = reallocs;
2908: fact->info.fill_ratio_given = fill;
2909: if (ai[am] != 0) {
2910: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2911: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2912: } else {
2913: fact->info.fill_ratio_needed = 0.0;
2914: }
2915: #if defined(PETSC_USE_INFO)
2916: if (ai[am] != 0) {
2917: PetscReal af = fact->info.fill_ratio_needed;
2918: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2919: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2920: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2921: } else {
2922: PetscCall(PetscInfo(A, "Empty matrix\n"));
2923: }
2924: #endif
2925: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2926: PetscFunctionReturn(PETSC_SUCCESS);
2927: }
2929: #if 0
2930: // unused
2931: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2932: {
2933: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2934: Mat_SeqSBAIJ *b;
2935: PetscBool perm_identity, missing;
2936: PetscReal fill = info->fill;
2937: const PetscInt *rip, *riip;
2938: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2939: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2940: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2941: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2942: PetscBT lnkbt;
2943: IS iperm;
2945: PetscFunctionBegin;
2946: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2947: PetscCall(MatMissingDiagonal(A, &missing, &i));
2948: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2950: /* check whether perm is the identity mapping */
2951: PetscCall(ISIdentity(perm, &perm_identity));
2952: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2953: PetscCall(ISGetIndices(iperm, &riip));
2954: PetscCall(ISGetIndices(perm, &rip));
2956: /* initialization */
2957: PetscCall(PetscMalloc1(am + 1, &ui));
2958: ui[0] = 0;
2960: /* jl: linked list for storing indices of the pivot rows
2961: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2962: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2963: for (i = 0; i < am; i++) {
2964: jl[i] = am;
2965: il[i] = 0;
2966: }
2968: /* create and initialize a linked list for storing column indices of the active row k */
2969: nlnk = am + 1;
2970: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2972: /* initial FreeSpace size is fill*(ai[am]+1) */
2973: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2974: current_space = free_space;
2976: for (k = 0; k < am; k++) { /* for each active row k */
2977: /* initialize lnk by the column indices of row rip[k] of A */
2978: nzk = 0;
2979: ncols = ai[rip[k] + 1] - ai[rip[k]];
2980: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2981: ncols_upper = 0;
2982: for (j = 0; j < ncols; j++) {
2983: i = riip[*(aj + ai[rip[k]] + j)];
2984: if (i >= k) { /* only take upper triangular entry */
2985: cols[ncols_upper] = i;
2986: ncols_upper++;
2987: }
2988: }
2989: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2990: nzk += nlnk;
2992: /* update lnk by computing fill-in for each pivot row to be merged in */
2993: prow = jl[k]; /* 1st pivot row */
2995: while (prow < k) {
2996: nextprow = jl[prow];
2997: /* merge prow into k-th row */
2998: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2999: jmax = ui[prow + 1];
3000: ncols = jmax - jmin;
3001: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3002: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3003: nzk += nlnk;
3005: /* update il and jl for prow */
3006: if (jmin < jmax) {
3007: il[prow] = jmin;
3008: j = *uj_ptr;
3009: jl[prow] = jl[j];
3010: jl[j] = prow;
3011: }
3012: prow = nextprow;
3013: }
3015: /* if free space is not available, make more free space */
3016: if (current_space->local_remaining < nzk) {
3017: i = am - k + 1; /* num of unfactored rows */
3018: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3019: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
3020: reallocs++;
3021: }
3023: /* copy data into free space, then initialize lnk */
3024: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3026: /* add the k-th row into il and jl */
3027: if (nzk - 1 > 0) {
3028: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3029: jl[k] = jl[i];
3030: jl[i] = k;
3031: il[k] = ui[k] + 1;
3032: }
3033: ui_ptr[k] = current_space->array;
3035: current_space->array += nzk;
3036: current_space->local_used += nzk;
3037: current_space->local_remaining -= nzk;
3039: ui[k + 1] = ui[k] + nzk;
3040: }
3042: #if defined(PETSC_USE_INFO)
3043: if (ai[am] != 0) {
3044: PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3045: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3046: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3047: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3048: } else {
3049: PetscCall(PetscInfo(A, "Empty matrix\n"));
3050: }
3051: #endif
3053: PetscCall(ISRestoreIndices(perm, &rip));
3054: PetscCall(ISRestoreIndices(iperm, &riip));
3055: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3057: /* destroy list of free space and other temporary array(s) */
3058: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3059: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3060: PetscCall(PetscLLDestroy(lnk, lnkbt));
3062: /* put together the new matrix in MATSEQSBAIJ format */
3064: b = (Mat_SeqSBAIJ *)fact->data;
3065: b->singlemalloc = PETSC_FALSE;
3066: b->free_a = PETSC_TRUE;
3067: b->free_ij = PETSC_TRUE;
3069: PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
3071: b->j = uj;
3072: b->i = ui;
3073: b->diag = NULL;
3074: b->ilen = NULL;
3075: b->imax = NULL;
3076: b->row = perm;
3077: b->col = perm;
3079: PetscCall(PetscObjectReference((PetscObject)perm));
3080: PetscCall(PetscObjectReference((PetscObject)perm));
3082: b->icol = iperm;
3083: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3085: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3086: b->maxnz = b->nz = ui[am];
3088: fact->info.factor_mallocs = reallocs;
3089: fact->info.fill_ratio_given = fill;
3090: if (ai[am] != 0) {
3091: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3092: } else {
3093: fact->info.fill_ratio_needed = 0.0;
3094: }
3095: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3096: PetscFunctionReturn(PETSC_SUCCESS);
3097: }
3098: #endif
3100: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3101: {
3102: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3103: PetscInt n = A->rmap->n;
3104: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3105: PetscScalar *x, sum;
3106: const PetscScalar *b;
3107: const MatScalar *aa = a->a, *v;
3108: PetscInt i, nz;
3110: PetscFunctionBegin;
3111: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3113: PetscCall(VecGetArrayRead(bb, &b));
3114: PetscCall(VecGetArrayWrite(xx, &x));
3116: /* forward solve the lower triangular */
3117: x[0] = b[0];
3118: v = aa;
3119: vi = aj;
3120: for (i = 1; i < n; i++) {
3121: nz = ai[i + 1] - ai[i];
3122: sum = b[i];
3123: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3124: v += nz;
3125: vi += nz;
3126: x[i] = sum;
3127: }
3129: /* backward solve the upper triangular */
3130: for (i = n - 1; i >= 0; i--) {
3131: v = aa + adiag[i + 1] + 1;
3132: vi = aj + adiag[i + 1] + 1;
3133: nz = adiag[i] - adiag[i + 1] - 1;
3134: sum = x[i];
3135: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3136: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3137: }
3139: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3140: PetscCall(VecRestoreArrayRead(bb, &b));
3141: PetscCall(VecRestoreArrayWrite(xx, &x));
3142: PetscFunctionReturn(PETSC_SUCCESS);
3143: }
3145: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3146: {
3147: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3148: IS iscol = a->col, isrow = a->row;
3149: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3150: const PetscInt *rout, *cout, *r, *c;
3151: PetscScalar *x, *tmp, sum;
3152: const PetscScalar *b;
3153: const MatScalar *aa = a->a, *v;
3155: PetscFunctionBegin;
3156: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3158: PetscCall(VecGetArrayRead(bb, &b));
3159: PetscCall(VecGetArrayWrite(xx, &x));
3160: tmp = a->solve_work;
3162: PetscCall(ISGetIndices(isrow, &rout));
3163: r = rout;
3164: PetscCall(ISGetIndices(iscol, &cout));
3165: c = cout;
3167: /* forward solve the lower triangular */
3168: tmp[0] = b[r[0]];
3169: v = aa;
3170: vi = aj;
3171: for (i = 1; i < n; i++) {
3172: nz = ai[i + 1] - ai[i];
3173: sum = b[r[i]];
3174: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3175: tmp[i] = sum;
3176: v += nz;
3177: vi += nz;
3178: }
3180: /* backward solve the upper triangular */
3181: for (i = n - 1; i >= 0; i--) {
3182: v = aa + adiag[i + 1] + 1;
3183: vi = aj + adiag[i + 1] + 1;
3184: nz = adiag[i] - adiag[i + 1] - 1;
3185: sum = tmp[i];
3186: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3187: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3188: }
3190: PetscCall(ISRestoreIndices(isrow, &rout));
3191: PetscCall(ISRestoreIndices(iscol, &cout));
3192: PetscCall(VecRestoreArrayRead(bb, &b));
3193: PetscCall(VecRestoreArrayWrite(xx, &x));
3194: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3195: PetscFunctionReturn(PETSC_SUCCESS);
3196: }
3198: #if 0
3199: // unused
3200: /*
3201: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3202: */
3203: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3204: {
3205: Mat B = *fact;
3206: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3207: IS isicol;
3208: const PetscInt *r, *ic;
3209: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3210: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3211: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3212: PetscInt nlnk, *lnk;
3213: PetscBT lnkbt;
3214: PetscBool row_identity, icol_identity;
3215: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3216: const PetscInt *ics;
3217: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3218: PetscReal dt = info->dt, shift = info->shiftamount;
3219: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3220: PetscBool missing;
3222: PetscFunctionBegin;
3223: if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3224: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3226: /* symbolic factorization, can be reused */
3227: PetscCall(MatMissingDiagonal(A, &missing, &i));
3228: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3229: adiag = a->diag;
3231: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3233: /* bdiag is location of diagonal in factor */
3234: PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */
3235: PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3237: /* allocate row pointers bi */
3238: PetscCall(PetscMalloc1(2 * n + 2, &bi));
3240: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3241: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3242: nnz_max = ai[n] + 2 * n * dtcount + 2;
3244: PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3245: PetscCall(PetscMalloc1(nnz_max + 1, &ba));
3247: /* put together the new matrix */
3248: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3249: b = (Mat_SeqAIJ *)B->data;
3251: b->free_a = PETSC_TRUE;
3252: b->free_ij = PETSC_TRUE;
3253: b->singlemalloc = PETSC_FALSE;
3255: b->a = ba;
3256: b->j = bj;
3257: b->i = bi;
3258: b->diag = bdiag;
3259: b->ilen = NULL;
3260: b->imax = NULL;
3261: b->row = isrow;
3262: b->col = iscol;
3263: PetscCall(PetscObjectReference((PetscObject)isrow));
3264: PetscCall(PetscObjectReference((PetscObject)iscol));
3265: b->icol = isicol;
3267: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3268: b->maxnz = nnz_max;
3270: B->factortype = MAT_FACTOR_ILUDT;
3271: B->info.factor_mallocs = 0;
3272: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3273: /* end of symbolic factorization */
3275: PetscCall(ISGetIndices(isrow, &r));
3276: PetscCall(ISGetIndices(isicol, &ic));
3277: ics = ic;
3279: /* linked list for storing column indices of the active row */
3280: nlnk = n + 1;
3281: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3283: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3284: PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3285: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3286: PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3287: PetscCall(PetscArrayzero(rtmp, n));
3289: bi[0] = 0;
3290: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3291: bdiag_rev[n] = bdiag[0];
3292: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3293: for (i = 0; i < n; i++) {
3294: /* copy initial fill into linked list */
3295: nzi = ai[r[i] + 1] - ai[r[i]];
3296: PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3297: nzi_al = adiag[r[i]] - ai[r[i]];
3298: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3299: ajtmp = aj + ai[r[i]];
3300: PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3302: /* load in initial (unfactored row) */
3303: aatmp = a->a + ai[r[i]];
3304: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3306: /* add pivot rows into linked list */
3307: row = lnk[n];
3308: while (row < i) {
3309: nzi_bl = bi[row + 1] - bi[row] + 1;
3310: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3311: PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3312: nzi += nlnk;
3313: row = lnk[row];
3314: }
3316: /* copy data from lnk into jtmp, then initialize lnk */
3317: PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3319: /* numerical factorization */
3320: bjtmp = jtmp;
3321: row = *bjtmp++; /* 1st pivot row */
3322: while (row < i) {
3323: pc = rtmp + row;
3324: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3325: multiplier = (*pc) * (*pv);
3326: *pc = multiplier;
3327: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3328: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3329: pv = ba + bdiag[row + 1] + 1;
3330: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3331: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3332: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3333: }
3334: row = *bjtmp++;
3335: }
3337: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3338: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3339: nzi_bl = 0;
3340: j = 0;
3341: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3342: vtmp[j] = rtmp[jtmp[j]];
3343: rtmp[jtmp[j]] = 0.0;
3344: nzi_bl++;
3345: j++;
3346: }
3347: nzi_bu = nzi - nzi_bl - 1;
3348: while (j < nzi) {
3349: vtmp[j] = rtmp[jtmp[j]];
3350: rtmp[jtmp[j]] = 0.0;
3351: j++;
3352: }
3354: bjtmp = bj + bi[i];
3355: batmp = ba + bi[i];
3356: /* apply level dropping rule to L part */
3357: ncut = nzi_al + dtcount;
3358: if (ncut < nzi_bl) {
3359: PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3360: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3361: } else {
3362: ncut = nzi_bl;
3363: }
3364: for (j = 0; j < ncut; j++) {
3365: bjtmp[j] = jtmp[j];
3366: batmp[j] = vtmp[j];
3367: }
3368: bi[i + 1] = bi[i] + ncut;
3369: nzi = ncut + 1;
3371: /* apply level dropping rule to U part */
3372: ncut = nzi_au + dtcount;
3373: if (ncut < nzi_bu) {
3374: PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3375: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3376: } else {
3377: ncut = nzi_bu;
3378: }
3379: nzi += ncut;
3381: /* mark bdiagonal */
3382: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3383: bdiag_rev[n - i - 1] = bdiag[i + 1];
3384: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3385: bjtmp = bj + bdiag[i];
3386: batmp = ba + bdiag[i];
3387: *bjtmp = i;
3388: *batmp = diag_tmp; /* rtmp[i]; */
3389: if (*batmp == 0.0) *batmp = dt + shift;
3390: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3392: bjtmp = bj + bdiag[i + 1] + 1;
3393: batmp = ba + bdiag[i + 1] + 1;
3394: for (k = 0; k < ncut; k++) {
3395: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3396: batmp[k] = vtmp[nzi_bl + 1 + k];
3397: }
3399: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3400: } /* for (i=0; i<n; i++) */
3401: PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3403: PetscCall(ISRestoreIndices(isrow, &r));
3404: PetscCall(ISRestoreIndices(isicol, &ic));
3406: PetscCall(PetscLLDestroy(lnk, lnkbt));
3407: PetscCall(PetscFree2(im, jtmp));
3408: PetscCall(PetscFree2(rtmp, vtmp));
3409: PetscCall(PetscFree(bdiag_rev));
3411: PetscCall(PetscLogFlops(B->cmap->n));
3412: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3414: PetscCall(ISIdentity(isrow, &row_identity));
3415: PetscCall(ISIdentity(isicol, &icol_identity));
3416: if (row_identity && icol_identity) {
3417: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3418: } else {
3419: B->ops->solve = MatSolve_SeqAIJ;
3420: }
3422: B->ops->solveadd = NULL;
3423: B->ops->solvetranspose = NULL;
3424: B->ops->solvetransposeadd = NULL;
3425: B->ops->matsolve = NULL;
3426: B->assembled = PETSC_TRUE;
3427: B->preallocated = PETSC_TRUE;
3428: PetscFunctionReturn(PETSC_SUCCESS);
3429: }
3431: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3432: /*
3433: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3434: */
3436: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3437: {
3438: PetscFunctionBegin;
3439: PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3440: PetscFunctionReturn(PETSC_SUCCESS);
3441: }
3443: /*
3444: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3445: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3446: */
3447: /*
3448: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3449: */
3451: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3452: {
3453: Mat C = fact;
3454: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3455: IS isrow = b->row, isicol = b->icol;
3456: const PetscInt *r, *ic, *ics;
3457: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3458: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3459: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3460: PetscReal dt = info->dt, shift = info->shiftamount;
3461: PetscBool row_identity, col_identity;
3463: PetscFunctionBegin;
3464: PetscCall(ISGetIndices(isrow, &r));
3465: PetscCall(ISGetIndices(isicol, &ic));
3466: PetscCall(PetscMalloc1(n + 1, &rtmp));
3467: ics = ic;
3469: for (i = 0; i < n; i++) {
3470: /* initialize rtmp array */
3471: nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3472: bjtmp = bj + bi[i];
3473: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3474: rtmp[i] = 0.0;
3475: nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3476: bjtmp = bj + bdiag[i + 1] + 1;
3477: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3479: /* load in initial unfactored row of A */
3480: nz = ai[r[i] + 1] - ai[r[i]];
3481: ajtmp = aj + ai[r[i]];
3482: v = aa + ai[r[i]];
3483: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3485: /* numerical factorization */
3486: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3487: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3488: k = 0;
3489: while (k < nzl) {
3490: row = *bjtmp++;
3491: pc = rtmp + row;
3492: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3493: multiplier = (*pc) * (*pv);
3494: *pc = multiplier;
3495: if (PetscAbsScalar(multiplier) > dt) {
3496: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3497: pv = b->a + bdiag[row + 1] + 1;
3498: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3499: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3500: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3501: }
3502: k++;
3503: }
3505: /* finished row so stick it into b->a */
3506: /* L-part */
3507: pv = b->a + bi[i];
3508: pj = bj + bi[i];
3509: nzl = bi[i + 1] - bi[i];
3510: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3512: /* diagonal: invert diagonal entries for simpler triangular solves */
3513: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3514: b->a[bdiag[i]] = 1.0 / rtmp[i];
3516: /* U-part */
3517: pv = b->a + bdiag[i + 1] + 1;
3518: pj = bj + bdiag[i + 1] + 1;
3519: nzu = bdiag[i] - bdiag[i + 1] - 1;
3520: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3521: }
3523: PetscCall(PetscFree(rtmp));
3524: PetscCall(ISRestoreIndices(isicol, &ic));
3525: PetscCall(ISRestoreIndices(isrow, &r));
3527: PetscCall(ISIdentity(isrow, &row_identity));
3528: PetscCall(ISIdentity(isicol, &col_identity));
3529: if (row_identity && col_identity) {
3530: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3531: } else {
3532: C->ops->solve = MatSolve_SeqAIJ;
3533: }
3534: C->ops->solveadd = NULL;
3535: C->ops->solvetranspose = NULL;
3536: C->ops->solvetransposeadd = NULL;
3537: C->ops->matsolve = NULL;
3538: C->assembled = PETSC_TRUE;
3539: C->preallocated = PETSC_TRUE;
3541: PetscCall(PetscLogFlops(C->cmap->n));
3542: PetscFunctionReturn(PETSC_SUCCESS);
3543: }
3544: #endif