Actual source code: sbaijfact.c
2: #include <../src/mat/impls/baij/seq/baij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petsc/private/kernels/blockinvert.h>
5: #include <petscis.h>
7: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
8: {
9: Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
10: MatScalar *dd = fact->a;
11: PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
13: PetscFunctionBegin;
14: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
16: nneg_tmp = 0;
17: npos_tmp = 0;
18: for (i = 0; i < mbs; i++) {
19: if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20: else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21: fi++;
22: }
23: if (nneg) *nneg = nneg_tmp;
24: if (npos) *npos = npos_tmp;
25: if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
26: PetscFunctionReturn(PETSC_SUCCESS);
27: }
29: /*
30: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
31: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
32: */
33: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
34: {
35: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
36: const PetscInt *rip, *ai, *aj;
37: PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
38: PetscInt m, reallocs = 0, prow;
39: PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
40: PetscReal f = info->fill;
41: PetscBool perm_identity;
43: PetscFunctionBegin;
44: /* check whether perm is the identity mapping */
45: PetscCall(ISIdentity(perm, &perm_identity));
46: PetscCall(ISGetIndices(perm, &rip));
48: if (perm_identity) { /* without permutation */
49: a->permute = PETSC_FALSE;
51: ai = a->i;
52: aj = a->j;
53: } else { /* non-trivial permutation */
54: a->permute = PETSC_TRUE;
56: PetscCall(MatReorderingSeqSBAIJ(A, perm));
58: ai = a->inew;
59: aj = a->jnew;
60: }
62: /* initialization */
63: PetscCall(PetscMalloc1(mbs + 1, &iu));
64: umax = (PetscInt)(f * ai[mbs] + 1);
65: umax += mbs + 1;
66: PetscCall(PetscMalloc1(umax, &ju));
67: iu[0] = mbs + 1;
68: juidx = mbs + 1; /* index for ju */
69: /* jl linked list for pivot row -- linked list for col index */
70: PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
71: for (i = 0; i < mbs; i++) {
72: jl[i] = mbs;
73: q[i] = 0;
74: }
76: /* for each row k */
77: for (k = 0; k < mbs; k++) {
78: for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
79: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
80: q[k] = mbs;
81: /* initialize nonzero structure of k-th row to row rip[k] of A */
82: jmin = ai[rip[k]] + 1; /* exclude diag[k] */
83: jmax = ai[rip[k] + 1];
84: for (j = jmin; j < jmax; j++) {
85: vj = rip[aj[j]]; /* col. value */
86: if (vj > k) {
87: qm = k;
88: do {
89: m = qm;
90: qm = q[m];
91: } while (qm < vj);
92: PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
93: nzk++;
94: q[m] = vj;
95: q[vj] = qm;
96: } /* if (vj > k) */
97: } /* for (j=jmin; j<jmax; j++) */
99: /* modify nonzero structure of k-th row by computing fill-in
100: for each row i to be merged in */
101: prow = k;
102: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
104: while (prow < k) {
105: /* merge row prow into k-th row */
106: jmin = iu[prow] + 1;
107: jmax = iu[prow + 1];
108: qm = k;
109: for (j = jmin; j < jmax; j++) {
110: vj = ju[j];
111: do {
112: m = qm;
113: qm = q[m];
114: } while (qm < vj);
115: if (qm != vj) {
116: nzk++;
117: q[m] = vj;
118: q[vj] = qm;
119: qm = vj;
120: }
121: }
122: prow = jl[prow]; /* next pivot row */
123: }
125: /* add k to row list for first nonzero element in k-th row */
126: if (nzk > 0) {
127: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
128: jl[k] = jl[i];
129: jl[i] = k;
130: }
131: iu[k + 1] = iu[k] + nzk;
133: /* allocate more space to ju if needed */
134: if (iu[k + 1] > umax) {
135: /* estimate how much additional space we will need */
136: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
137: /* just double the memory each time */
138: maxadd = umax;
139: if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
140: umax += maxadd;
142: /* allocate a longer ju */
143: PetscCall(PetscMalloc1(umax, &jutmp));
144: PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
145: PetscCall(PetscFree(ju));
146: ju = jutmp;
147: reallocs++; /* count how many times we realloc */
148: }
150: /* save nonzero structure of k-th row in ju */
151: i = k;
152: while (nzk--) {
153: i = q[i];
154: ju[juidx++] = i;
155: }
156: }
158: #if defined(PETSC_USE_INFO)
159: if (ai[mbs] != 0) {
160: PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
161: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
162: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
163: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
164: PetscCall(PetscInfo(A, "for best performance.\n"));
165: } else {
166: PetscCall(PetscInfo(A, "Empty matrix\n"));
167: }
168: #endif
170: PetscCall(ISRestoreIndices(perm, &rip));
171: PetscCall(PetscFree2(jl, q));
173: /* put together the new matrix */
174: PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
176: b = (Mat_SeqSBAIJ *)(F)->data;
177: b->singlemalloc = PETSC_FALSE;
178: b->free_a = PETSC_TRUE;
179: b->free_ij = PETSC_TRUE;
181: PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
182: b->j = ju;
183: b->i = iu;
184: b->diag = NULL;
185: b->ilen = NULL;
186: b->imax = NULL;
187: b->row = perm;
189: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
191: PetscCall(PetscObjectReference((PetscObject)perm));
193: b->icol = perm;
194: PetscCall(PetscObjectReference((PetscObject)perm));
195: PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
196: /* In b structure: Free imax, ilen, old a, old j.
197: Allocate idnew, solve_work, new a, new j */
198: b->maxnz = b->nz = iu[mbs];
200: (F)->info.factor_mallocs = reallocs;
201: (F)->info.fill_ratio_given = f;
202: if (ai[mbs] != 0) {
203: (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
204: } else {
205: (F)->info.fill_ratio_needed = 0.0;
206: }
207: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
208: PetscFunctionReturn(PETSC_SUCCESS);
209: }
210: /*
211: Symbolic U^T*D*U factorization for SBAIJ format.
212: See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
213: */
214: #include <petscbt.h>
215: #include <../src/mat/utils/freespace.h>
216: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
217: {
218: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
219: Mat_SeqSBAIJ *b;
220: PetscBool perm_identity, missing;
221: PetscReal fill = info->fill;
222: const PetscInt *rip, *ai = a->i, *aj = a->j;
223: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
224: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
225: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
226: PetscFreeSpaceList free_space = NULL, current_space = NULL;
227: PetscBT lnkbt;
229: PetscFunctionBegin;
230: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
231: PetscCall(MatMissingDiagonal(A, &missing, &i));
232: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
233: if (bs > 1) {
234: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /* check whether perm is the identity mapping */
239: PetscCall(ISIdentity(perm, &perm_identity));
240: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
241: a->permute = PETSC_FALSE;
242: PetscCall(ISGetIndices(perm, &rip));
244: /* initialization */
245: PetscCall(PetscMalloc1(mbs + 1, &ui));
246: PetscCall(PetscMalloc1(mbs + 1, &udiag));
247: ui[0] = 0;
249: /* jl: linked list for storing indices of the pivot rows
250: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
251: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
252: for (i = 0; i < mbs; i++) {
253: jl[i] = mbs;
254: il[i] = 0;
255: }
257: /* create and initialize a linked list for storing column indices of the active row k */
258: nlnk = mbs + 1;
259: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
261: /* initial FreeSpace size is fill*(ai[mbs]+1) */
262: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
263: current_space = free_space;
265: for (k = 0; k < mbs; k++) { /* for each active row k */
266: /* initialize lnk by the column indices of row rip[k] of A */
267: nzk = 0;
268: ncols = ai[k + 1] - ai[k];
269: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
270: for (j = 0; j < ncols; j++) {
271: i = *(aj + ai[k] + j);
272: cols[j] = i;
273: }
274: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
275: nzk += nlnk;
277: /* update lnk by computing fill-in for each pivot row to be merged in */
278: prow = jl[k]; /* 1st pivot row */
280: while (prow < k) {
281: nextprow = jl[prow];
282: /* merge prow into k-th row */
283: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
284: jmax = ui[prow + 1];
285: ncols = jmax - jmin;
286: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
287: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
288: nzk += nlnk;
290: /* update il and jl for prow */
291: if (jmin < jmax) {
292: il[prow] = jmin;
293: j = *uj_ptr;
294: jl[prow] = jl[j];
295: jl[j] = prow;
296: }
297: prow = nextprow;
298: }
300: /* if free space is not available, make more free space */
301: if (current_space->local_remaining < nzk) {
302: i = mbs - k + 1; /* num of unfactored rows */
303: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
304: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
305: reallocs++;
306: }
308: /* copy data into free space, then initialize lnk */
309: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
311: /* add the k-th row into il and jl */
312: if (nzk > 1) {
313: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
314: jl[k] = jl[i];
315: jl[i] = k;
316: il[k] = ui[k] + 1;
317: }
318: ui_ptr[k] = current_space->array;
320: current_space->array += nzk;
321: current_space->local_used += nzk;
322: current_space->local_remaining -= nzk;
324: ui[k + 1] = ui[k] + nzk;
325: }
327: PetscCall(ISRestoreIndices(perm, &rip));
328: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
330: /* destroy list of free space and other temporary array(s) */
331: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
332: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
333: PetscCall(PetscLLDestroy(lnk, lnkbt));
335: /* put together the new matrix in MATSEQSBAIJ format */
336: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
338: b = (Mat_SeqSBAIJ *)fact->data;
339: b->singlemalloc = PETSC_FALSE;
340: b->free_a = PETSC_TRUE;
341: b->free_ij = PETSC_TRUE;
343: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
345: b->j = uj;
346: b->i = ui;
347: b->diag = udiag;
348: b->free_diag = PETSC_TRUE;
349: b->ilen = NULL;
350: b->imax = NULL;
351: b->row = perm;
352: b->icol = perm;
354: PetscCall(PetscObjectReference((PetscObject)perm));
355: PetscCall(PetscObjectReference((PetscObject)perm));
357: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
359: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
361: b->maxnz = b->nz = ui[mbs];
363: fact->info.factor_mallocs = reallocs;
364: fact->info.fill_ratio_given = fill;
365: if (ai[mbs] != 0) {
366: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
367: } else {
368: fact->info.fill_ratio_needed = 0.0;
369: }
370: #if defined(PETSC_USE_INFO)
371: if (ai[mbs] != 0) {
372: PetscReal af = fact->info.fill_ratio_needed;
373: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
374: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
375: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
376: } else {
377: PetscCall(PetscInfo(A, "Empty matrix\n"));
378: }
379: #endif
380: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
381: PetscFunctionReturn(PETSC_SUCCESS);
382: }
384: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
385: {
386: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
387: Mat_SeqSBAIJ *b;
388: PetscBool perm_identity, missing;
389: PetscReal fill = info->fill;
390: const PetscInt *rip, *ai, *aj;
391: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
392: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
393: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
394: PetscFreeSpaceList free_space = NULL, current_space = NULL;
395: PetscBT lnkbt;
397: PetscFunctionBegin;
398: PetscCall(MatMissingDiagonal(A, &missing, &d));
399: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
401: /*
402: This code originally uses Modified Sparse Row (MSR) storage
403: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
404: Then it is rewritten so the factor B takes seqsbaij format. However the associated
405: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
406: thus the original code in MSR format is still used for these cases.
407: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
408: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
409: */
410: if (bs > 1) {
411: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
415: /* check whether perm is the identity mapping */
416: PetscCall(ISIdentity(perm, &perm_identity));
417: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
418: a->permute = PETSC_FALSE;
419: ai = a->i;
420: aj = a->j;
421: PetscCall(ISGetIndices(perm, &rip));
423: /* initialization */
424: PetscCall(PetscMalloc1(mbs + 1, &ui));
425: ui[0] = 0;
427: /* jl: linked list for storing indices of the pivot rows
428: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
429: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
430: for (i = 0; i < mbs; i++) {
431: jl[i] = mbs;
432: il[i] = 0;
433: }
435: /* create and initialize a linked list for storing column indices of the active row k */
436: nlnk = mbs + 1;
437: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
439: /* initial FreeSpace size is fill*(ai[mbs]+1) */
440: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
441: current_space = free_space;
443: for (k = 0; k < mbs; k++) { /* for each active row k */
444: /* initialize lnk by the column indices of row rip[k] of A */
445: nzk = 0;
446: ncols = ai[rip[k] + 1] - ai[rip[k]];
447: for (j = 0; j < ncols; j++) {
448: i = *(aj + ai[rip[k]] + j);
449: cols[j] = rip[i];
450: }
451: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
452: nzk += nlnk;
454: /* update lnk by computing fill-in for each pivot row to be merged in */
455: prow = jl[k]; /* 1st pivot row */
457: while (prow < k) {
458: nextprow = jl[prow];
459: /* merge prow into k-th row */
460: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
461: jmax = ui[prow + 1];
462: ncols = jmax - jmin;
463: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
464: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
465: nzk += nlnk;
467: /* update il and jl for prow */
468: if (jmin < jmax) {
469: il[prow] = jmin;
471: j = *uj_ptr;
472: jl[prow] = jl[j];
473: jl[j] = prow;
474: }
475: prow = nextprow;
476: }
478: /* if free space is not available, make more free space */
479: if (current_space->local_remaining < nzk) {
480: i = mbs - k + 1; /* num of unfactored rows */
481: i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
482: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
483: reallocs++;
484: }
486: /* copy data into free space, then initialize lnk */
487: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
489: /* add the k-th row into il and jl */
490: if (nzk - 1 > 0) {
491: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
492: jl[k] = jl[i];
493: jl[i] = k;
494: il[k] = ui[k] + 1;
495: }
496: ui_ptr[k] = current_space->array;
498: current_space->array += nzk;
499: current_space->local_used += nzk;
500: current_space->local_remaining -= nzk;
502: ui[k + 1] = ui[k] + nzk;
503: }
505: PetscCall(ISRestoreIndices(perm, &rip));
506: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
508: /* destroy list of free space and other temporary array(s) */
509: PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
510: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
511: PetscCall(PetscLLDestroy(lnk, lnkbt));
513: /* put together the new matrix in MATSEQSBAIJ format */
514: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
516: b = (Mat_SeqSBAIJ *)fact->data;
517: b->singlemalloc = PETSC_FALSE;
518: b->free_a = PETSC_TRUE;
519: b->free_ij = PETSC_TRUE;
521: PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
523: b->j = uj;
524: b->i = ui;
525: b->diag = NULL;
526: b->ilen = NULL;
527: b->imax = NULL;
528: b->row = perm;
530: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
532: PetscCall(PetscObjectReference((PetscObject)perm));
533: b->icol = perm;
534: PetscCall(PetscObjectReference((PetscObject)perm));
535: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
536: b->maxnz = b->nz = ui[mbs];
538: fact->info.factor_mallocs = reallocs;
539: fact->info.fill_ratio_given = fill;
540: if (ai[mbs] != 0) {
541: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
542: } else {
543: fact->info.fill_ratio_needed = 0.0;
544: }
545: #if defined(PETSC_USE_INFO)
546: if (ai[mbs] != 0) {
547: PetscReal af = fact->info.fill_ratio_needed;
548: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
549: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
550: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
551: } else {
552: PetscCall(PetscInfo(A, "Empty matrix\n"));
553: }
554: #endif
555: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
556: PetscFunctionReturn(PETSC_SUCCESS);
557: }
559: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
560: {
561: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
562: IS perm = b->row;
563: const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
564: PetscInt i, j;
565: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
566: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
567: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
568: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
569: MatScalar *work;
570: PetscInt *pivots;
571: PetscBool allowzeropivot, zeropivotdetected;
573: PetscFunctionBegin;
574: /* initialization */
575: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
576: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
577: allowzeropivot = PetscNot(A->erroriffailure);
579: il[0] = 0;
580: for (i = 0; i < mbs; i++) jl[i] = mbs;
582: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
583: PetscCall(PetscMalloc1(bs, &pivots));
585: PetscCall(ISGetIndices(perm, &perm_ptr));
587: /* check permutation */
588: if (!a->permute) {
589: ai = a->i;
590: aj = a->j;
591: aa = a->a;
592: } else {
593: ai = a->inew;
594: aj = a->jnew;
595: PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
596: PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
597: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
598: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
600: for (i = 0; i < mbs; i++) {
601: jmin = ai[i];
602: jmax = ai[i + 1];
603: for (j = jmin; j < jmax; j++) {
604: while (a2anew[j] != j) {
605: k = a2anew[j];
606: a2anew[j] = a2anew[k];
607: a2anew[k] = k;
608: for (k1 = 0; k1 < bs2; k1++) {
609: dk[k1] = aa[k * bs2 + k1];
610: aa[k * bs2 + k1] = aa[j * bs2 + k1];
611: aa[j * bs2 + k1] = dk[k1];
612: }
613: }
614: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
615: if (i > aj[j]) {
616: ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
617: for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
618: for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
619: for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
620: }
621: }
622: }
623: }
624: PetscCall(PetscFree(a2anew));
625: }
627: /* for each row k */
628: for (k = 0; k < mbs; k++) {
629: /*initialize k-th row with elements nonzero in row perm(k) of A */
630: jmin = ai[perm_ptr[k]];
631: jmax = ai[perm_ptr[k] + 1];
633: ap = aa + jmin * bs2;
634: for (j = jmin; j < jmax; j++) {
635: vj = perm_ptr[aj[j]]; /* block col. index */
636: rtmp_ptr = rtmp + vj * bs2;
637: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
638: }
640: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
641: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
642: i = jl[k]; /* first row to be added to k_th row */
644: while (i < k) {
645: nexti = jl[i]; /* next row to be added to k_th row */
647: /* compute multiplier */
648: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
650: /* uik = -inv(Di)*U_bar(i,k) */
651: diag = ba + i * bs2;
652: u = ba + ili * bs2;
653: PetscCall(PetscArrayzero(uik, bs2));
654: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
656: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
657: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
658: PetscCall(PetscLogFlops(4.0 * bs * bs2));
660: /* update -U(i,k) */
661: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
663: /* add multiple of row i to k-th row ... */
664: jmin = ili + 1;
665: jmax = bi[i + 1];
666: if (jmin < jmax) {
667: for (j = jmin; j < jmax; j++) {
668: /* rtmp += -U(i,k)^T * U_bar(i,j) */
669: rtmp_ptr = rtmp + bj[j] * bs2;
670: u = ba + j * bs2;
671: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
672: }
673: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
675: /* ... add i to row list for next nonzero entry */
676: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
677: j = bj[jmin];
678: jl[i] = jl[j];
679: jl[j] = i; /* update jl */
680: }
681: i = nexti;
682: }
684: /* save nonzero entries in k-th row of U ... */
686: /* invert diagonal block */
687: diag = ba + k * bs2;
688: PetscCall(PetscArraycpy(diag, dk, bs2));
690: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
691: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
693: jmin = bi[k];
694: jmax = bi[k + 1];
695: if (jmin < jmax) {
696: for (j = jmin; j < jmax; j++) {
697: vj = bj[j]; /* block col. index of U */
698: u = ba + j * bs2;
699: rtmp_ptr = rtmp + vj * bs2;
700: for (k1 = 0; k1 < bs2; k1++) {
701: *u++ = *rtmp_ptr;
702: *rtmp_ptr++ = 0.0;
703: }
704: }
706: /* ... add k to row list for first nonzero entry in k-th row */
707: il[k] = jmin;
708: i = bj[jmin];
709: jl[k] = jl[i];
710: jl[i] = k;
711: }
712: }
714: PetscCall(PetscFree(rtmp));
715: PetscCall(PetscFree2(il, jl));
716: PetscCall(PetscFree3(dk, uik, work));
717: PetscCall(PetscFree(pivots));
718: if (a->permute) PetscCall(PetscFree(aa));
720: PetscCall(ISRestoreIndices(perm, &perm_ptr));
722: C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
723: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
724: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
725: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
727: C->assembled = PETSC_TRUE;
728: C->preallocated = PETSC_TRUE;
730: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
731: PetscFunctionReturn(PETSC_SUCCESS);
732: }
734: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
735: {
736: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
737: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
738: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
739: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
740: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
741: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
742: MatScalar *work;
743: PetscInt *pivots;
744: PetscBool allowzeropivot, zeropivotdetected;
746: PetscFunctionBegin;
747: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
748: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
749: il[0] = 0;
750: for (i = 0; i < mbs; i++) jl[i] = mbs;
752: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
753: PetscCall(PetscMalloc1(bs, &pivots));
754: allowzeropivot = PetscNot(A->erroriffailure);
756: ai = a->i;
757: aj = a->j;
758: aa = a->a;
760: /* for each row k */
761: for (k = 0; k < mbs; k++) {
762: /*initialize k-th row with elements nonzero in row k of A */
763: jmin = ai[k];
764: jmax = ai[k + 1];
765: ap = aa + jmin * bs2;
766: for (j = jmin; j < jmax; j++) {
767: vj = aj[j]; /* block col. index */
768: rtmp_ptr = rtmp + vj * bs2;
769: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
770: }
772: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
773: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
774: i = jl[k]; /* first row to be added to k_th row */
776: while (i < k) {
777: nexti = jl[i]; /* next row to be added to k_th row */
779: /* compute multiplier */
780: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
782: /* uik = -inv(Di)*U_bar(i,k) */
783: diag = ba + i * bs2;
784: u = ba + ili * bs2;
785: PetscCall(PetscArrayzero(uik, bs2));
786: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
788: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
789: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
790: PetscCall(PetscLogFlops(2.0 * bs * bs2));
792: /* update -U(i,k) */
793: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
795: /* add multiple of row i to k-th row ... */
796: jmin = ili + 1;
797: jmax = bi[i + 1];
798: if (jmin < jmax) {
799: for (j = jmin; j < jmax; j++) {
800: /* rtmp += -U(i,k)^T * U_bar(i,j) */
801: rtmp_ptr = rtmp + bj[j] * bs2;
802: u = ba + j * bs2;
803: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
804: }
805: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
807: /* ... add i to row list for next nonzero entry */
808: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
809: j = bj[jmin];
810: jl[i] = jl[j];
811: jl[j] = i; /* update jl */
812: }
813: i = nexti;
814: }
816: /* save nonzero entries in k-th row of U ... */
818: /* invert diagonal block */
819: diag = ba + k * bs2;
820: PetscCall(PetscArraycpy(diag, dk, bs2));
822: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
823: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
825: jmin = bi[k];
826: jmax = bi[k + 1];
827: if (jmin < jmax) {
828: for (j = jmin; j < jmax; j++) {
829: vj = bj[j]; /* block col. index of U */
830: u = ba + j * bs2;
831: rtmp_ptr = rtmp + vj * bs2;
832: for (k1 = 0; k1 < bs2; k1++) {
833: *u++ = *rtmp_ptr;
834: *rtmp_ptr++ = 0.0;
835: }
836: }
838: /* ... add k to row list for first nonzero entry in k-th row */
839: il[k] = jmin;
840: i = bj[jmin];
841: jl[k] = jl[i];
842: jl[i] = k;
843: }
844: }
846: PetscCall(PetscFree(rtmp));
847: PetscCall(PetscFree2(il, jl));
848: PetscCall(PetscFree3(dk, uik, work));
849: PetscCall(PetscFree(pivots));
851: C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
855: C->assembled = PETSC_TRUE;
856: C->preallocated = PETSC_TRUE;
858: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
859: PetscFunctionReturn(PETSC_SUCCESS);
860: }
862: /*
863: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
864: Version for blocks 2 by 2.
865: */
866: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
867: {
868: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
869: IS perm = b->row;
870: const PetscInt *ai, *aj, *perm_ptr;
871: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
872: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
873: MatScalar *ba = b->a, *aa, *ap;
874: MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
875: PetscReal shift = info->shiftamount;
876: PetscBool allowzeropivot, zeropivotdetected;
878: PetscFunctionBegin;
879: allowzeropivot = PetscNot(A->erroriffailure);
881: /* initialization */
882: /* il and jl record the first nonzero element in each row of the accessing
883: window U(0:k, k:mbs-1).
884: jl: list of rows to be added to uneliminated rows
885: i>= k: jl(i) is the first row to be added to row i
886: i< k: jl(i) is the row following row i in some list of rows
887: jl(i) = mbs indicates the end of a list
888: il(i): points to the first nonzero element in columns k,...,mbs-1 of
889: row i of U */
890: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
891: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
892: il[0] = 0;
893: for (i = 0; i < mbs; i++) jl[i] = mbs;
895: PetscCall(ISGetIndices(perm, &perm_ptr));
897: /* check permutation */
898: if (!a->permute) {
899: ai = a->i;
900: aj = a->j;
901: aa = a->a;
902: } else {
903: ai = a->inew;
904: aj = a->jnew;
905: PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
906: PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
907: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
908: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
910: for (i = 0; i < mbs; i++) {
911: jmin = ai[i];
912: jmax = ai[i + 1];
913: for (j = jmin; j < jmax; j++) {
914: while (a2anew[j] != j) {
915: k = a2anew[j];
916: a2anew[j] = a2anew[k];
917: a2anew[k] = k;
918: for (k1 = 0; k1 < 4; k1++) {
919: dk[k1] = aa[k * 4 + k1];
920: aa[k * 4 + k1] = aa[j * 4 + k1];
921: aa[j * 4 + k1] = dk[k1];
922: }
923: }
924: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
925: if (i > aj[j]) {
926: ap = aa + j * 4; /* ptr to the beginning of the block */
927: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
928: ap[1] = ap[2];
929: ap[2] = dk[1];
930: }
931: }
932: }
933: PetscCall(PetscFree(a2anew));
934: }
936: /* for each row k */
937: for (k = 0; k < mbs; k++) {
938: /*initialize k-th row with elements nonzero in row perm(k) of A */
939: jmin = ai[perm_ptr[k]];
940: jmax = ai[perm_ptr[k] + 1];
941: ap = aa + jmin * 4;
942: for (j = jmin; j < jmax; j++) {
943: vj = perm_ptr[aj[j]]; /* block col. index */
944: rtmp_ptr = rtmp + vj * 4;
945: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
946: }
948: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
949: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
950: i = jl[k]; /* first row to be added to k_th row */
952: while (i < k) {
953: nexti = jl[i]; /* next row to be added to k_th row */
955: /* compute multiplier */
956: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
958: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959: diag = ba + i * 4;
960: u = ba + ili * 4;
961: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
962: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
963: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
964: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
966: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967: dk[0] += uik[0] * u[0] + uik[1] * u[1];
968: dk[1] += uik[2] * u[0] + uik[3] * u[1];
969: dk[2] += uik[0] * u[2] + uik[1] * u[3];
970: dk[3] += uik[2] * u[2] + uik[3] * u[3];
972: PetscCall(PetscLogFlops(16.0 * 2.0));
974: /* update -U(i,k): ba[ili] = uik */
975: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
977: /* add multiple of row i to k-th row ... */
978: jmin = ili + 1;
979: jmax = bi[i + 1];
980: if (jmin < jmax) {
981: for (j = jmin; j < jmax; j++) {
982: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983: rtmp_ptr = rtmp + bj[j] * 4;
984: u = ba + j * 4;
985: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
986: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
987: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
988: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
989: }
990: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
992: /* ... add i to row list for next nonzero entry */
993: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
994: j = bj[jmin];
995: jl[i] = jl[j];
996: jl[j] = i; /* update jl */
997: }
998: i = nexti;
999: }
1001: /* save nonzero entries in k-th row of U ... */
1003: /* invert diagonal block */
1004: diag = ba + k * 4;
1005: PetscCall(PetscArraycpy(diag, dk, 4));
1006: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1007: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1009: jmin = bi[k];
1010: jmax = bi[k + 1];
1011: if (jmin < jmax) {
1012: for (j = jmin; j < jmax; j++) {
1013: vj = bj[j]; /* block col. index of U */
1014: u = ba + j * 4;
1015: rtmp_ptr = rtmp + vj * 4;
1016: for (k1 = 0; k1 < 4; k1++) {
1017: *u++ = *rtmp_ptr;
1018: *rtmp_ptr++ = 0.0;
1019: }
1020: }
1022: /* ... add k to row list for first nonzero entry in k-th row */
1023: il[k] = jmin;
1024: i = bj[jmin];
1025: jl[k] = jl[i];
1026: jl[i] = k;
1027: }
1028: }
1030: PetscCall(PetscFree(rtmp));
1031: PetscCall(PetscFree2(il, jl));
1032: if (a->permute) PetscCall(PetscFree(aa));
1033: PetscCall(ISRestoreIndices(perm, &perm_ptr));
1035: C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1036: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1037: C->assembled = PETSC_TRUE;
1038: C->preallocated = PETSC_TRUE;
1040: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1041: PetscFunctionReturn(PETSC_SUCCESS);
1042: }
1044: /*
1045: Version for when blocks are 2 by 2 Using natural ordering
1046: */
1047: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1048: {
1049: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1050: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1051: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1052: MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1053: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1054: PetscReal shift = info->shiftamount;
1055: PetscBool allowzeropivot, zeropivotdetected;
1057: PetscFunctionBegin;
1058: allowzeropivot = PetscNot(A->erroriffailure);
1060: /* initialization */
1061: /* il and jl record the first nonzero element in each row of the accessing
1062: window U(0:k, k:mbs-1).
1063: jl: list of rows to be added to uneliminated rows
1064: i>= k: jl(i) is the first row to be added to row i
1065: i< k: jl(i) is the row following row i in some list of rows
1066: jl(i) = mbs indicates the end of a list
1067: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1068: row i of U */
1069: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1070: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1071: il[0] = 0;
1072: for (i = 0; i < mbs; i++) jl[i] = mbs;
1074: ai = a->i;
1075: aj = a->j;
1076: aa = a->a;
1078: /* for each row k */
1079: for (k = 0; k < mbs; k++) {
1080: /*initialize k-th row with elements nonzero in row k of A */
1081: jmin = ai[k];
1082: jmax = ai[k + 1];
1083: ap = aa + jmin * 4;
1084: for (j = jmin; j < jmax; j++) {
1085: vj = aj[j]; /* block col. index */
1086: rtmp_ptr = rtmp + vj * 4;
1087: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1088: }
1090: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1091: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1092: i = jl[k]; /* first row to be added to k_th row */
1094: while (i < k) {
1095: nexti = jl[i]; /* next row to be added to k_th row */
1097: /* compute multiplier */
1098: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1100: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1101: diag = ba + i * 4;
1102: u = ba + ili * 4;
1103: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1104: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1105: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1106: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1108: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1109: dk[0] += uik[0] * u[0] + uik[1] * u[1];
1110: dk[1] += uik[2] * u[0] + uik[3] * u[1];
1111: dk[2] += uik[0] * u[2] + uik[1] * u[3];
1112: dk[3] += uik[2] * u[2] + uik[3] * u[3];
1114: PetscCall(PetscLogFlops(16.0 * 2.0));
1116: /* update -U(i,k): ba[ili] = uik */
1117: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1119: /* add multiple of row i to k-th row ... */
1120: jmin = ili + 1;
1121: jmax = bi[i + 1];
1122: if (jmin < jmax) {
1123: for (j = jmin; j < jmax; j++) {
1124: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1125: rtmp_ptr = rtmp + bj[j] * 4;
1126: u = ba + j * 4;
1127: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1128: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1129: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1130: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1131: }
1132: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1134: /* ... add i to row list for next nonzero entry */
1135: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1136: j = bj[jmin];
1137: jl[i] = jl[j];
1138: jl[j] = i; /* update jl */
1139: }
1140: i = nexti;
1141: }
1143: /* save nonzero entries in k-th row of U ... */
1145: /* invert diagonal block */
1146: diag = ba + k * 4;
1147: PetscCall(PetscArraycpy(diag, dk, 4));
1148: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1149: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1151: jmin = bi[k];
1152: jmax = bi[k + 1];
1153: if (jmin < jmax) {
1154: for (j = jmin; j < jmax; j++) {
1155: vj = bj[j]; /* block col. index of U */
1156: u = ba + j * 4;
1157: rtmp_ptr = rtmp + vj * 4;
1158: for (k1 = 0; k1 < 4; k1++) {
1159: *u++ = *rtmp_ptr;
1160: *rtmp_ptr++ = 0.0;
1161: }
1162: }
1164: /* ... add k to row list for first nonzero entry in k-th row */
1165: il[k] = jmin;
1166: i = bj[jmin];
1167: jl[k] = jl[i];
1168: jl[i] = k;
1169: }
1170: }
1172: PetscCall(PetscFree(rtmp));
1173: PetscCall(PetscFree2(il, jl));
1175: C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1179: C->assembled = PETSC_TRUE;
1180: C->preallocated = PETSC_TRUE;
1182: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1183: PetscFunctionReturn(PETSC_SUCCESS);
1184: }
1186: /*
1187: Numeric U^T*D*U factorization for SBAIJ format.
1188: Version for blocks are 1 by 1.
1189: */
1190: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1191: {
1192: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1193: IS ip = b->row;
1194: const PetscInt *ai, *aj, *rip;
1195: PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1196: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1197: MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1198: PetscReal rs;
1199: FactorShiftCtx sctx;
1201: PetscFunctionBegin;
1202: /* MatPivotSetUp(): initialize shift context sctx */
1203: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1205: PetscCall(ISGetIndices(ip, &rip));
1206: if (!a->permute) {
1207: ai = a->i;
1208: aj = a->j;
1209: aa = a->a;
1210: } else {
1211: ai = a->inew;
1212: aj = a->jnew;
1213: nz = ai[mbs];
1214: PetscCall(PetscMalloc1(nz, &aa));
1215: a2anew = a->a2anew;
1216: bval = a->a;
1217: for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1218: }
1220: /* initialization */
1221: /* il and jl record the first nonzero element in each row of the accessing
1222: window U(0:k, k:mbs-1).
1223: jl: list of rows to be added to uneliminated rows
1224: i>= k: jl(i) is the first row to be added to row i
1225: i< k: jl(i) is the row following row i in some list of rows
1226: jl(i) = mbs indicates the end of a list
1227: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1228: row i of U */
1229: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1231: do {
1232: sctx.newshift = PETSC_FALSE;
1233: il[0] = 0;
1234: for (i = 0; i < mbs; i++) {
1235: rtmp[i] = 0.0;
1236: jl[i] = mbs;
1237: }
1239: for (k = 0; k < mbs; k++) {
1240: /*initialize k-th row by the perm[k]-th row of A */
1241: jmin = ai[rip[k]];
1242: jmax = ai[rip[k] + 1];
1243: bval = ba + bi[k];
1244: for (j = jmin; j < jmax; j++) {
1245: col = rip[aj[j]];
1246: rtmp[col] = aa[j];
1247: *bval++ = 0.0; /* for in-place factorization */
1248: }
1250: /* shift the diagonal of the matrix */
1251: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1253: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1254: dk = rtmp[k];
1255: i = jl[k]; /* first row to be added to k_th row */
1257: while (i < k) {
1258: nexti = jl[i]; /* next row to be added to k_th row */
1260: /* compute multiplier, update diag(k) and U(i,k) */
1261: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1262: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1263: dk += uikdi * ba[ili];
1264: ba[ili] = uikdi; /* -U(i,k) */
1266: /* add multiple of row i to k-th row */
1267: jmin = ili + 1;
1268: jmax = bi[i + 1];
1269: if (jmin < jmax) {
1270: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1271: PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1273: /* update il and jl for row i */
1274: il[i] = jmin;
1275: j = bj[jmin];
1276: jl[i] = jl[j];
1277: jl[j] = i;
1278: }
1279: i = nexti;
1280: }
1282: /* shift the diagonals when zero pivot is detected */
1283: /* compute rs=sum of abs(off-diagonal) */
1284: rs = 0.0;
1285: jmin = bi[k] + 1;
1286: nz = bi[k + 1] - jmin;
1287: if (nz) {
1288: bcol = bj + jmin;
1289: while (nz--) {
1290: rs += PetscAbsScalar(rtmp[*bcol]);
1291: bcol++;
1292: }
1293: }
1295: sctx.rs = rs;
1296: sctx.pv = dk;
1297: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1298: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1299: dk = sctx.pv;
1301: /* copy data into U(k,:) */
1302: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1303: jmin = bi[k] + 1;
1304: jmax = bi[k + 1];
1305: if (jmin < jmax) {
1306: for (j = jmin; j < jmax; j++) {
1307: col = bj[j];
1308: ba[j] = rtmp[col];
1309: rtmp[col] = 0.0;
1310: }
1311: /* add the k-th row into il and jl */
1312: il[k] = jmin;
1313: i = bj[jmin];
1314: jl[k] = jl[i];
1315: jl[i] = k;
1316: }
1317: }
1318: } while (sctx.newshift);
1319: PetscCall(PetscFree3(rtmp, il, jl));
1320: if (a->permute) PetscCall(PetscFree(aa));
1322: PetscCall(ISRestoreIndices(ip, &rip));
1324: C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1325: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1326: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1327: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1328: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1329: C->assembled = PETSC_TRUE;
1330: C->preallocated = PETSC_TRUE;
1332: PetscCall(PetscLogFlops(C->rmap->N));
1333: if (sctx.nshift) {
1334: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1335: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1337: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1338: }
1339: }
1340: PetscFunctionReturn(PETSC_SUCCESS);
1341: }
1343: /*
1344: Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1345: Modified from MatCholeskyFactorNumeric_SeqAIJ()
1346: */
1347: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1348: {
1349: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1350: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1351: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1352: PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1353: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1354: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1355: FactorShiftCtx sctx;
1356: PetscReal rs;
1357: MatScalar d, *v;
1359: PetscFunctionBegin;
1360: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1362: /* MatPivotSetUp(): initialize shift context sctx */
1363: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1365: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1366: sctx.shift_top = info->zeropivot;
1368: PetscCall(PetscArrayzero(rtmp, mbs));
1370: for (i = 0; i < mbs; i++) {
1371: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1372: d = (aa)[a->diag[i]];
1373: rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1374: ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1375: v = aa + ai[i] + 1;
1376: nz = ai[i + 1] - ai[i] - 1;
1377: for (j = 0; j < nz; j++) {
1378: rtmp[i] += PetscAbsScalar(v[j]);
1379: rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1380: }
1381: if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1382: }
1383: sctx.shift_top *= 1.1;
1384: sctx.nshift_max = 5;
1385: sctx.shift_lo = 0.;
1386: sctx.shift_hi = 1.;
1387: }
1389: /* allocate working arrays
1390: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1391: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1392: */
1393: do {
1394: sctx.newshift = PETSC_FALSE;
1396: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1397: if (mbs) il[0] = 0;
1399: for (k = 0; k < mbs; k++) {
1400: /* zero rtmp */
1401: nz = bi[k + 1] - bi[k];
1402: bjtmp = bj + bi[k];
1403: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1405: /* load in initial unfactored row */
1406: bval = ba + bi[k];
1407: jmin = ai[k];
1408: jmax = ai[k + 1];
1409: for (j = jmin; j < jmax; j++) {
1410: col = aj[j];
1411: rtmp[col] = aa[j];
1412: *bval++ = 0.0; /* for in-place factorization */
1413: }
1414: /* shift the diagonal of the matrix: ZeropivotApply() */
1415: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1417: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1418: dk = rtmp[k];
1419: i = c2r[k]; /* first row to be added to k_th row */
1421: while (i < k) {
1422: nexti = c2r[i]; /* next row to be added to k_th row */
1424: /* compute multiplier, update diag(k) and U(i,k) */
1425: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1426: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1427: dk += uikdi * ba[ili]; /* update diag[k] */
1428: ba[ili] = uikdi; /* -U(i,k) */
1430: /* add multiple of row i to k-th row */
1431: jmin = ili + 1;
1432: jmax = bi[i + 1];
1433: if (jmin < jmax) {
1434: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1435: /* update il and c2r for row i */
1436: il[i] = jmin;
1437: j = bj[jmin];
1438: c2r[i] = c2r[j];
1439: c2r[j] = i;
1440: }
1441: i = nexti;
1442: }
1444: /* copy data into U(k,:) */
1445: rs = 0.0;
1446: jmin = bi[k];
1447: jmax = bi[k + 1] - 1;
1448: if (jmin < jmax) {
1449: for (j = jmin; j < jmax; j++) {
1450: col = bj[j];
1451: ba[j] = rtmp[col];
1452: rs += PetscAbsScalar(ba[j]);
1453: }
1454: /* add the k-th row into il and c2r */
1455: il[k] = jmin;
1456: i = bj[jmin];
1457: c2r[k] = c2r[i];
1458: c2r[i] = k;
1459: }
1461: sctx.rs = rs;
1462: sctx.pv = dk;
1463: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1464: if (sctx.newshift) break;
1465: dk = sctx.pv;
1467: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1468: }
1469: } while (sctx.newshift);
1471: PetscCall(PetscFree3(rtmp, il, c2r));
1473: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1474: B->ops->solves = MatSolves_SeqSBAIJ_1;
1475: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476: B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1477: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1478: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1480: B->assembled = PETSC_TRUE;
1481: B->preallocated = PETSC_TRUE;
1483: PetscCall(PetscLogFlops(B->rmap->n));
1485: /* MatPivotView() */
1486: if (sctx.nshift) {
1487: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1488: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1489: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1490: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1491: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1492: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1493: }
1494: }
1495: PetscFunctionReturn(PETSC_SUCCESS);
1496: }
1498: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1499: {
1500: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1501: PetscInt i, j, mbs = a->mbs;
1502: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1503: PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1504: MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1505: PetscReal rs;
1506: FactorShiftCtx sctx;
1508: PetscFunctionBegin;
1509: /* MatPivotSetUp(): initialize shift context sctx */
1510: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1512: /* initialization */
1513: /* il and jl record the first nonzero element in each row of the accessing
1514: window U(0:k, k:mbs-1).
1515: jl: list of rows to be added to uneliminated rows
1516: i>= k: jl(i) is the first row to be added to row i
1517: i< k: jl(i) is the row following row i in some list of rows
1518: jl(i) = mbs indicates the end of a list
1519: il(i): points to the first nonzero element in U(i,k:mbs-1)
1520: */
1521: PetscCall(PetscMalloc1(mbs, &rtmp));
1522: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1524: do {
1525: sctx.newshift = PETSC_FALSE;
1526: il[0] = 0;
1527: for (i = 0; i < mbs; i++) {
1528: rtmp[i] = 0.0;
1529: jl[i] = mbs;
1530: }
1532: for (k = 0; k < mbs; k++) {
1533: /*initialize k-th row with elements nonzero in row perm(k) of A */
1534: nz = ai[k + 1] - ai[k];
1535: acol = aj + ai[k];
1536: aval = aa + ai[k];
1537: bval = ba + bi[k];
1538: while (nz--) {
1539: rtmp[*acol++] = *aval++;
1540: *bval++ = 0.0; /* for in-place factorization */
1541: }
1543: /* shift the diagonal of the matrix */
1544: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1546: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1547: dk = rtmp[k];
1548: i = jl[k]; /* first row to be added to k_th row */
1550: while (i < k) {
1551: nexti = jl[i]; /* next row to be added to k_th row */
1552: /* compute multiplier, update D(k) and U(i,k) */
1553: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1554: uikdi = -ba[ili] * ba[bi[i]];
1555: dk += uikdi * ba[ili];
1556: ba[ili] = uikdi; /* -U(i,k) */
1558: /* add multiple of row i to k-th row ... */
1559: jmin = ili + 1;
1560: nz = bi[i + 1] - jmin;
1561: if (nz > 0) {
1562: bcol = bj + jmin;
1563: bval = ba + jmin;
1564: PetscCall(PetscLogFlops(2.0 * nz));
1565: while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1567: /* update il and jl for i-th row */
1568: il[i] = jmin;
1569: j = bj[jmin];
1570: jl[i] = jl[j];
1571: jl[j] = i;
1572: }
1573: i = nexti;
1574: }
1576: /* shift the diagonals when zero pivot is detected */
1577: /* compute rs=sum of abs(off-diagonal) */
1578: rs = 0.0;
1579: jmin = bi[k] + 1;
1580: nz = bi[k + 1] - jmin;
1581: if (nz) {
1582: bcol = bj + jmin;
1583: while (nz--) {
1584: rs += PetscAbsScalar(rtmp[*bcol]);
1585: bcol++;
1586: }
1587: }
1589: sctx.rs = rs;
1590: sctx.pv = dk;
1591: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1592: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1593: dk = sctx.pv;
1595: /* copy data into U(k,:) */
1596: ba[bi[k]] = 1.0 / dk;
1597: jmin = bi[k] + 1;
1598: nz = bi[k + 1] - jmin;
1599: if (nz) {
1600: bcol = bj + jmin;
1601: bval = ba + jmin;
1602: while (nz--) {
1603: *bval++ = rtmp[*bcol];
1604: rtmp[*bcol++] = 0.0;
1605: }
1606: /* add k-th row into il and jl */
1607: il[k] = jmin;
1608: i = bj[jmin];
1609: jl[k] = jl[i];
1610: jl[i] = k;
1611: }
1612: } /* end of for (k = 0; k<mbs; k++) */
1613: } while (sctx.newshift);
1614: PetscCall(PetscFree(rtmp));
1615: PetscCall(PetscFree2(il, jl));
1617: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1618: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1619: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1623: C->assembled = PETSC_TRUE;
1624: C->preallocated = PETSC_TRUE;
1626: PetscCall(PetscLogFlops(C->rmap->N));
1627: if (sctx.nshift) {
1628: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1629: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1631: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1632: }
1633: }
1634: PetscFunctionReturn(PETSC_SUCCESS);
1635: }
1637: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1638: {
1639: Mat C;
1641: PetscFunctionBegin;
1642: PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1643: PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1644: PetscCall(MatCholeskyFactorNumeric(C, A, info));
1646: A->ops->solve = C->ops->solve;
1647: A->ops->solvetranspose = C->ops->solvetranspose;
1649: PetscCall(MatHeaderMerge(A, &C));
1650: PetscFunctionReturn(PETSC_SUCCESS);
1651: }