Actual source code: sbaijfact.c


  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petsc/private/kernels/blockinvert.h>
  5: #include <petscis.h>

  7: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
  8: {
  9:   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
 10:   MatScalar    *dd   = fact->a;
 11:   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;

 13:   PetscFunctionBegin;
 14:   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);

 16:   nneg_tmp = 0;
 17:   npos_tmp = 0;
 18:   for (i = 0; i < mbs; i++) {
 19:     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 20:     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 21:     fi++;
 22:   }
 23:   if (nneg) *nneg = nneg_tmp;
 24:   if (npos) *npos = npos_tmp;
 25:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 26:   PetscFunctionReturn(PETSC_SUCCESS);
 27: }

 29: /*
 30:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 31:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 32: */
 33: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
 34: {
 35:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
 36:   const PetscInt *rip, *ai, *aj;
 37:   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
 38:   PetscInt        m, reallocs = 0, prow;
 39:   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
 40:   PetscReal       f = info->fill;
 41:   PetscBool       perm_identity;

 43:   PetscFunctionBegin;
 44:   /* check whether perm is the identity mapping */
 45:   PetscCall(ISIdentity(perm, &perm_identity));
 46:   PetscCall(ISGetIndices(perm, &rip));

 48:   if (perm_identity) { /* without permutation */
 49:     a->permute = PETSC_FALSE;

 51:     ai = a->i;
 52:     aj = a->j;
 53:   } else { /* non-trivial permutation */
 54:     a->permute = PETSC_TRUE;

 56:     PetscCall(MatReorderingSeqSBAIJ(A, perm));

 58:     ai = a->inew;
 59:     aj = a->jnew;
 60:   }

 62:   /* initialization */
 63:   PetscCall(PetscMalloc1(mbs + 1, &iu));
 64:   umax = (PetscInt)(f * ai[mbs] + 1);
 65:   umax += mbs + 1;
 66:   PetscCall(PetscMalloc1(umax, &ju));
 67:   iu[0] = mbs + 1;
 68:   juidx = mbs + 1; /* index for ju */
 69:   /* jl linked list for pivot row -- linked list for col index */
 70:   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
 71:   for (i = 0; i < mbs; i++) {
 72:     jl[i] = mbs;
 73:     q[i]  = 0;
 74:   }

 76:   /* for each row k */
 77:   for (k = 0; k < mbs; k++) {
 78:     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
 79:     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
 80:     q[k] = mbs;
 81:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 82:     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
 83:     jmax = ai[rip[k] + 1];
 84:     for (j = jmin; j < jmax; j++) {
 85:       vj = rip[aj[j]]; /* col. value */
 86:       if (vj > k) {
 87:         qm = k;
 88:         do {
 89:           m  = qm;
 90:           qm = q[m];
 91:         } while (qm < vj);
 92:         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
 93:         nzk++;
 94:         q[m]  = vj;
 95:         q[vj] = qm;
 96:       } /* if (vj > k) */
 97:     }   /* for (j=jmin; j<jmax; j++) */

 99:     /* modify nonzero structure of k-th row by computing fill-in
100:        for each row i to be merged in */
101:     prow = k;
102:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

104:     while (prow < k) {
105:       /* merge row prow into k-th row */
106:       jmin = iu[prow] + 1;
107:       jmax = iu[prow + 1];
108:       qm   = k;
109:       for (j = jmin; j < jmax; j++) {
110:         vj = ju[j];
111:         do {
112:           m  = qm;
113:           qm = q[m];
114:         } while (qm < vj);
115:         if (qm != vj) {
116:           nzk++;
117:           q[m]  = vj;
118:           q[vj] = qm;
119:           qm    = vj;
120:         }
121:       }
122:       prow = jl[prow]; /* next pivot row */
123:     }

125:     /* add k to row list for first nonzero element in k-th row */
126:     if (nzk > 0) {
127:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
128:       jl[k] = jl[i];
129:       jl[i] = k;
130:     }
131:     iu[k + 1] = iu[k] + nzk;

133:     /* allocate more space to ju if needed */
134:     if (iu[k + 1] > umax) {
135:       /* estimate how much additional space we will need */
136:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
137:       /* just double the memory each time */
138:       maxadd = umax;
139:       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
140:       umax += maxadd;

142:       /* allocate a longer ju */
143:       PetscCall(PetscMalloc1(umax, &jutmp));
144:       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
145:       PetscCall(PetscFree(ju));
146:       ju = jutmp;
147:       reallocs++; /* count how many times we realloc */
148:     }

150:     /* save nonzero structure of k-th row in ju */
151:     i = k;
152:     while (nzk--) {
153:       i           = q[i];
154:       ju[juidx++] = i;
155:     }
156:   }

158: #if defined(PETSC_USE_INFO)
159:   if (ai[mbs] != 0) {
160:     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
161:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
162:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
163:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
164:     PetscCall(PetscInfo(A, "for best performance.\n"));
165:   } else {
166:     PetscCall(PetscInfo(A, "Empty matrix\n"));
167:   }
168: #endif

170:   PetscCall(ISRestoreIndices(perm, &rip));
171:   PetscCall(PetscFree2(jl, q));

173:   /* put together the new matrix */
174:   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));

176:   b               = (Mat_SeqSBAIJ *)(F)->data;
177:   b->singlemalloc = PETSC_FALSE;
178:   b->free_a       = PETSC_TRUE;
179:   b->free_ij      = PETSC_TRUE;

181:   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
182:   b->j    = ju;
183:   b->i    = iu;
184:   b->diag = NULL;
185:   b->ilen = NULL;
186:   b->imax = NULL;
187:   b->row  = perm;

189:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

191:   PetscCall(PetscObjectReference((PetscObject)perm));

193:   b->icol = perm;
194:   PetscCall(PetscObjectReference((PetscObject)perm));
195:   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
196:   /* In b structure:  Free imax, ilen, old a, old j.
197:      Allocate idnew, solve_work, new a, new j */
198:   b->maxnz = b->nz = iu[mbs];

200:   (F)->info.factor_mallocs   = reallocs;
201:   (F)->info.fill_ratio_given = f;
202:   if (ai[mbs] != 0) {
203:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
204:   } else {
205:     (F)->info.fill_ratio_needed = 0.0;
206:   }
207:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
208:   PetscFunctionReturn(PETSC_SUCCESS);
209: }
210: /*
211:     Symbolic U^T*D*U factorization for SBAIJ format.
212:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
213: */
214: #include <petscbt.h>
215: #include <../src/mat/utils/freespace.h>
216: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
217: {
218:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
219:   Mat_SeqSBAIJ      *b;
220:   PetscBool          perm_identity, missing;
221:   PetscReal          fill = info->fill;
222:   const PetscInt    *rip, *ai = a->i, *aj = a->j;
223:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
224:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
225:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
226:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
227:   PetscBT            lnkbt;

229:   PetscFunctionBegin;
230:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
231:   PetscCall(MatMissingDiagonal(A, &missing, &i));
232:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
233:   if (bs > 1) {
234:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
235:     PetscFunctionReturn(PETSC_SUCCESS);
236:   }

238:   /* check whether perm is the identity mapping */
239:   PetscCall(ISIdentity(perm, &perm_identity));
240:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
241:   a->permute = PETSC_FALSE;
242:   PetscCall(ISGetIndices(perm, &rip));

244:   /* initialization */
245:   PetscCall(PetscMalloc1(mbs + 1, &ui));
246:   PetscCall(PetscMalloc1(mbs + 1, &udiag));
247:   ui[0] = 0;

249:   /* jl: linked list for storing indices of the pivot rows
250:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
251:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
252:   for (i = 0; i < mbs; i++) {
253:     jl[i] = mbs;
254:     il[i] = 0;
255:   }

257:   /* create and initialize a linked list for storing column indices of the active row k */
258:   nlnk = mbs + 1;
259:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

261:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
262:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
263:   current_space = free_space;

265:   for (k = 0; k < mbs; k++) { /* for each active row k */
266:     /* initialize lnk by the column indices of row rip[k] of A */
267:     nzk   = 0;
268:     ncols = ai[k + 1] - ai[k];
269:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
270:     for (j = 0; j < ncols; j++) {
271:       i       = *(aj + ai[k] + j);
272:       cols[j] = i;
273:     }
274:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
275:     nzk += nlnk;

277:     /* update lnk by computing fill-in for each pivot row to be merged in */
278:     prow = jl[k]; /* 1st pivot row */

280:     while (prow < k) {
281:       nextprow = jl[prow];
282:       /* merge prow into k-th row */
283:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
284:       jmax   = ui[prow + 1];
285:       ncols  = jmax - jmin;
286:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
287:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
288:       nzk += nlnk;

290:       /* update il and jl for prow */
291:       if (jmin < jmax) {
292:         il[prow] = jmin;
293:         j        = *uj_ptr;
294:         jl[prow] = jl[j];
295:         jl[j]    = prow;
296:       }
297:       prow = nextprow;
298:     }

300:     /* if free space is not available, make more free space */
301:     if (current_space->local_remaining < nzk) {
302:       i = mbs - k + 1;                                   /* num of unfactored rows */
303:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
304:       PetscCall(PetscFreeSpaceGet(i, &current_space));
305:       reallocs++;
306:     }

308:     /* copy data into free space, then initialize lnk */
309:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

311:     /* add the k-th row into il and jl */
312:     if (nzk > 1) {
313:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
314:       jl[k] = jl[i];
315:       jl[i] = k;
316:       il[k] = ui[k] + 1;
317:     }
318:     ui_ptr[k] = current_space->array;

320:     current_space->array += nzk;
321:     current_space->local_used += nzk;
322:     current_space->local_remaining -= nzk;

324:     ui[k + 1] = ui[k] + nzk;
325:   }

327:   PetscCall(ISRestoreIndices(perm, &rip));
328:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

330:   /* destroy list of free space and other temporary array(s) */
331:   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
332:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
333:   PetscCall(PetscLLDestroy(lnk, lnkbt));

335:   /* put together the new matrix in MATSEQSBAIJ format */
336:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

338:   b               = (Mat_SeqSBAIJ *)fact->data;
339:   b->singlemalloc = PETSC_FALSE;
340:   b->free_a       = PETSC_TRUE;
341:   b->free_ij      = PETSC_TRUE;

343:   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));

345:   b->j         = uj;
346:   b->i         = ui;
347:   b->diag      = udiag;
348:   b->free_diag = PETSC_TRUE;
349:   b->ilen      = NULL;
350:   b->imax      = NULL;
351:   b->row       = perm;
352:   b->icol      = perm;

354:   PetscCall(PetscObjectReference((PetscObject)perm));
355:   PetscCall(PetscObjectReference((PetscObject)perm));

357:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

359:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));

361:   b->maxnz = b->nz = ui[mbs];

363:   fact->info.factor_mallocs   = reallocs;
364:   fact->info.fill_ratio_given = fill;
365:   if (ai[mbs] != 0) {
366:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
367:   } else {
368:     fact->info.fill_ratio_needed = 0.0;
369:   }
370: #if defined(PETSC_USE_INFO)
371:   if (ai[mbs] != 0) {
372:     PetscReal af = fact->info.fill_ratio_needed;
373:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
374:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
375:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
376:   } else {
377:     PetscCall(PetscInfo(A, "Empty matrix\n"));
378:   }
379: #endif
380:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }

384: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
385: {
386:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
387:   Mat_SeqSBAIJ      *b;
388:   PetscBool          perm_identity, missing;
389:   PetscReal          fill = info->fill;
390:   const PetscInt    *rip, *ai, *aj;
391:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
392:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
393:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
394:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
395:   PetscBT            lnkbt;

397:   PetscFunctionBegin;
398:   PetscCall(MatMissingDiagonal(A, &missing, &d));
399:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);

401:   /*
402:    This code originally uses Modified Sparse Row (MSR) storage
403:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
404:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
405:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
406:    thus the original code in MSR format is still used for these cases.
407:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
408:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
409:   */
410:   if (bs > 1) {
411:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
412:     PetscFunctionReturn(PETSC_SUCCESS);
413:   }

415:   /* check whether perm is the identity mapping */
416:   PetscCall(ISIdentity(perm, &perm_identity));
417:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
418:   a->permute = PETSC_FALSE;
419:   ai         = a->i;
420:   aj         = a->j;
421:   PetscCall(ISGetIndices(perm, &rip));

423:   /* initialization */
424:   PetscCall(PetscMalloc1(mbs + 1, &ui));
425:   ui[0] = 0;

427:   /* jl: linked list for storing indices of the pivot rows
428:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
429:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
430:   for (i = 0; i < mbs; i++) {
431:     jl[i] = mbs;
432:     il[i] = 0;
433:   }

435:   /* create and initialize a linked list for storing column indices of the active row k */
436:   nlnk = mbs + 1;
437:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

439:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
440:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
441:   current_space = free_space;

443:   for (k = 0; k < mbs; k++) { /* for each active row k */
444:     /* initialize lnk by the column indices of row rip[k] of A */
445:     nzk   = 0;
446:     ncols = ai[rip[k] + 1] - ai[rip[k]];
447:     for (j = 0; j < ncols; j++) {
448:       i       = *(aj + ai[rip[k]] + j);
449:       cols[j] = rip[i];
450:     }
451:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
452:     nzk += nlnk;

454:     /* update lnk by computing fill-in for each pivot row to be merged in */
455:     prow = jl[k]; /* 1st pivot row */

457:     while (prow < k) {
458:       nextprow = jl[prow];
459:       /* merge prow into k-th row */
460:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
461:       jmax   = ui[prow + 1];
462:       ncols  = jmax - jmin;
463:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
464:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
465:       nzk += nlnk;

467:       /* update il and jl for prow */
468:       if (jmin < jmax) {
469:         il[prow] = jmin;

471:         j        = *uj_ptr;
472:         jl[prow] = jl[j];
473:         jl[j]    = prow;
474:       }
475:       prow = nextprow;
476:     }

478:     /* if free space is not available, make more free space */
479:     if (current_space->local_remaining < nzk) {
480:       i = mbs - k + 1;                                                            /* num of unfactored rows */
481:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
482:       PetscCall(PetscFreeSpaceGet(i, &current_space));
483:       reallocs++;
484:     }

486:     /* copy data into free space, then initialize lnk */
487:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

489:     /* add the k-th row into il and jl */
490:     if (nzk - 1 > 0) {
491:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
492:       jl[k] = jl[i];
493:       jl[i] = k;
494:       il[k] = ui[k] + 1;
495:     }
496:     ui_ptr[k] = current_space->array;

498:     current_space->array += nzk;
499:     current_space->local_used += nzk;
500:     current_space->local_remaining -= nzk;

502:     ui[k + 1] = ui[k] + nzk;
503:   }

505:   PetscCall(ISRestoreIndices(perm, &rip));
506:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

508:   /* destroy list of free space and other temporary array(s) */
509:   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
510:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
511:   PetscCall(PetscLLDestroy(lnk, lnkbt));

513:   /* put together the new matrix in MATSEQSBAIJ format */
514:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

516:   b               = (Mat_SeqSBAIJ *)fact->data;
517:   b->singlemalloc = PETSC_FALSE;
518:   b->free_a       = PETSC_TRUE;
519:   b->free_ij      = PETSC_TRUE;

521:   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));

523:   b->j    = uj;
524:   b->i    = ui;
525:   b->diag = NULL;
526:   b->ilen = NULL;
527:   b->imax = NULL;
528:   b->row  = perm;

530:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

532:   PetscCall(PetscObjectReference((PetscObject)perm));
533:   b->icol = perm;
534:   PetscCall(PetscObjectReference((PetscObject)perm));
535:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
536:   b->maxnz = b->nz = ui[mbs];

538:   fact->info.factor_mallocs   = reallocs;
539:   fact->info.fill_ratio_given = fill;
540:   if (ai[mbs] != 0) {
541:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
542:   } else {
543:     fact->info.fill_ratio_needed = 0.0;
544:   }
545: #if defined(PETSC_USE_INFO)
546:   if (ai[mbs] != 0) {
547:     PetscReal af = fact->info.fill_ratio_needed;
548:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
549:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
550:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
551:   } else {
552:     PetscCall(PetscInfo(A, "Empty matrix\n"));
553:   }
554: #endif
555:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
556:   PetscFunctionReturn(PETSC_SUCCESS);
557: }

559: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
560: {
561:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
562:   IS              perm = b->row;
563:   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
564:   PetscInt        i, j;
565:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
566:   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
567:   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
568:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
569:   MatScalar      *work;
570:   PetscInt       *pivots;
571:   PetscBool       allowzeropivot, zeropivotdetected;

573:   PetscFunctionBegin;
574:   /* initialization */
575:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
576:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
577:   allowzeropivot = PetscNot(A->erroriffailure);

579:   il[0] = 0;
580:   for (i = 0; i < mbs; i++) jl[i] = mbs;

582:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
583:   PetscCall(PetscMalloc1(bs, &pivots));

585:   PetscCall(ISGetIndices(perm, &perm_ptr));

587:   /* check permutation */
588:   if (!a->permute) {
589:     ai = a->i;
590:     aj = a->j;
591:     aa = a->a;
592:   } else {
593:     ai = a->inew;
594:     aj = a->jnew;
595:     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
596:     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
597:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
598:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

600:     for (i = 0; i < mbs; i++) {
601:       jmin = ai[i];
602:       jmax = ai[i + 1];
603:       for (j = jmin; j < jmax; j++) {
604:         while (a2anew[j] != j) {
605:           k         = a2anew[j];
606:           a2anew[j] = a2anew[k];
607:           a2anew[k] = k;
608:           for (k1 = 0; k1 < bs2; k1++) {
609:             dk[k1]           = aa[k * bs2 + k1];
610:             aa[k * bs2 + k1] = aa[j * bs2 + k1];
611:             aa[j * bs2 + k1] = dk[k1];
612:           }
613:         }
614:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
615:         if (i > aj[j]) {
616:           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
617:           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
618:           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
619:             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
620:           }
621:         }
622:       }
623:     }
624:     PetscCall(PetscFree(a2anew));
625:   }

627:   /* for each row k */
628:   for (k = 0; k < mbs; k++) {
629:     /*initialize k-th row with elements nonzero in row perm(k) of A */
630:     jmin = ai[perm_ptr[k]];
631:     jmax = ai[perm_ptr[k] + 1];

633:     ap = aa + jmin * bs2;
634:     for (j = jmin; j < jmax; j++) {
635:       vj       = perm_ptr[aj[j]]; /* block col. index */
636:       rtmp_ptr = rtmp + vj * bs2;
637:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
638:     }

640:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
641:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
642:     i = jl[k]; /* first row to be added to k_th row  */

644:     while (i < k) {
645:       nexti = jl[i]; /* next row to be added to k_th row */

647:       /* compute multiplier */
648:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

650:       /* uik = -inv(Di)*U_bar(i,k) */
651:       diag = ba + i * bs2;
652:       u    = ba + ili * bs2;
653:       PetscCall(PetscArrayzero(uik, bs2));
654:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

656:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
657:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
658:       PetscCall(PetscLogFlops(4.0 * bs * bs2));

660:       /* update -U(i,k) */
661:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

663:       /* add multiple of row i to k-th row ... */
664:       jmin = ili + 1;
665:       jmax = bi[i + 1];
666:       if (jmin < jmax) {
667:         for (j = jmin; j < jmax; j++) {
668:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
669:           rtmp_ptr = rtmp + bj[j] * bs2;
670:           u        = ba + j * bs2;
671:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
672:         }
673:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

675:         /* ... add i to row list for next nonzero entry */
676:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
677:         j     = bj[jmin];
678:         jl[i] = jl[j];
679:         jl[j] = i; /* update jl */
680:       }
681:       i = nexti;
682:     }

684:     /* save nonzero entries in k-th row of U ... */

686:     /* invert diagonal block */
687:     diag = ba + k * bs2;
688:     PetscCall(PetscArraycpy(diag, dk, bs2));

690:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
691:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

693:     jmin = bi[k];
694:     jmax = bi[k + 1];
695:     if (jmin < jmax) {
696:       for (j = jmin; j < jmax; j++) {
697:         vj       = bj[j]; /* block col. index of U */
698:         u        = ba + j * bs2;
699:         rtmp_ptr = rtmp + vj * bs2;
700:         for (k1 = 0; k1 < bs2; k1++) {
701:           *u++        = *rtmp_ptr;
702:           *rtmp_ptr++ = 0.0;
703:         }
704:       }

706:       /* ... add k to row list for first nonzero entry in k-th row */
707:       il[k] = jmin;
708:       i     = bj[jmin];
709:       jl[k] = jl[i];
710:       jl[i] = k;
711:     }
712:   }

714:   PetscCall(PetscFree(rtmp));
715:   PetscCall(PetscFree2(il, jl));
716:   PetscCall(PetscFree3(dk, uik, work));
717:   PetscCall(PetscFree(pivots));
718:   if (a->permute) PetscCall(PetscFree(aa));

720:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

722:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
723:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
724:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
725:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

727:   C->assembled    = PETSC_TRUE;
728:   C->preallocated = PETSC_TRUE;

730:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
731:   PetscFunctionReturn(PETSC_SUCCESS);
732: }

734: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
735: {
736:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
737:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
738:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
739:   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
740:   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
741:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
742:   MatScalar    *work;
743:   PetscInt     *pivots;
744:   PetscBool     allowzeropivot, zeropivotdetected;

746:   PetscFunctionBegin;
747:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
748:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
749:   il[0] = 0;
750:   for (i = 0; i < mbs; i++) jl[i] = mbs;

752:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
753:   PetscCall(PetscMalloc1(bs, &pivots));
754:   allowzeropivot = PetscNot(A->erroriffailure);

756:   ai = a->i;
757:   aj = a->j;
758:   aa = a->a;

760:   /* for each row k */
761:   for (k = 0; k < mbs; k++) {
762:     /*initialize k-th row with elements nonzero in row k of A */
763:     jmin = ai[k];
764:     jmax = ai[k + 1];
765:     ap   = aa + jmin * bs2;
766:     for (j = jmin; j < jmax; j++) {
767:       vj       = aj[j]; /* block col. index */
768:       rtmp_ptr = rtmp + vj * bs2;
769:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
770:     }

772:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
773:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
774:     i = jl[k]; /* first row to be added to k_th row  */

776:     while (i < k) {
777:       nexti = jl[i]; /* next row to be added to k_th row */

779:       /* compute multiplier */
780:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

782:       /* uik = -inv(Di)*U_bar(i,k) */
783:       diag = ba + i * bs2;
784:       u    = ba + ili * bs2;
785:       PetscCall(PetscArrayzero(uik, bs2));
786:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

788:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
789:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
790:       PetscCall(PetscLogFlops(2.0 * bs * bs2));

792:       /* update -U(i,k) */
793:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

795:       /* add multiple of row i to k-th row ... */
796:       jmin = ili + 1;
797:       jmax = bi[i + 1];
798:       if (jmin < jmax) {
799:         for (j = jmin; j < jmax; j++) {
800:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
801:           rtmp_ptr = rtmp + bj[j] * bs2;
802:           u        = ba + j * bs2;
803:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
804:         }
805:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

807:         /* ... add i to row list for next nonzero entry */
808:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
809:         j     = bj[jmin];
810:         jl[i] = jl[j];
811:         jl[j] = i; /* update jl */
812:       }
813:       i = nexti;
814:     }

816:     /* save nonzero entries in k-th row of U ... */

818:     /* invert diagonal block */
819:     diag = ba + k * bs2;
820:     PetscCall(PetscArraycpy(diag, dk, bs2));

822:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
823:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

825:     jmin = bi[k];
826:     jmax = bi[k + 1];
827:     if (jmin < jmax) {
828:       for (j = jmin; j < jmax; j++) {
829:         vj       = bj[j]; /* block col. index of U */
830:         u        = ba + j * bs2;
831:         rtmp_ptr = rtmp + vj * bs2;
832:         for (k1 = 0; k1 < bs2; k1++) {
833:           *u++        = *rtmp_ptr;
834:           *rtmp_ptr++ = 0.0;
835:         }
836:       }

838:       /* ... add k to row list for first nonzero entry in k-th row */
839:       il[k] = jmin;
840:       i     = bj[jmin];
841:       jl[k] = jl[i];
842:       jl[i] = k;
843:     }
844:   }

846:   PetscCall(PetscFree(rtmp));
847:   PetscCall(PetscFree2(il, jl));
848:   PetscCall(PetscFree3(dk, uik, work));
849:   PetscCall(PetscFree(pivots));

851:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
855:   C->assembled           = PETSC_TRUE;
856:   C->preallocated        = PETSC_TRUE;

858:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
859:   PetscFunctionReturn(PETSC_SUCCESS);
860: }

862: /*
863:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
864:     Version for blocks 2 by 2.
865: */
866: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
867: {
868:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
869:   IS              perm = b->row;
870:   const PetscInt *ai, *aj, *perm_ptr;
871:   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
872:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
873:   MatScalar      *ba = b->a, *aa, *ap;
874:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
875:   PetscReal       shift = info->shiftamount;
876:   PetscBool       allowzeropivot, zeropivotdetected;

878:   PetscFunctionBegin;
879:   allowzeropivot = PetscNot(A->erroriffailure);

881:   /* initialization */
882:   /* il and jl record the first nonzero element in each row of the accessing
883:      window U(0:k, k:mbs-1).
884:      jl:    list of rows to be added to uneliminated rows
885:             i>= k: jl(i) is the first row to be added to row i
886:             i<  k: jl(i) is the row following row i in some list of rows
887:             jl(i) = mbs indicates the end of a list
888:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
889:             row i of U */
890:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
891:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
892:   il[0] = 0;
893:   for (i = 0; i < mbs; i++) jl[i] = mbs;

895:   PetscCall(ISGetIndices(perm, &perm_ptr));

897:   /* check permutation */
898:   if (!a->permute) {
899:     ai = a->i;
900:     aj = a->j;
901:     aa = a->a;
902:   } else {
903:     ai = a->inew;
904:     aj = a->jnew;
905:     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
906:     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
907:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
908:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

910:     for (i = 0; i < mbs; i++) {
911:       jmin = ai[i];
912:       jmax = ai[i + 1];
913:       for (j = jmin; j < jmax; j++) {
914:         while (a2anew[j] != j) {
915:           k         = a2anew[j];
916:           a2anew[j] = a2anew[k];
917:           a2anew[k] = k;
918:           for (k1 = 0; k1 < 4; k1++) {
919:             dk[k1]         = aa[k * 4 + k1];
920:             aa[k * 4 + k1] = aa[j * 4 + k1];
921:             aa[j * 4 + k1] = dk[k1];
922:           }
923:         }
924:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
925:         if (i > aj[j]) {
926:           ap    = aa + j * 4; /* ptr to the beginning of the block */
927:           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
928:           ap[1] = ap[2];
929:           ap[2] = dk[1];
930:         }
931:       }
932:     }
933:     PetscCall(PetscFree(a2anew));
934:   }

936:   /* for each row k */
937:   for (k = 0; k < mbs; k++) {
938:     /*initialize k-th row with elements nonzero in row perm(k) of A */
939:     jmin = ai[perm_ptr[k]];
940:     jmax = ai[perm_ptr[k] + 1];
941:     ap   = aa + jmin * 4;
942:     for (j = jmin; j < jmax; j++) {
943:       vj       = perm_ptr[aj[j]]; /* block col. index */
944:       rtmp_ptr = rtmp + vj * 4;
945:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
946:     }

948:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
949:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
950:     i = jl[k]; /* first row to be added to k_th row  */

952:     while (i < k) {
953:       nexti = jl[i]; /* next row to be added to k_th row */

955:       /* compute multiplier */
956:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

958:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959:       diag   = ba + i * 4;
960:       u      = ba + ili * 4;
961:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
962:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
963:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
964:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

966:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
968:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
969:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
970:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

972:       PetscCall(PetscLogFlops(16.0 * 2.0));

974:       /* update -U(i,k): ba[ili] = uik */
975:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

977:       /* add multiple of row i to k-th row ... */
978:       jmin = ili + 1;
979:       jmax = bi[i + 1];
980:       if (jmin < jmax) {
981:         for (j = jmin; j < jmax; j++) {
982:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983:           rtmp_ptr = rtmp + bj[j] * 4;
984:           u        = ba + j * 4;
985:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
986:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
987:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
988:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
989:         }
990:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

992:         /* ... add i to row list for next nonzero entry */
993:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
994:         j     = bj[jmin];
995:         jl[i] = jl[j];
996:         jl[j] = i; /* update jl */
997:       }
998:       i = nexti;
999:     }

1001:     /* save nonzero entries in k-th row of U ... */

1003:     /* invert diagonal block */
1004:     diag = ba + k * 4;
1005:     PetscCall(PetscArraycpy(diag, dk, 4));
1006:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1007:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1009:     jmin = bi[k];
1010:     jmax = bi[k + 1];
1011:     if (jmin < jmax) {
1012:       for (j = jmin; j < jmax; j++) {
1013:         vj       = bj[j]; /* block col. index of U */
1014:         u        = ba + j * 4;
1015:         rtmp_ptr = rtmp + vj * 4;
1016:         for (k1 = 0; k1 < 4; k1++) {
1017:           *u++        = *rtmp_ptr;
1018:           *rtmp_ptr++ = 0.0;
1019:         }
1020:       }

1022:       /* ... add k to row list for first nonzero entry in k-th row */
1023:       il[k] = jmin;
1024:       i     = bj[jmin];
1025:       jl[k] = jl[i];
1026:       jl[i] = k;
1027:     }
1028:   }

1030:   PetscCall(PetscFree(rtmp));
1031:   PetscCall(PetscFree2(il, jl));
1032:   if (a->permute) PetscCall(PetscFree(aa));
1033:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

1035:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1036:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1037:   C->assembled           = PETSC_TRUE;
1038:   C->preallocated        = PETSC_TRUE;

1040:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1041:   PetscFunctionReturn(PETSC_SUCCESS);
1042: }

1044: /*
1045:       Version for when blocks are 2 by 2 Using natural ordering
1046: */
1047: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1048: {
1049:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1050:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1051:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1052:   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1053:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1054:   PetscReal     shift = info->shiftamount;
1055:   PetscBool     allowzeropivot, zeropivotdetected;

1057:   PetscFunctionBegin;
1058:   allowzeropivot = PetscNot(A->erroriffailure);

1060:   /* initialization */
1061:   /* il and jl record the first nonzero element in each row of the accessing
1062:      window U(0:k, k:mbs-1).
1063:      jl:    list of rows to be added to uneliminated rows
1064:             i>= k: jl(i) is the first row to be added to row i
1065:             i<  k: jl(i) is the row following row i in some list of rows
1066:             jl(i) = mbs indicates the end of a list
1067:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1068:             row i of U */
1069:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1070:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1071:   il[0] = 0;
1072:   for (i = 0; i < mbs; i++) jl[i] = mbs;

1074:   ai = a->i;
1075:   aj = a->j;
1076:   aa = a->a;

1078:   /* for each row k */
1079:   for (k = 0; k < mbs; k++) {
1080:     /*initialize k-th row with elements nonzero in row k of A */
1081:     jmin = ai[k];
1082:     jmax = ai[k + 1];
1083:     ap   = aa + jmin * 4;
1084:     for (j = jmin; j < jmax; j++) {
1085:       vj       = aj[j]; /* block col. index */
1086:       rtmp_ptr = rtmp + vj * 4;
1087:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1088:     }

1090:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1091:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1092:     i = jl[k]; /* first row to be added to k_th row  */

1094:     while (i < k) {
1095:       nexti = jl[i]; /* next row to be added to k_th row */

1097:       /* compute multiplier */
1098:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

1100:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1101:       diag   = ba + i * 4;
1102:       u      = ba + ili * 4;
1103:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1104:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1105:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1106:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

1108:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1109:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1110:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1111:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1112:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

1114:       PetscCall(PetscLogFlops(16.0 * 2.0));

1116:       /* update -U(i,k): ba[ili] = uik */
1117:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

1119:       /* add multiple of row i to k-th row ... */
1120:       jmin = ili + 1;
1121:       jmax = bi[i + 1];
1122:       if (jmin < jmax) {
1123:         for (j = jmin; j < jmax; j++) {
1124:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1125:           rtmp_ptr = rtmp + bj[j] * 4;
1126:           u        = ba + j * 4;
1127:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1128:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1129:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1130:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1131:         }
1132:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

1134:         /* ... add i to row list for next nonzero entry */
1135:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1136:         j     = bj[jmin];
1137:         jl[i] = jl[j];
1138:         jl[j] = i; /* update jl */
1139:       }
1140:       i = nexti;
1141:     }

1143:     /* save nonzero entries in k-th row of U ... */

1145:     /* invert diagonal block */
1146:     diag = ba + k * 4;
1147:     PetscCall(PetscArraycpy(diag, dk, 4));
1148:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1149:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1151:     jmin = bi[k];
1152:     jmax = bi[k + 1];
1153:     if (jmin < jmax) {
1154:       for (j = jmin; j < jmax; j++) {
1155:         vj       = bj[j]; /* block col. index of U */
1156:         u        = ba + j * 4;
1157:         rtmp_ptr = rtmp + vj * 4;
1158:         for (k1 = 0; k1 < 4; k1++) {
1159:           *u++        = *rtmp_ptr;
1160:           *rtmp_ptr++ = 0.0;
1161:         }
1162:       }

1164:       /* ... add k to row list for first nonzero entry in k-th row */
1165:       il[k] = jmin;
1166:       i     = bj[jmin];
1167:       jl[k] = jl[i];
1168:       jl[i] = k;
1169:     }
1170:   }

1172:   PetscCall(PetscFree(rtmp));
1173:   PetscCall(PetscFree2(il, jl));

1175:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1179:   C->assembled           = PETSC_TRUE;
1180:   C->preallocated        = PETSC_TRUE;

1182:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1183:   PetscFunctionReturn(PETSC_SUCCESS);
1184: }

1186: /*
1187:     Numeric U^T*D*U factorization for SBAIJ format.
1188:     Version for blocks are 1 by 1.
1189: */
1190: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1191: {
1192:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1193:   IS              ip = b->row;
1194:   const PetscInt *ai, *aj, *rip;
1195:   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1196:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1197:   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1198:   PetscReal       rs;
1199:   FactorShiftCtx  sctx;

1201:   PetscFunctionBegin;
1202:   /* MatPivotSetUp(): initialize shift context sctx */
1203:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1205:   PetscCall(ISGetIndices(ip, &rip));
1206:   if (!a->permute) {
1207:     ai = a->i;
1208:     aj = a->j;
1209:     aa = a->a;
1210:   } else {
1211:     ai = a->inew;
1212:     aj = a->jnew;
1213:     nz = ai[mbs];
1214:     PetscCall(PetscMalloc1(nz, &aa));
1215:     a2anew = a->a2anew;
1216:     bval   = a->a;
1217:     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1218:   }

1220:   /* initialization */
1221:   /* il and jl record the first nonzero element in each row of the accessing
1222:      window U(0:k, k:mbs-1).
1223:      jl:    list of rows to be added to uneliminated rows
1224:             i>= k: jl(i) is the first row to be added to row i
1225:             i<  k: jl(i) is the row following row i in some list of rows
1226:             jl(i) = mbs indicates the end of a list
1227:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1228:             row i of U */
1229:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

1231:   do {
1232:     sctx.newshift = PETSC_FALSE;
1233:     il[0]         = 0;
1234:     for (i = 0; i < mbs; i++) {
1235:       rtmp[i] = 0.0;
1236:       jl[i]   = mbs;
1237:     }

1239:     for (k = 0; k < mbs; k++) {
1240:       /*initialize k-th row by the perm[k]-th row of A */
1241:       jmin = ai[rip[k]];
1242:       jmax = ai[rip[k] + 1];
1243:       bval = ba + bi[k];
1244:       for (j = jmin; j < jmax; j++) {
1245:         col       = rip[aj[j]];
1246:         rtmp[col] = aa[j];
1247:         *bval++   = 0.0; /* for in-place factorization */
1248:       }

1250:       /* shift the diagonal of the matrix */
1251:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1253:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1254:       dk = rtmp[k];
1255:       i  = jl[k]; /* first row to be added to k_th row  */

1257:       while (i < k) {
1258:         nexti = jl[i]; /* next row to be added to k_th row */

1260:         /* compute multiplier, update diag(k) and U(i,k) */
1261:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1262:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1263:         dk += uikdi * ba[ili];
1264:         ba[ili] = uikdi; /* -U(i,k) */

1266:         /* add multiple of row i to k-th row */
1267:         jmin = ili + 1;
1268:         jmax = bi[i + 1];
1269:         if (jmin < jmax) {
1270:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1271:           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));

1273:           /* update il and jl for row i */
1274:           il[i] = jmin;
1275:           j     = bj[jmin];
1276:           jl[i] = jl[j];
1277:           jl[j] = i;
1278:         }
1279:         i = nexti;
1280:       }

1282:       /* shift the diagonals when zero pivot is detected */
1283:       /* compute rs=sum of abs(off-diagonal) */
1284:       rs   = 0.0;
1285:       jmin = bi[k] + 1;
1286:       nz   = bi[k + 1] - jmin;
1287:       if (nz) {
1288:         bcol = bj + jmin;
1289:         while (nz--) {
1290:           rs += PetscAbsScalar(rtmp[*bcol]);
1291:           bcol++;
1292:         }
1293:       }

1295:       sctx.rs = rs;
1296:       sctx.pv = dk;
1297:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1298:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1299:       dk = sctx.pv;

1301:       /* copy data into U(k,:) */
1302:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1303:       jmin      = bi[k] + 1;
1304:       jmax      = bi[k + 1];
1305:       if (jmin < jmax) {
1306:         for (j = jmin; j < jmax; j++) {
1307:           col       = bj[j];
1308:           ba[j]     = rtmp[col];
1309:           rtmp[col] = 0.0;
1310:         }
1311:         /* add the k-th row into il and jl */
1312:         il[k] = jmin;
1313:         i     = bj[jmin];
1314:         jl[k] = jl[i];
1315:         jl[i] = k;
1316:       }
1317:     }
1318:   } while (sctx.newshift);
1319:   PetscCall(PetscFree3(rtmp, il, jl));
1320:   if (a->permute) PetscCall(PetscFree(aa));

1322:   PetscCall(ISRestoreIndices(ip, &rip));

1324:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1325:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1326:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1327:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1328:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1329:   C->assembled           = PETSC_TRUE;
1330:   C->preallocated        = PETSC_TRUE;

1332:   PetscCall(PetscLogFlops(C->rmap->N));
1333:   if (sctx.nshift) {
1334:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1335:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1337:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1338:     }
1339:   }
1340:   PetscFunctionReturn(PETSC_SUCCESS);
1341: }

1343: /*
1344:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1345:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1346: */
1347: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1348: {
1349:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1350:   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1351:   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1352:   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1353:   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1354:   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1355:   FactorShiftCtx sctx;
1356:   PetscReal      rs;
1357:   MatScalar      d, *v;

1359:   PetscFunctionBegin;
1360:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

1362:   /* MatPivotSetUp(): initialize shift context sctx */
1363:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1365:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1366:     sctx.shift_top = info->zeropivot;

1368:     PetscCall(PetscArrayzero(rtmp, mbs));

1370:     for (i = 0; i < mbs; i++) {
1371:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1372:       d = (aa)[a->diag[i]];
1373:       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1374:       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1375:       v     = aa + ai[i] + 1;
1376:       nz    = ai[i + 1] - ai[i] - 1;
1377:       for (j = 0; j < nz; j++) {
1378:         rtmp[i] += PetscAbsScalar(v[j]);
1379:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1380:       }
1381:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1382:     }
1383:     sctx.shift_top *= 1.1;
1384:     sctx.nshift_max = 5;
1385:     sctx.shift_lo   = 0.;
1386:     sctx.shift_hi   = 1.;
1387:   }

1389:   /* allocate working arrays
1390:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1391:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1392:   */
1393:   do {
1394:     sctx.newshift = PETSC_FALSE;

1396:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1397:     if (mbs) il[0] = 0;

1399:     for (k = 0; k < mbs; k++) {
1400:       /* zero rtmp */
1401:       nz    = bi[k + 1] - bi[k];
1402:       bjtmp = bj + bi[k];
1403:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1405:       /* load in initial unfactored row */
1406:       bval = ba + bi[k];
1407:       jmin = ai[k];
1408:       jmax = ai[k + 1];
1409:       for (j = jmin; j < jmax; j++) {
1410:         col       = aj[j];
1411:         rtmp[col] = aa[j];
1412:         *bval++   = 0.0; /* for in-place factorization */
1413:       }
1414:       /* shift the diagonal of the matrix: ZeropivotApply() */
1415:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1417:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1418:       dk = rtmp[k];
1419:       i  = c2r[k]; /* first row to be added to k_th row  */

1421:       while (i < k) {
1422:         nexti = c2r[i]; /* next row to be added to k_th row */

1424:         /* compute multiplier, update diag(k) and U(i,k) */
1425:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1426:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1427:         dk += uikdi * ba[ili];           /* update diag[k] */
1428:         ba[ili] = uikdi;                 /* -U(i,k) */

1430:         /* add multiple of row i to k-th row */
1431:         jmin = ili + 1;
1432:         jmax = bi[i + 1];
1433:         if (jmin < jmax) {
1434:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1435:           /* update il and c2r for row i */
1436:           il[i]  = jmin;
1437:           j      = bj[jmin];
1438:           c2r[i] = c2r[j];
1439:           c2r[j] = i;
1440:         }
1441:         i = nexti;
1442:       }

1444:       /* copy data into U(k,:) */
1445:       rs   = 0.0;
1446:       jmin = bi[k];
1447:       jmax = bi[k + 1] - 1;
1448:       if (jmin < jmax) {
1449:         for (j = jmin; j < jmax; j++) {
1450:           col   = bj[j];
1451:           ba[j] = rtmp[col];
1452:           rs += PetscAbsScalar(ba[j]);
1453:         }
1454:         /* add the k-th row into il and c2r */
1455:         il[k]  = jmin;
1456:         i      = bj[jmin];
1457:         c2r[k] = c2r[i];
1458:         c2r[i] = k;
1459:       }

1461:       sctx.rs = rs;
1462:       sctx.pv = dk;
1463:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1464:       if (sctx.newshift) break;
1465:       dk = sctx.pv;

1467:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1468:     }
1469:   } while (sctx.newshift);

1471:   PetscCall(PetscFree3(rtmp, il, c2r));

1473:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1474:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1475:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1476:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1477:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1478:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1480:   B->assembled    = PETSC_TRUE;
1481:   B->preallocated = PETSC_TRUE;

1483:   PetscCall(PetscLogFlops(B->rmap->n));

1485:   /* MatPivotView() */
1486:   if (sctx.nshift) {
1487:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1488:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1489:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1490:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1491:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1492:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1493:     }
1494:   }
1495:   PetscFunctionReturn(PETSC_SUCCESS);
1496: }

1498: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1499: {
1500:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1501:   PetscInt       i, j, mbs = a->mbs;
1502:   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1503:   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1504:   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1505:   PetscReal      rs;
1506:   FactorShiftCtx sctx;

1508:   PetscFunctionBegin;
1509:   /* MatPivotSetUp(): initialize shift context sctx */
1510:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1512:   /* initialization */
1513:   /* il and jl record the first nonzero element in each row of the accessing
1514:      window U(0:k, k:mbs-1).
1515:      jl:    list of rows to be added to uneliminated rows
1516:             i>= k: jl(i) is the first row to be added to row i
1517:             i<  k: jl(i) is the row following row i in some list of rows
1518:             jl(i) = mbs indicates the end of a list
1519:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1520:   */
1521:   PetscCall(PetscMalloc1(mbs, &rtmp));
1522:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));

1524:   do {
1525:     sctx.newshift = PETSC_FALSE;
1526:     il[0]         = 0;
1527:     for (i = 0; i < mbs; i++) {
1528:       rtmp[i] = 0.0;
1529:       jl[i]   = mbs;
1530:     }

1532:     for (k = 0; k < mbs; k++) {
1533:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1534:       nz   = ai[k + 1] - ai[k];
1535:       acol = aj + ai[k];
1536:       aval = aa + ai[k];
1537:       bval = ba + bi[k];
1538:       while (nz--) {
1539:         rtmp[*acol++] = *aval++;
1540:         *bval++       = 0.0; /* for in-place factorization */
1541:       }

1543:       /* shift the diagonal of the matrix */
1544:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1546:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1547:       dk = rtmp[k];
1548:       i  = jl[k]; /* first row to be added to k_th row  */

1550:       while (i < k) {
1551:         nexti = jl[i]; /* next row to be added to k_th row */
1552:         /* compute multiplier, update D(k) and U(i,k) */
1553:         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1554:         uikdi = -ba[ili] * ba[bi[i]];
1555:         dk += uikdi * ba[ili];
1556:         ba[ili] = uikdi; /* -U(i,k) */

1558:         /* add multiple of row i to k-th row ... */
1559:         jmin = ili + 1;
1560:         nz   = bi[i + 1] - jmin;
1561:         if (nz > 0) {
1562:           bcol = bj + jmin;
1563:           bval = ba + jmin;
1564:           PetscCall(PetscLogFlops(2.0 * nz));
1565:           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);

1567:           /* update il and jl for i-th row */
1568:           il[i] = jmin;
1569:           j     = bj[jmin];
1570:           jl[i] = jl[j];
1571:           jl[j] = i;
1572:         }
1573:         i = nexti;
1574:       }

1576:       /* shift the diagonals when zero pivot is detected */
1577:       /* compute rs=sum of abs(off-diagonal) */
1578:       rs   = 0.0;
1579:       jmin = bi[k] + 1;
1580:       nz   = bi[k + 1] - jmin;
1581:       if (nz) {
1582:         bcol = bj + jmin;
1583:         while (nz--) {
1584:           rs += PetscAbsScalar(rtmp[*bcol]);
1585:           bcol++;
1586:         }
1587:       }

1589:       sctx.rs = rs;
1590:       sctx.pv = dk;
1591:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1592:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1593:       dk = sctx.pv;

1595:       /* copy data into U(k,:) */
1596:       ba[bi[k]] = 1.0 / dk;
1597:       jmin      = bi[k] + 1;
1598:       nz        = bi[k + 1] - jmin;
1599:       if (nz) {
1600:         bcol = bj + jmin;
1601:         bval = ba + jmin;
1602:         while (nz--) {
1603:           *bval++       = rtmp[*bcol];
1604:           rtmp[*bcol++] = 0.0;
1605:         }
1606:         /* add k-th row into il and jl */
1607:         il[k] = jmin;
1608:         i     = bj[jmin];
1609:         jl[k] = jl[i];
1610:         jl[i] = k;
1611:       }
1612:     } /* end of for (k = 0; k<mbs; k++) */
1613:   } while (sctx.newshift);
1614:   PetscCall(PetscFree(rtmp));
1615:   PetscCall(PetscFree2(il, jl));

1617:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1618:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1619:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1623:   C->assembled    = PETSC_TRUE;
1624:   C->preallocated = PETSC_TRUE;

1626:   PetscCall(PetscLogFlops(C->rmap->N));
1627:   if (sctx.nshift) {
1628:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1629:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1631:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1632:     }
1633:   }
1634:   PetscFunctionReturn(PETSC_SUCCESS);
1635: }

1637: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1638: {
1639:   Mat C;

1641:   PetscFunctionBegin;
1642:   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1643:   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1644:   PetscCall(MatCholeskyFactorNumeric(C, A, info));

1646:   A->ops->solve          = C->ops->solve;
1647:   A->ops->solvetranspose = C->ops->solvetranspose;

1649:   PetscCall(MatHeaderMerge(A, &C));
1650:   PetscFunctionReturn(PETSC_SUCCESS);
1651: }