Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: /*
  7:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

  9:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 10: */
 11: #if 0
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   PetscFunctionBegin;
 22:   /* pick initial row */
 23:   best = -1;
 24:   for (i = 0; i < n; i++) {
 25:     future = 0.0;
 26:     for (j = ai[i]; j < ai[i + 1]; j++) {
 27:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 28:       else past = PetscAbsScalar(aa[j]);
 29:     }
 30:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 31:     if (past / future > best) {
 32:       best    = past / future;
 33:       current = i;
 34:     }
 35:   }

 37:   PetscCall(PetscMalloc1(n, &done));
 38:   PetscCall(PetscArrayzero(done, n));
 39:   PetscCall(PetscMalloc1(n, &order));
 40:   order[0] = current;
 41:   for (i = 0; i < n - 1; i++) {
 42:     done[current] = PETSC_TRUE;
 43:     best          = -1;
 44:     /* loop over all neighbors of current pivot */
 45:     for (j = ai[current]; j < ai[current + 1]; j++) {
 46:       jj = aj[j];
 47:       if (done[jj]) continue;
 48:       /* loop over columns of potential next row computing weights for below and above diagonal */
 49:       past = future = 0.0;
 50:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 51:         kk = aj[k];
 52:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 53:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 54:       }
 55:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 56:       if (past / future > best) {
 57:         best       = past / future;
 58:         newcurrent = jj;
 59:       }
 60:     }
 61:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 62:       best = -1;
 63:       for (k = 0; k < n; k++) {
 64:         if (done[k]) continue;
 65:         future = 0.0;
 66:         past   = 0.0;
 67:         for (j = ai[k]; j < ai[k + 1]; j++) {
 68:           kk = aj[j];
 69:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 70:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 71:         }
 72:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 73:         if (past / future > best) {
 74:           best       = past / future;
 75:           newcurrent = k;
 76:         }
 77:       }
 78:     }
 79:     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
 80:     current      = newcurrent;
 81:     order[i + 1] = current;
 82:   }
 83:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
 84:   *icol = *irow;
 85:   PetscCall(PetscObjectReference((PetscObject)*irow));
 86:   PetscCall(PetscFree(done));
 87:   PetscCall(PetscFree(order));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }
 90: #endif

 92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 93: {
 94:   PetscFunctionBegin;
 95:   *type = MATSOLVERPETSC;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101:   PetscInt n = A->rmap->n;

103:   PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105:   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY or ICC Factor is not supported");
106: #endif
107:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
108:   PetscCall(MatSetSizes(*B, n, n, n, n));
109:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
110:     PetscCall(MatSetType(*B, MATSEQAIJ));

112:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
113:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

115:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
116:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
117:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
118:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
119:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
120:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
121:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

123:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
124:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
125:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
126:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
127:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
128:   (*B)->factortype = ftype;

130:   PetscCall(PetscFree((*B)->solvertype));
131:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
132:   (*B)->canuseordering = PETSC_TRUE;
133:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
134:   PetscFunctionReturn(PETSC_SUCCESS);
135: }

137: #if 0
138: // currently unused
139: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
140: {
141:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
142:   IS                 isicol;
143:   const PetscInt    *r, *ic;
144:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
145:   PetscInt          *bi, *bj, *ajtmp;
146:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
147:   PetscReal          f;
148:   PetscInt           nlnk, *lnk, k, **bi_ptr;
149:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
150:   PetscBT            lnkbt;
151:   PetscBool          missing;

153:   PetscFunctionBegin;
154:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
155:   PetscCall(MatMissingDiagonal(A, &missing, &i));
156:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

158:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
159:   PetscCall(ISGetIndices(isrow, &r));
160:   PetscCall(ISGetIndices(isicol, &ic));

162:   /* get new row pointers */
163:   PetscCall(PetscMalloc1(n + 1, &bi));
164:   bi[0] = 0;

166:   /* bdiag is location of diagonal in factor */
167:   PetscCall(PetscMalloc1(n + 1, &bdiag));
168:   bdiag[0] = 0;

170:   /* linked list for storing column indices of the active row */
171:   nlnk = n + 1;
172:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

174:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

176:   /* initial FreeSpace size is f*(ai[n]+1) */
177:   f = info->fill;
178:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
179:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
180:   current_space = free_space;

182:   for (i = 0; i < n; i++) {
183:     /* copy previous fill into linked list */
184:     nzi   = 0;
185:     nnz   = ai[r[i] + 1] - ai[r[i]];
186:     ajtmp = aj + ai[r[i]];
187:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
188:     nzi += nlnk;

190:     /* add pivot rows into linked list */
191:     row = lnk[n];
192:     while (row < i) {
193:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
194:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
195:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
196:       nzi += nlnk;
197:       row = lnk[row];
198:     }
199:     bi[i + 1] = bi[i] + nzi;
200:     im[i]     = nzi;

202:     /* mark bdiag */
203:     nzbd = 0;
204:     nnz  = nzi;
205:     k    = lnk[n];
206:     while (nnz-- && k < i) {
207:       nzbd++;
208:       k = lnk[k];
209:     }
210:     bdiag[i] = bi[i] + nzbd;

212:     /* if free space is not available, make more free space */
213:     if (current_space->local_remaining < nzi) {
214:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
215:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
216:       reallocs++;
217:     }

219:     /* copy data into free space, then initialize lnk */
220:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

222:     bi_ptr[i] = current_space->array;
223:     current_space->array += nzi;
224:     current_space->local_used += nzi;
225:     current_space->local_remaining -= nzi;
226:   }
227:   #if defined(PETSC_USE_INFO)
228:   if (ai[n] != 0) {
229:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
230:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
231:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
232:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
233:     PetscCall(PetscInfo(A, "for best performance.\n"));
234:   } else {
235:     PetscCall(PetscInfo(A, "Empty matrix\n"));
236:   }
237:   #endif

239:   PetscCall(ISRestoreIndices(isrow, &r));
240:   PetscCall(ISRestoreIndices(isicol, &ic));

242:   /* destroy list of free space and other temporary array(s) */
243:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
244:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
245:   PetscCall(PetscLLDestroy(lnk, lnkbt));
246:   PetscCall(PetscFree2(bi_ptr, im));

248:   /* put together the new matrix */
249:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
250:   b = (Mat_SeqAIJ *)(B)->data;

252:   b->free_a       = PETSC_TRUE;
253:   b->free_ij      = PETSC_TRUE;
254:   b->singlemalloc = PETSC_FALSE;

256:   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
257:   b->j    = bj;
258:   b->i    = bi;
259:   b->diag = bdiag;
260:   b->ilen = NULL;
261:   b->imax = NULL;
262:   b->row  = isrow;
263:   b->col  = iscol;
264:   PetscCall(PetscObjectReference((PetscObject)isrow));
265:   PetscCall(PetscObjectReference((PetscObject)iscol));
266:   b->icol = isicol;
267:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

269:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
270:   b->maxnz = b->nz = bi[n];

272:   (B)->factortype            = MAT_FACTOR_LU;
273:   (B)->info.factor_mallocs   = reallocs;
274:   (B)->info.fill_ratio_given = f;

276:   if (ai[n]) {
277:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
278:   } else {
279:     (B)->info.fill_ratio_needed = 0.0;
280:   }
281:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
282:   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283:   PetscFunctionReturn(PETSC_SUCCESS);
284: }
285: #endif

287: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
288: {
289:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
290:   IS                 isicol;
291:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
292:   PetscInt           i, n = A->rmap->n;
293:   PetscInt          *bi, *bj;
294:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
295:   PetscReal          f;
296:   PetscInt           nlnk, *lnk, k, **bi_ptr;
297:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
298:   PetscBT            lnkbt;
299:   PetscBool          missing;

301:   PetscFunctionBegin;
302:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
303:   PetscCall(MatMissingDiagonal(A, &missing, &i));
304:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

306:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
307:   PetscCall(ISGetIndices(isrow, &r));
308:   PetscCall(ISGetIndices(isicol, &ic));

310:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311:   PetscCall(PetscMalloc1(n + 1, &bi));
312:   PetscCall(PetscMalloc1(n + 1, &bdiag));
313:   bi[0] = bdiag[0] = 0;

315:   /* linked list for storing column indices of the active row */
316:   nlnk = n + 1;
317:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

319:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

321:   /* initial FreeSpace size is f*(ai[n]+1) */
322:   f = info->fill;
323:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
324:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
325:   current_space = free_space;

327:   for (i = 0; i < n; i++) {
328:     /* copy previous fill into linked list */
329:     nzi   = 0;
330:     nnz   = ai[r[i] + 1] - ai[r[i]];
331:     ajtmp = aj + ai[r[i]];
332:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
333:     nzi += nlnk;

335:     /* add pivot rows into linked list */
336:     row = lnk[n];
337:     while (row < i) {
338:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
339:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
341:       nzi += nlnk;
342:       row = lnk[row];
343:     }
344:     bi[i + 1] = bi[i] + nzi;
345:     im[i]     = nzi;

347:     /* mark bdiag */
348:     nzbd = 0;
349:     nnz  = nzi;
350:     k    = lnk[n];
351:     while (nnz-- && k < i) {
352:       nzbd++;
353:       k = lnk[k];
354:     }
355:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

357:     /* if free space is not available, make more free space */
358:     if (current_space->local_remaining < nzi) {
359:       /* estimated additional space needed */
360:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
361:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
362:       reallocs++;
363:     }

365:     /* copy data into free space, then initialize lnk */
366:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

368:     bi_ptr[i] = current_space->array;
369:     current_space->array += nzi;
370:     current_space->local_used += nzi;
371:     current_space->local_remaining -= nzi;
372:   }

374:   PetscCall(ISRestoreIndices(isrow, &r));
375:   PetscCall(ISRestoreIndices(isicol, &ic));

377:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
378:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
379:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
380:   PetscCall(PetscLLDestroy(lnk, lnkbt));
381:   PetscCall(PetscFree2(bi_ptr, im));

383:   /* put together the new matrix */
384:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
385:   b = (Mat_SeqAIJ *)(B)->data;

387:   b->free_a       = PETSC_TRUE;
388:   b->free_ij      = PETSC_TRUE;
389:   b->singlemalloc = PETSC_FALSE;

391:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

393:   b->j    = bj;
394:   b->i    = bi;
395:   b->diag = bdiag;
396:   b->ilen = NULL;
397:   b->imax = NULL;
398:   b->row  = isrow;
399:   b->col  = iscol;
400:   PetscCall(PetscObjectReference((PetscObject)isrow));
401:   PetscCall(PetscObjectReference((PetscObject)iscol));
402:   b->icol = isicol;
403:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

405:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
406:   b->maxnz = b->nz = bdiag[0] + 1;

408:   B->factortype            = MAT_FACTOR_LU;
409:   B->info.factor_mallocs   = reallocs;
410:   B->info.fill_ratio_given = f;

412:   if (ai[n]) {
413:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
414:   } else {
415:     B->info.fill_ratio_needed = 0.0;
416:   }
417: #if defined(PETSC_USE_INFO)
418:   if (ai[n] != 0) {
419:     PetscReal af = B->info.fill_ratio_needed;
420:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
421:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
422:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
423:     PetscCall(PetscInfo(A, "for best performance.\n"));
424:   } else {
425:     PetscCall(PetscInfo(A, "Empty matrix\n"));
426:   }
427: #endif
428:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
430:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
431:   PetscFunctionReturn(PETSC_SUCCESS);
432: }

434: /*
435:     Trouble in factorization, should we dump the original matrix?
436: */
437: PetscErrorCode MatFactorDumpMatrix(Mat A)
438: {
439:   PetscBool flg = PETSC_FALSE;

441:   PetscFunctionBegin;
442:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
443:   if (flg) {
444:     PetscViewer viewer;
445:     char        filename[PETSC_MAX_PATH_LEN];

447:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
448:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
449:     PetscCall(MatView(A, viewer));
450:     PetscCall(PetscViewerDestroy(&viewer));
451:   }
452:   PetscFunctionReturn(PETSC_SUCCESS);
453: }

455: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
456: {
457:   Mat              C = B;
458:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
459:   IS               isrow = b->row, isicol = b->icol;
460:   const PetscInt  *r, *ic, *ics;
461:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
462:   PetscInt         i, j, k, nz, nzL, row, *pj;
463:   const PetscInt  *ajtmp, *bjtmp;
464:   MatScalar       *rtmp, *pc, multiplier, *pv;
465:   const MatScalar *aa = a->a, *v;
466:   PetscBool        row_identity, col_identity;
467:   FactorShiftCtx   sctx;
468:   const PetscInt  *ddiag;
469:   PetscReal        rs;
470:   MatScalar        d;

472:   PetscFunctionBegin;
473:   /* MatPivotSetUp(): initialize shift context sctx */
474:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

476:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477:     ddiag          = a->diag;
478:     sctx.shift_top = info->zeropivot;
479:     for (i = 0; i < n; i++) {
480:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481:       d  = (aa)[ddiag[i]];
482:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
483:       v  = aa + ai[i];
484:       nz = ai[i + 1] - ai[i];
485:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486:       if (rs > sctx.shift_top) sctx.shift_top = rs;
487:     }
488:     sctx.shift_top *= 1.1;
489:     sctx.nshift_max = 5;
490:     sctx.shift_lo   = 0.;
491:     sctx.shift_hi   = 1.;
492:   }

494:   PetscCall(ISGetIndices(isrow, &r));
495:   PetscCall(ISGetIndices(isicol, &ic));
496:   PetscCall(PetscMalloc1(n + 1, &rtmp));
497:   ics = ic;

499:   do {
500:     sctx.newshift = PETSC_FALSE;
501:     for (i = 0; i < n; i++) {
502:       /* zero rtmp */
503:       /* L part */
504:       nz    = bi[i + 1] - bi[i];
505:       bjtmp = bj + bi[i];
506:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

508:       /* U part */
509:       nz    = bdiag[i] - bdiag[i + 1];
510:       bjtmp = bj + bdiag[i + 1] + 1;
511:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

513:       /* load in initial (unfactored row) */
514:       nz    = ai[r[i] + 1] - ai[r[i]];
515:       ajtmp = aj + ai[r[i]];
516:       v     = aa + ai[r[i]];
517:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518:       /* ZeropivotApply() */
519:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

521:       /* elimination */
522:       bjtmp = bj + bi[i];
523:       row   = *bjtmp++;
524:       nzL   = bi[i + 1] - bi[i];
525:       for (k = 0; k < nzL; k++) {
526:         pc = rtmp + row;
527:         if (*pc != 0.0) {
528:           pv         = b->a + bdiag[row];
529:           multiplier = *pc * (*pv);
530:           *pc        = multiplier;

532:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533:           pv = b->a + bdiag[row + 1] + 1;
534:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

536:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
538:         }
539:         row = *bjtmp++;
540:       }

542:       /* finished row so stick it into b->a */
543:       rs = 0.0;
544:       /* L part */
545:       pv = b->a + bi[i];
546:       pj = b->j + bi[i];
547:       nz = bi[i + 1] - bi[i];
548:       for (j = 0; j < nz; j++) {
549:         pv[j] = rtmp[pj[j]];
550:         rs += PetscAbsScalar(pv[j]);
551:       }

553:       /* U part */
554:       pv = b->a + bdiag[i + 1] + 1;
555:       pj = b->j + bdiag[i + 1] + 1;
556:       nz = bdiag[i] - bdiag[i + 1] - 1;
557:       for (j = 0; j < nz; j++) {
558:         pv[j] = rtmp[pj[j]];
559:         rs += PetscAbsScalar(pv[j]);
560:       }

562:       sctx.rs = rs;
563:       sctx.pv = rtmp[i];
564:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565:       if (sctx.newshift) break; /* break for-loop */
566:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

568:       /* Mark diagonal and invert diagonal for simpler triangular solves */
569:       pv  = b->a + bdiag[i];
570:       *pv = 1.0 / rtmp[i];

572:     } /* endof for (i=0; i<n; i++) { */

574:     /* MatPivotRefine() */
575:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576:       /*
577:        * if no shift in this attempt & shifting & started shifting & can refine,
578:        * then try lower shift
579:        */
580:       sctx.shift_hi       = sctx.shift_fraction;
581:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
583:       sctx.newshift       = PETSC_TRUE;
584:       sctx.nshift++;
585:     }
586:   } while (sctx.newshift);

588:   PetscCall(PetscFree(rtmp));
589:   PetscCall(ISRestoreIndices(isicol, &ic));
590:   PetscCall(ISRestoreIndices(isrow, &r));

592:   PetscCall(ISIdentity(isrow, &row_identity));
593:   PetscCall(ISIdentity(isicol, &col_identity));
594:   if (b->inode.size) {
595:     C->ops->solve = MatSolve_SeqAIJ_Inode;
596:   } else if (row_identity && col_identity) {
597:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
598:   } else {
599:     C->ops->solve = MatSolve_SeqAIJ;
600:   }
601:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
602:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
603:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
604:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
605:   C->assembled              = PETSC_TRUE;
606:   C->preallocated           = PETSC_TRUE;

608:   PetscCall(PetscLogFlops(C->cmap->n));

610:   /* MatShiftView(A,info,&sctx) */
611:   if (sctx.nshift) {
612:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
614:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
616:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
617:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
618:     }
619:   }
620:   PetscFunctionReturn(PETSC_SUCCESS);
621: }

623: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
624: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
625: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

627: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
628: {
629:   Mat              C = B;
630:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
631:   IS               isrow = b->row, isicol = b->icol;
632:   const PetscInt  *r, *ic, *ics;
633:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
634:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
635:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
636:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
637:   const MatScalar *v, *aa = a->a;
638:   PetscReal        rs = 0.0;
639:   FactorShiftCtx   sctx;
640:   const PetscInt  *ddiag;
641:   PetscBool        row_identity, col_identity;

643:   PetscFunctionBegin;
644:   /* MatPivotSetUp(): initialize shift context sctx */
645:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

647:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648:     ddiag          = a->diag;
649:     sctx.shift_top = info->zeropivot;
650:     for (i = 0; i < n; i++) {
651:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652:       d  = (aa)[ddiag[i]];
653:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654:       v  = aa + ai[i];
655:       nz = ai[i + 1] - ai[i];
656:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
657:       if (rs > sctx.shift_top) sctx.shift_top = rs;
658:     }
659:     sctx.shift_top *= 1.1;
660:     sctx.nshift_max = 5;
661:     sctx.shift_lo   = 0.;
662:     sctx.shift_hi   = 1.;
663:   }

665:   PetscCall(ISGetIndices(isrow, &r));
666:   PetscCall(ISGetIndices(isicol, &ic));
667:   PetscCall(PetscMalloc1(n + 1, &rtmp));
668:   ics = ic;

670:   do {
671:     sctx.newshift = PETSC_FALSE;
672:     for (i = 0; i < n; i++) {
673:       nz    = bi[i + 1] - bi[i];
674:       bjtmp = bj + bi[i];
675:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

677:       /* load in initial (unfactored row) */
678:       nz    = ai[r[i] + 1] - ai[r[i]];
679:       ajtmp = aj + ai[r[i]];
680:       v     = aa + ai[r[i]];
681:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
682:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

684:       row = *bjtmp++;
685:       while (row < i) {
686:         pc = rtmp + row;
687:         if (*pc != 0.0) {
688:           pv         = b->a + diag_offset[row];
689:           pj         = b->j + diag_offset[row] + 1;
690:           multiplier = *pc / *pv++;
691:           *pc        = multiplier;
692:           nz         = bi[row + 1] - diag_offset[row] - 1;
693:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
694:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
695:         }
696:         row = *bjtmp++;
697:       }
698:       /* finished row so stick it into b->a */
699:       pv   = b->a + bi[i];
700:       pj   = b->j + bi[i];
701:       nz   = bi[i + 1] - bi[i];
702:       diag = diag_offset[i] - bi[i];
703:       rs   = 0.0;
704:       for (j = 0; j < nz; j++) {
705:         pv[j] = rtmp[pj[j]];
706:         rs += PetscAbsScalar(pv[j]);
707:       }
708:       rs -= PetscAbsScalar(pv[diag]);

710:       sctx.rs = rs;
711:       sctx.pv = pv[diag];
712:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
713:       if (sctx.newshift) break;
714:       pv[diag] = sctx.pv;
715:     }

717:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
718:       /*
719:        * if no shift in this attempt & shifting & started shifting & can refine,
720:        * then try lower shift
721:        */
722:       sctx.shift_hi       = sctx.shift_fraction;
723:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
724:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
725:       sctx.newshift       = PETSC_TRUE;
726:       sctx.nshift++;
727:     }
728:   } while (sctx.newshift);

730:   /* invert diagonal entries for simpler triangular solves */
731:   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
732:   PetscCall(PetscFree(rtmp));
733:   PetscCall(ISRestoreIndices(isicol, &ic));
734:   PetscCall(ISRestoreIndices(isrow, &r));

736:   PetscCall(ISIdentity(isrow, &row_identity));
737:   PetscCall(ISIdentity(isicol, &col_identity));
738:   if (row_identity && col_identity) {
739:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
740:   } else {
741:     C->ops->solve = MatSolve_SeqAIJ_inplace;
742:   }
743:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
744:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
745:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
746:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;

748:   C->assembled    = PETSC_TRUE;
749:   C->preallocated = PETSC_TRUE;

751:   PetscCall(PetscLogFlops(C->cmap->n));
752:   if (sctx.nshift) {
753:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
754:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
755:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
756:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
757:     }
758:   }
759:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
760:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

762:   PetscCall(MatSeqAIJCheckInode(C));
763:   PetscFunctionReturn(PETSC_SUCCESS);
764: }

766: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

768: /*
769:    This routine implements inplace ILU(0) with row or/and column permutations.
770:    Input:
771:      A - original matrix
772:    Output;
773:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774:          a->j (col index) is permuted by the inverse of colperm, then sorted
775:          a->a reordered accordingly with a->j
776:          a->diag (ptr to diagonal elements) is updated.
777: */
778: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
779: {
780:   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
781:   IS               isrow = a->row, isicol = a->icol;
782:   const PetscInt  *r, *ic, *ics;
783:   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
784:   PetscInt        *ajtmp, nz, row;
785:   PetscInt        *diag = a->diag, nbdiag, *pj;
786:   PetscScalar     *rtmp, *pc, multiplier, d;
787:   MatScalar       *pv, *v;
788:   PetscReal        rs;
789:   FactorShiftCtx   sctx;
790:   const MatScalar *aa = a->a, *vtmp;

792:   PetscFunctionBegin;
793:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

795:   /* MatPivotSetUp(): initialize shift context sctx */
796:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

798:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
799:     const PetscInt *ddiag = a->diag;
800:     sctx.shift_top        = info->zeropivot;
801:     for (i = 0; i < n; i++) {
802:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
803:       d    = (aa)[ddiag[i]];
804:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
805:       vtmp = aa + ai[i];
806:       nz   = ai[i + 1] - ai[i];
807:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
808:       if (rs > sctx.shift_top) sctx.shift_top = rs;
809:     }
810:     sctx.shift_top *= 1.1;
811:     sctx.nshift_max = 5;
812:     sctx.shift_lo   = 0.;
813:     sctx.shift_hi   = 1.;
814:   }

816:   PetscCall(ISGetIndices(isrow, &r));
817:   PetscCall(ISGetIndices(isicol, &ic));
818:   PetscCall(PetscMalloc1(n + 1, &rtmp));
819:   PetscCall(PetscArrayzero(rtmp, n + 1));
820:   ics = ic;

822: #if defined(MV)
823:   sctx.shift_top      = 0.;
824:   sctx.nshift_max     = 0;
825:   sctx.shift_lo       = 0.;
826:   sctx.shift_hi       = 0.;
827:   sctx.shift_fraction = 0.;

829:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
830:     sctx.shift_top = 0.;
831:     for (i = 0; i < n; i++) {
832:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
833:       d  = (a->a)[diag[i]];
834:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
835:       v  = a->a + ai[i];
836:       nz = ai[i + 1] - ai[i];
837:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
838:       if (rs > sctx.shift_top) sctx.shift_top = rs;
839:     }
840:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
841:     sctx.shift_top *= 1.1;
842:     sctx.nshift_max = 5;
843:     sctx.shift_lo   = 0.;
844:     sctx.shift_hi   = 1.;
845:   }

847:   sctx.shift_amount = 0.;
848:   sctx.nshift       = 0;
849: #endif

851:   do {
852:     sctx.newshift = PETSC_FALSE;
853:     for (i = 0; i < n; i++) {
854:       /* load in initial unfactored row */
855:       nz    = ai[r[i] + 1] - ai[r[i]];
856:       ajtmp = aj + ai[r[i]];
857:       v     = a->a + ai[r[i]];
858:       /* sort permuted ajtmp and values v accordingly */
859:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
860:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

862:       diag[r[i]] = ai[r[i]];
863:       for (j = 0; j < nz; j++) {
864:         rtmp[ajtmp[j]] = v[j];
865:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
866:       }
867:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

869:       row = *ajtmp++;
870:       while (row < i) {
871:         pc = rtmp + row;
872:         if (*pc != 0.0) {
873:           pv = a->a + diag[r[row]];
874:           pj = aj + diag[r[row]] + 1;

876:           multiplier = *pc / *pv++;
877:           *pc        = multiplier;
878:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
879:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
880:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
881:         }
882:         row = *ajtmp++;
883:       }
884:       /* finished row so overwrite it onto a->a */
885:       pv     = a->a + ai[r[i]];
886:       pj     = aj + ai[r[i]];
887:       nz     = ai[r[i] + 1] - ai[r[i]];
888:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

890:       rs = 0.0;
891:       for (j = 0; j < nz; j++) {
892:         pv[j] = rtmp[pj[j]];
893:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
894:       }

896:       sctx.rs = rs;
897:       sctx.pv = pv[nbdiag];
898:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
899:       if (sctx.newshift) break;
900:       pv[nbdiag] = sctx.pv;
901:     }

903:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
904:       /*
905:        * if no shift in this attempt & shifting & started shifting & can refine,
906:        * then try lower shift
907:        */
908:       sctx.shift_hi       = sctx.shift_fraction;
909:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
910:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
911:       sctx.newshift       = PETSC_TRUE;
912:       sctx.nshift++;
913:     }
914:   } while (sctx.newshift);

916:   /* invert diagonal entries for simpler triangular solves */
917:   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];

919:   PetscCall(PetscFree(rtmp));
920:   PetscCall(ISRestoreIndices(isicol, &ic));
921:   PetscCall(ISRestoreIndices(isrow, &r));

923:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
924:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
925:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
926:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

928:   A->assembled    = PETSC_TRUE;
929:   A->preallocated = PETSC_TRUE;

931:   PetscCall(PetscLogFlops(A->cmap->n));
932:   if (sctx.nshift) {
933:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
934:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
935:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
936:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
937:     }
938:   }
939:   PetscFunctionReturn(PETSC_SUCCESS);
940: }

942: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
943: {
944:   Mat C;

946:   PetscFunctionBegin;
947:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
948:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
949:   PetscCall(MatLUFactorNumeric(C, A, info));

951:   A->ops->solve          = C->ops->solve;
952:   A->ops->solvetranspose = C->ops->solvetranspose;

954:   PetscCall(MatHeaderMerge(A, &C));
955:   PetscFunctionReturn(PETSC_SUCCESS);
956: }

958: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
959: {
960:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
961:   IS                 iscol = a->col, isrow = a->row;
962:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
963:   PetscInt           nz;
964:   const PetscInt    *rout, *cout, *r, *c;
965:   PetscScalar       *x, *tmp, *tmps, sum;
966:   const PetscScalar *b;
967:   const MatScalar   *aa = a->a, *v;

969:   PetscFunctionBegin;
970:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

972:   PetscCall(VecGetArrayRead(bb, &b));
973:   PetscCall(VecGetArrayWrite(xx, &x));
974:   tmp = a->solve_work;

976:   PetscCall(ISGetIndices(isrow, &rout));
977:   r = rout;
978:   PetscCall(ISGetIndices(iscol, &cout));
979:   c = cout + (n - 1);

981:   /* forward solve the lower triangular */
982:   tmp[0] = b[*r++];
983:   tmps   = tmp;
984:   for (i = 1; i < n; i++) {
985:     v   = aa + ai[i];
986:     vi  = aj + ai[i];
987:     nz  = a->diag[i] - ai[i];
988:     sum = b[*r++];
989:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
990:     tmp[i] = sum;
991:   }

993:   /* backward solve the upper triangular */
994:   for (i = n - 1; i >= 0; i--) {
995:     v   = aa + a->diag[i] + 1;
996:     vi  = aj + a->diag[i] + 1;
997:     nz  = ai[i + 1] - a->diag[i] - 1;
998:     sum = tmp[i];
999:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1000:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1001:   }

1003:   PetscCall(ISRestoreIndices(isrow, &rout));
1004:   PetscCall(ISRestoreIndices(iscol, &cout));
1005:   PetscCall(VecRestoreArrayRead(bb, &b));
1006:   PetscCall(VecRestoreArrayWrite(xx, &x));
1007:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1008:   PetscFunctionReturn(PETSC_SUCCESS);
1009: }

1011: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1012: {
1013:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1014:   IS                 iscol = a->col, isrow = a->row;
1015:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1016:   PetscInt           nz, neq, ldb, ldx;
1017:   const PetscInt    *rout, *cout, *r, *c;
1018:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1019:   const PetscScalar *b, *aa  = a->a, *v;
1020:   PetscBool          isdense;

1022:   PetscFunctionBegin;
1023:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1024:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1025:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1026:   if (X != B) {
1027:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1028:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1029:   }
1030:   PetscCall(MatDenseGetArrayRead(B, &b));
1031:   PetscCall(MatDenseGetLDA(B, &ldb));
1032:   PetscCall(MatDenseGetArray(X, &x));
1033:   PetscCall(MatDenseGetLDA(X, &ldx));
1034:   PetscCall(ISGetIndices(isrow, &rout));
1035:   r = rout;
1036:   PetscCall(ISGetIndices(iscol, &cout));
1037:   c = cout;
1038:   for (neq = 0; neq < B->cmap->n; neq++) {
1039:     /* forward solve the lower triangular */
1040:     tmp[0] = b[r[0]];
1041:     tmps   = tmp;
1042:     for (i = 1; i < n; i++) {
1043:       v   = aa + ai[i];
1044:       vi  = aj + ai[i];
1045:       nz  = a->diag[i] - ai[i];
1046:       sum = b[r[i]];
1047:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1048:       tmp[i] = sum;
1049:     }
1050:     /* backward solve the upper triangular */
1051:     for (i = n - 1; i >= 0; i--) {
1052:       v   = aa + a->diag[i] + 1;
1053:       vi  = aj + a->diag[i] + 1;
1054:       nz  = ai[i + 1] - a->diag[i] - 1;
1055:       sum = tmp[i];
1056:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1057:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1058:     }
1059:     b += ldb;
1060:     x += ldx;
1061:   }
1062:   PetscCall(ISRestoreIndices(isrow, &rout));
1063:   PetscCall(ISRestoreIndices(iscol, &cout));
1064:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1065:   PetscCall(MatDenseRestoreArray(X, &x));
1066:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1067:   PetscFunctionReturn(PETSC_SUCCESS);
1068: }

1070: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1071: {
1072:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1073:   IS                 iscol = a->col, isrow = a->row;
1074:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1075:   PetscInt           nz, neq, ldb, ldx;
1076:   const PetscInt    *rout, *cout, *r, *c;
1077:   PetscScalar       *x, *tmp = a->solve_work, sum;
1078:   const PetscScalar *b, *aa  = a->a, *v;
1079:   PetscBool          isdense;

1081:   PetscFunctionBegin;
1082:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1083:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1084:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1085:   if (X != B) {
1086:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1087:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1088:   }
1089:   PetscCall(MatDenseGetArrayRead(B, &b));
1090:   PetscCall(MatDenseGetLDA(B, &ldb));
1091:   PetscCall(MatDenseGetArray(X, &x));
1092:   PetscCall(MatDenseGetLDA(X, &ldx));
1093:   PetscCall(ISGetIndices(isrow, &rout));
1094:   r = rout;
1095:   PetscCall(ISGetIndices(iscol, &cout));
1096:   c = cout;
1097:   for (neq = 0; neq < B->cmap->n; neq++) {
1098:     /* forward solve the lower triangular */
1099:     tmp[0] = b[r[0]];
1100:     v      = aa;
1101:     vi     = aj;
1102:     for (i = 1; i < n; i++) {
1103:       nz  = ai[i + 1] - ai[i];
1104:       sum = b[r[i]];
1105:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1106:       tmp[i] = sum;
1107:       v += nz;
1108:       vi += nz;
1109:     }
1110:     /* backward solve the upper triangular */
1111:     for (i = n - 1; i >= 0; i--) {
1112:       v   = aa + adiag[i + 1] + 1;
1113:       vi  = aj + adiag[i + 1] + 1;
1114:       nz  = adiag[i] - adiag[i + 1] - 1;
1115:       sum = tmp[i];
1116:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1117:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1118:     }
1119:     b += ldb;
1120:     x += ldx;
1121:   }
1122:   PetscCall(ISRestoreIndices(isrow, &rout));
1123:   PetscCall(ISRestoreIndices(iscol, &cout));
1124:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1125:   PetscCall(MatDenseRestoreArray(X, &x));
1126:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1127:   PetscFunctionReturn(PETSC_SUCCESS);
1128: }

1130: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1131: {
1132:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1133:   IS                 iscol = a->col, isrow = a->row;
1134:   const PetscInt    *r, *c, *rout, *cout;
1135:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1136:   PetscInt           nz, row;
1137:   PetscScalar       *x, *tmp, *tmps, sum;
1138:   const PetscScalar *b;
1139:   const MatScalar   *aa = a->a, *v;

1141:   PetscFunctionBegin;
1142:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1144:   PetscCall(VecGetArrayRead(bb, &b));
1145:   PetscCall(VecGetArrayWrite(xx, &x));
1146:   tmp = a->solve_work;

1148:   PetscCall(ISGetIndices(isrow, &rout));
1149:   r = rout;
1150:   PetscCall(ISGetIndices(iscol, &cout));
1151:   c = cout + (n - 1);

1153:   /* forward solve the lower triangular */
1154:   tmp[0] = b[*r++];
1155:   tmps   = tmp;
1156:   for (row = 1; row < n; row++) {
1157:     i   = rout[row]; /* permuted row */
1158:     v   = aa + ai[i];
1159:     vi  = aj + ai[i];
1160:     nz  = a->diag[i] - ai[i];
1161:     sum = b[*r++];
1162:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1163:     tmp[row] = sum;
1164:   }

1166:   /* backward solve the upper triangular */
1167:   for (row = n - 1; row >= 0; row--) {
1168:     i   = rout[row]; /* permuted row */
1169:     v   = aa + a->diag[i] + 1;
1170:     vi  = aj + a->diag[i] + 1;
1171:     nz  = ai[i + 1] - a->diag[i] - 1;
1172:     sum = tmp[row];
1173:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1174:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1175:   }

1177:   PetscCall(ISRestoreIndices(isrow, &rout));
1178:   PetscCall(ISRestoreIndices(iscol, &cout));
1179:   PetscCall(VecRestoreArrayRead(bb, &b));
1180:   PetscCall(VecRestoreArrayWrite(xx, &x));
1181:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1182:   PetscFunctionReturn(PETSC_SUCCESS);
1183: }

1185: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1186: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1187: {
1188:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1189:   PetscInt           n  = A->rmap->n;
1190:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1191:   PetscScalar       *x;
1192:   const PetscScalar *b;
1193:   const MatScalar   *aa = a->a;
1194: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1195:   PetscInt         adiag_i, i, nz, ai_i;
1196:   const PetscInt  *vi;
1197:   const MatScalar *v;
1198:   PetscScalar      sum;
1199: #endif

1201:   PetscFunctionBegin;
1202:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1204:   PetscCall(VecGetArrayRead(bb, &b));
1205:   PetscCall(VecGetArrayWrite(xx, &x));

1207: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1209: #else
1210:   /* forward solve the lower triangular */
1211:   x[0] = b[0];
1212:   for (i = 1; i < n; i++) {
1213:     ai_i = ai[i];
1214:     v    = aa + ai_i;
1215:     vi   = aj + ai_i;
1216:     nz   = adiag[i] - ai_i;
1217:     sum  = b[i];
1218:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1219:     x[i] = sum;
1220:   }

1222:   /* backward solve the upper triangular */
1223:   for (i = n - 1; i >= 0; i--) {
1224:     adiag_i = adiag[i];
1225:     v       = aa + adiag_i + 1;
1226:     vi      = aj + adiag_i + 1;
1227:     nz      = ai[i + 1] - adiag_i - 1;
1228:     sum     = x[i];
1229:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1230:     x[i] = sum * aa[adiag_i];
1231:   }
1232: #endif
1233:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1234:   PetscCall(VecRestoreArrayRead(bb, &b));
1235:   PetscCall(VecRestoreArrayWrite(xx, &x));
1236:   PetscFunctionReturn(PETSC_SUCCESS);
1237: }

1239: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1240: {
1241:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1242:   IS                 iscol = a->col, isrow = a->row;
1243:   PetscInt           i, n                  = A->rmap->n, j;
1244:   PetscInt           nz;
1245:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1246:   PetscScalar       *x, *tmp, sum;
1247:   const PetscScalar *b;
1248:   const MatScalar   *aa = a->a, *v;

1250:   PetscFunctionBegin;
1251:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1253:   PetscCall(VecGetArrayRead(bb, &b));
1254:   PetscCall(VecGetArray(xx, &x));
1255:   tmp = a->solve_work;

1257:   PetscCall(ISGetIndices(isrow, &rout));
1258:   r = rout;
1259:   PetscCall(ISGetIndices(iscol, &cout));
1260:   c = cout + (n - 1);

1262:   /* forward solve the lower triangular */
1263:   tmp[0] = b[*r++];
1264:   for (i = 1; i < n; i++) {
1265:     v   = aa + ai[i];
1266:     vi  = aj + ai[i];
1267:     nz  = a->diag[i] - ai[i];
1268:     sum = b[*r++];
1269:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1270:     tmp[i] = sum;
1271:   }

1273:   /* backward solve the upper triangular */
1274:   for (i = n - 1; i >= 0; i--) {
1275:     v   = aa + a->diag[i] + 1;
1276:     vi  = aj + a->diag[i] + 1;
1277:     nz  = ai[i + 1] - a->diag[i] - 1;
1278:     sum = tmp[i];
1279:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1280:     tmp[i] = sum * aa[a->diag[i]];
1281:     x[*c--] += tmp[i];
1282:   }

1284:   PetscCall(ISRestoreIndices(isrow, &rout));
1285:   PetscCall(ISRestoreIndices(iscol, &cout));
1286:   PetscCall(VecRestoreArrayRead(bb, &b));
1287:   PetscCall(VecRestoreArray(xx, &x));
1288:   PetscCall(PetscLogFlops(2.0 * a->nz));
1289:   PetscFunctionReturn(PETSC_SUCCESS);
1290: }

1292: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1293: {
1294:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1295:   IS                 iscol = a->col, isrow = a->row;
1296:   PetscInt           i, n                  = A->rmap->n, j;
1297:   PetscInt           nz;
1298:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1299:   PetscScalar       *x, *tmp, sum;
1300:   const PetscScalar *b;
1301:   const MatScalar   *aa = a->a, *v;

1303:   PetscFunctionBegin;
1304:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1306:   PetscCall(VecGetArrayRead(bb, &b));
1307:   PetscCall(VecGetArray(xx, &x));
1308:   tmp = a->solve_work;

1310:   PetscCall(ISGetIndices(isrow, &rout));
1311:   r = rout;
1312:   PetscCall(ISGetIndices(iscol, &cout));
1313:   c = cout;

1315:   /* forward solve the lower triangular */
1316:   tmp[0] = b[r[0]];
1317:   v      = aa;
1318:   vi     = aj;
1319:   for (i = 1; i < n; i++) {
1320:     nz  = ai[i + 1] - ai[i];
1321:     sum = b[r[i]];
1322:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1323:     tmp[i] = sum;
1324:     v += nz;
1325:     vi += nz;
1326:   }

1328:   /* backward solve the upper triangular */
1329:   v  = aa + adiag[n - 1];
1330:   vi = aj + adiag[n - 1];
1331:   for (i = n - 1; i >= 0; i--) {
1332:     nz  = adiag[i] - adiag[i + 1] - 1;
1333:     sum = tmp[i];
1334:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1335:     tmp[i] = sum * v[nz];
1336:     x[c[i]] += tmp[i];
1337:     v += nz + 1;
1338:     vi += nz + 1;
1339:   }

1341:   PetscCall(ISRestoreIndices(isrow, &rout));
1342:   PetscCall(ISRestoreIndices(iscol, &cout));
1343:   PetscCall(VecRestoreArrayRead(bb, &b));
1344:   PetscCall(VecRestoreArray(xx, &x));
1345:   PetscCall(PetscLogFlops(2.0 * a->nz));
1346:   PetscFunctionReturn(PETSC_SUCCESS);
1347: }

1349: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1350: {
1351:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1352:   IS                 iscol = a->col, isrow = a->row;
1353:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1354:   PetscInt           i, n = A->rmap->n, j;
1355:   PetscInt           nz;
1356:   PetscScalar       *x, *tmp, s1;
1357:   const MatScalar   *aa = a->a, *v;
1358:   const PetscScalar *b;

1360:   PetscFunctionBegin;
1361:   PetscCall(VecGetArrayRead(bb, &b));
1362:   PetscCall(VecGetArrayWrite(xx, &x));
1363:   tmp = a->solve_work;

1365:   PetscCall(ISGetIndices(isrow, &rout));
1366:   r = rout;
1367:   PetscCall(ISGetIndices(iscol, &cout));
1368:   c = cout;

1370:   /* copy the b into temp work space according to permutation */
1371:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1373:   /* forward solve the U^T */
1374:   for (i = 0; i < n; i++) {
1375:     v  = aa + diag[i];
1376:     vi = aj + diag[i] + 1;
1377:     nz = ai[i + 1] - diag[i] - 1;
1378:     s1 = tmp[i];
1379:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1380:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1381:     tmp[i] = s1;
1382:   }

1384:   /* backward solve the L^T */
1385:   for (i = n - 1; i >= 0; i--) {
1386:     v  = aa + diag[i] - 1;
1387:     vi = aj + diag[i] - 1;
1388:     nz = diag[i] - ai[i];
1389:     s1 = tmp[i];
1390:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1391:   }

1393:   /* copy tmp into x according to permutation */
1394:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1396:   PetscCall(ISRestoreIndices(isrow, &rout));
1397:   PetscCall(ISRestoreIndices(iscol, &cout));
1398:   PetscCall(VecRestoreArrayRead(bb, &b));
1399:   PetscCall(VecRestoreArrayWrite(xx, &x));

1401:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1402:   PetscFunctionReturn(PETSC_SUCCESS);
1403: }

1405: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1406: {
1407:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1408:   IS                 iscol = a->col, isrow = a->row;
1409:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1410:   PetscInt           i, n = A->rmap->n, j;
1411:   PetscInt           nz;
1412:   PetscScalar       *x, *tmp, s1;
1413:   const MatScalar   *aa = a->a, *v;
1414:   const PetscScalar *b;

1416:   PetscFunctionBegin;
1417:   PetscCall(VecGetArrayRead(bb, &b));
1418:   PetscCall(VecGetArrayWrite(xx, &x));
1419:   tmp = a->solve_work;

1421:   PetscCall(ISGetIndices(isrow, &rout));
1422:   r = rout;
1423:   PetscCall(ISGetIndices(iscol, &cout));
1424:   c = cout;

1426:   /* copy the b into temp work space according to permutation */
1427:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1429:   /* forward solve the U^T */
1430:   for (i = 0; i < n; i++) {
1431:     v  = aa + adiag[i + 1] + 1;
1432:     vi = aj + adiag[i + 1] + 1;
1433:     nz = adiag[i] - adiag[i + 1] - 1;
1434:     s1 = tmp[i];
1435:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1436:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1437:     tmp[i] = s1;
1438:   }

1440:   /* backward solve the L^T */
1441:   for (i = n - 1; i >= 0; i--) {
1442:     v  = aa + ai[i];
1443:     vi = aj + ai[i];
1444:     nz = ai[i + 1] - ai[i];
1445:     s1 = tmp[i];
1446:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1447:   }

1449:   /* copy tmp into x according to permutation */
1450:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1452:   PetscCall(ISRestoreIndices(isrow, &rout));
1453:   PetscCall(ISRestoreIndices(iscol, &cout));
1454:   PetscCall(VecRestoreArrayRead(bb, &b));
1455:   PetscCall(VecRestoreArrayWrite(xx, &x));

1457:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1458:   PetscFunctionReturn(PETSC_SUCCESS);
1459: }

1461: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1462: {
1463:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1464:   IS                 iscol = a->col, isrow = a->row;
1465:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1466:   PetscInt           i, n = A->rmap->n, j;
1467:   PetscInt           nz;
1468:   PetscScalar       *x, *tmp, s1;
1469:   const MatScalar   *aa = a->a, *v;
1470:   const PetscScalar *b;

1472:   PetscFunctionBegin;
1473:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1474:   PetscCall(VecGetArrayRead(bb, &b));
1475:   PetscCall(VecGetArray(xx, &x));
1476:   tmp = a->solve_work;

1478:   PetscCall(ISGetIndices(isrow, &rout));
1479:   r = rout;
1480:   PetscCall(ISGetIndices(iscol, &cout));
1481:   c = cout;

1483:   /* copy the b into temp work space according to permutation */
1484:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1486:   /* forward solve the U^T */
1487:   for (i = 0; i < n; i++) {
1488:     v  = aa + diag[i];
1489:     vi = aj + diag[i] + 1;
1490:     nz = ai[i + 1] - diag[i] - 1;
1491:     s1 = tmp[i];
1492:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1493:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1494:     tmp[i] = s1;
1495:   }

1497:   /* backward solve the L^T */
1498:   for (i = n - 1; i >= 0; i--) {
1499:     v  = aa + diag[i] - 1;
1500:     vi = aj + diag[i] - 1;
1501:     nz = diag[i] - ai[i];
1502:     s1 = tmp[i];
1503:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1504:   }

1506:   /* copy tmp into x according to permutation */
1507:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1509:   PetscCall(ISRestoreIndices(isrow, &rout));
1510:   PetscCall(ISRestoreIndices(iscol, &cout));
1511:   PetscCall(VecRestoreArrayRead(bb, &b));
1512:   PetscCall(VecRestoreArray(xx, &x));

1514:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1515:   PetscFunctionReturn(PETSC_SUCCESS);
1516: }

1518: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1519: {
1520:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1521:   IS                 iscol = a->col, isrow = a->row;
1522:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1523:   PetscInt           i, n = A->rmap->n, j;
1524:   PetscInt           nz;
1525:   PetscScalar       *x, *tmp, s1;
1526:   const MatScalar   *aa = a->a, *v;
1527:   const PetscScalar *b;

1529:   PetscFunctionBegin;
1530:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1531:   PetscCall(VecGetArrayRead(bb, &b));
1532:   PetscCall(VecGetArray(xx, &x));
1533:   tmp = a->solve_work;

1535:   PetscCall(ISGetIndices(isrow, &rout));
1536:   r = rout;
1537:   PetscCall(ISGetIndices(iscol, &cout));
1538:   c = cout;

1540:   /* copy the b into temp work space according to permutation */
1541:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1543:   /* forward solve the U^T */
1544:   for (i = 0; i < n; i++) {
1545:     v  = aa + adiag[i + 1] + 1;
1546:     vi = aj + adiag[i + 1] + 1;
1547:     nz = adiag[i] - adiag[i + 1] - 1;
1548:     s1 = tmp[i];
1549:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1550:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1551:     tmp[i] = s1;
1552:   }

1554:   /* backward solve the L^T */
1555:   for (i = n - 1; i >= 0; i--) {
1556:     v  = aa + ai[i];
1557:     vi = aj + ai[i];
1558:     nz = ai[i + 1] - ai[i];
1559:     s1 = tmp[i];
1560:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1561:   }

1563:   /* copy tmp into x according to permutation */
1564:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1566:   PetscCall(ISRestoreIndices(isrow, &rout));
1567:   PetscCall(ISRestoreIndices(iscol, &cout));
1568:   PetscCall(VecRestoreArrayRead(bb, &b));
1569:   PetscCall(VecRestoreArray(xx, &x));

1571:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1572:   PetscFunctionReturn(PETSC_SUCCESS);
1573: }

1575: /*
1576:    ilu() under revised new data structure.
1577:    Factored arrays bj and ba are stored as
1578:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1580:    bi=fact->i is an array of size n+1, in which
1581:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1582:      bi[n]:  points to L(n-1,n-1)+1

1584:   bdiag=fact->diag is an array of size n+1,in which
1585:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1586:      bdiag[n]: points to entry of U(n-1,0)-1

1588:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1589:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1590: */
1591: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1592: {
1593:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1594:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1595:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1596:   IS             isicol;

1598:   PetscFunctionBegin;
1599:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1600:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1601:   b = (Mat_SeqAIJ *)(fact)->data;

1603:   /* allocate matrix arrays for new data structure */
1604:   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));

1606:   b->singlemalloc = PETSC_TRUE;
1607:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1608:   bdiag = b->diag;

1610:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));

1612:   /* set bi and bj with new data structure */
1613:   bi = b->i;
1614:   bj = b->j;

1616:   /* L part */
1617:   bi[0] = 0;
1618:   for (i = 0; i < n; i++) {
1619:     nz        = adiag[i] - ai[i];
1620:     bi[i + 1] = bi[i] + nz;
1621:     aj        = a->j + ai[i];
1622:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1623:   }

1625:   /* U part */
1626:   bdiag[n] = bi[n] - 1;
1627:   for (i = n - 1; i >= 0; i--) {
1628:     nz = ai[i + 1] - adiag[i] - 1;
1629:     aj = a->j + adiag[i] + 1;
1630:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1631:     /* diag[i] */
1632:     bj[k++]  = i;
1633:     bdiag[i] = bdiag[i + 1] + nz + 1;
1634:   }

1636:   fact->factortype             = MAT_FACTOR_ILU;
1637:   fact->info.factor_mallocs    = 0;
1638:   fact->info.fill_ratio_given  = info->fill;
1639:   fact->info.fill_ratio_needed = 1.0;
1640:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1641:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1643:   b       = (Mat_SeqAIJ *)(fact)->data;
1644:   b->row  = isrow;
1645:   b->col  = iscol;
1646:   b->icol = isicol;
1647:   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1648:   PetscCall(PetscObjectReference((PetscObject)isrow));
1649:   PetscCall(PetscObjectReference((PetscObject)iscol));
1650:   PetscFunctionReturn(PETSC_SUCCESS);
1651: }

1653: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1654: {
1655:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1656:   IS                 isicol;
1657:   const PetscInt    *r, *ic;
1658:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1659:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1660:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1661:   PetscInt           i, levels, diagonal_fill;
1662:   PetscBool          col_identity, row_identity, missing;
1663:   PetscReal          f;
1664:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1665:   PetscBT            lnkbt;
1666:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1667:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1668:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1670:   PetscFunctionBegin;
1671:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1672:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1673:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1675:   levels = (PetscInt)info->levels;
1676:   PetscCall(ISIdentity(isrow, &row_identity));
1677:   PetscCall(ISIdentity(iscol, &col_identity));
1678:   if (!levels && row_identity && col_identity) {
1679:     /* special case: ilu(0) with natural ordering */
1680:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1681:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1682:     PetscFunctionReturn(PETSC_SUCCESS);
1683:   }

1685:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1686:   PetscCall(ISGetIndices(isrow, &r));
1687:   PetscCall(ISGetIndices(isicol, &ic));

1689:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1690:   PetscCall(PetscMalloc1(n + 1, &bi));
1691:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1692:   bi[0] = bdiag[0] = 0;
1693:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1695:   /* create a linked list for storing column indices of the active row */
1696:   nlnk = n + 1;
1697:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1699:   /* initial FreeSpace size is f*(ai[n]+1) */
1700:   f             = info->fill;
1701:   diagonal_fill = (PetscInt)info->diagonal_fill;
1702:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1703:   current_space = free_space;
1704:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1705:   current_space_lvl = free_space_lvl;
1706:   for (i = 0; i < n; i++) {
1707:     nzi = 0;
1708:     /* copy current row into linked list */
1709:     nnz = ai[r[i] + 1] - ai[r[i]];
1710:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1711:     cols   = aj + ai[r[i]];
1712:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1713:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1714:     nzi += nlnk;

1716:     /* make sure diagonal entry is included */
1717:     if (diagonal_fill && lnk[i] == -1) {
1718:       fm = n;
1719:       while (lnk[fm] < i) fm = lnk[fm];
1720:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1721:       lnk[fm]    = i;
1722:       lnk_lvl[i] = 0;
1723:       nzi++;
1724:       dcount++;
1725:     }

1727:     /* add pivot rows into the active row */
1728:     nzbd = 0;
1729:     prow = lnk[n];
1730:     while (prow < i) {
1731:       nnz      = bdiag[prow];
1732:       cols     = bj_ptr[prow] + nnz + 1;
1733:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1734:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1735:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1736:       nzi += nlnk;
1737:       prow = lnk[prow];
1738:       nzbd++;
1739:     }
1740:     bdiag[i]  = nzbd;
1741:     bi[i + 1] = bi[i] + nzi;
1742:     /* if free space is not available, make more free space */
1743:     if (current_space->local_remaining < nzi) {
1744:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1745:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1746:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1747:       reallocs++;
1748:     }

1750:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1751:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1752:     bj_ptr[i]    = current_space->array;
1753:     bjlvl_ptr[i] = current_space_lvl->array;

1755:     /* make sure the active row i has diagonal entry */
1756:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1758:     current_space->array += nzi;
1759:     current_space->local_used += nzi;
1760:     current_space->local_remaining -= nzi;
1761:     current_space_lvl->array += nzi;
1762:     current_space_lvl->local_used += nzi;
1763:     current_space_lvl->local_remaining -= nzi;
1764:   }

1766:   PetscCall(ISRestoreIndices(isrow, &r));
1767:   PetscCall(ISRestoreIndices(isicol, &ic));
1768:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1769:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1770:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1772:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1773:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1774:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1776: #if defined(PETSC_USE_INFO)
1777:   {
1778:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1779:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1780:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1781:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1782:     PetscCall(PetscInfo(A, "for best performance.\n"));
1783:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1784:   }
1785: #endif
1786:   /* put together the new matrix */
1787:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1788:   b = (Mat_SeqAIJ *)(fact)->data;

1790:   b->free_a       = PETSC_TRUE;
1791:   b->free_ij      = PETSC_TRUE;
1792:   b->singlemalloc = PETSC_FALSE;

1794:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

1796:   b->j    = bj;
1797:   b->i    = bi;
1798:   b->diag = bdiag;
1799:   b->ilen = NULL;
1800:   b->imax = NULL;
1801:   b->row  = isrow;
1802:   b->col  = iscol;
1803:   PetscCall(PetscObjectReference((PetscObject)isrow));
1804:   PetscCall(PetscObjectReference((PetscObject)iscol));
1805:   b->icol = isicol;

1807:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1808:   /* In b structure:  Free imax, ilen, old a, old j.
1809:      Allocate bdiag, solve_work, new a, new j */
1810:   b->maxnz = b->nz = bdiag[0] + 1;

1812:   (fact)->info.factor_mallocs    = reallocs;
1813:   (fact)->info.fill_ratio_given  = f;
1814:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1815:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1816:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1817:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1818:   PetscFunctionReturn(PETSC_SUCCESS);
1819: }

1821: #if 0
1822: // unused
1823: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1824: {
1825:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1826:   IS                 isicol;
1827:   const PetscInt    *r, *ic;
1828:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1829:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1830:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1831:   PetscInt           i, levels, diagonal_fill;
1832:   PetscBool          col_identity, row_identity;
1833:   PetscReal          f;
1834:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1835:   PetscBT            lnkbt;
1836:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1837:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1838:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1839:   PetscBool          missing;

1841:   PetscFunctionBegin;
1842:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1843:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1844:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1846:   f             = info->fill;
1847:   levels        = (PetscInt)info->levels;
1848:   diagonal_fill = (PetscInt)info->diagonal_fill;

1850:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

1852:   PetscCall(ISIdentity(isrow, &row_identity));
1853:   PetscCall(ISIdentity(iscol, &col_identity));
1854:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1855:     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));

1857:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1858:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1859:     fact->factortype               = MAT_FACTOR_ILU;
1860:     (fact)->info.factor_mallocs    = 0;
1861:     (fact)->info.fill_ratio_given  = info->fill;
1862:     (fact)->info.fill_ratio_needed = 1.0;

1864:     b       = (Mat_SeqAIJ *)(fact)->data;
1865:     b->row  = isrow;
1866:     b->col  = iscol;
1867:     b->icol = isicol;
1868:     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1869:     PetscCall(PetscObjectReference((PetscObject)isrow));
1870:     PetscCall(PetscObjectReference((PetscObject)iscol));
1871:     PetscFunctionReturn(PETSC_SUCCESS);
1872:   }

1874:   PetscCall(ISGetIndices(isrow, &r));
1875:   PetscCall(ISGetIndices(isicol, &ic));

1877:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1878:   PetscCall(PetscMalloc1(n + 1, &bi));
1879:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1880:   bi[0] = bdiag[0] = 0;

1882:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1884:   /* create a linked list for storing column indices of the active row */
1885:   nlnk = n + 1;
1886:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1888:   /* initial FreeSpace size is f*(ai[n]+1) */
1889:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1890:   current_space = free_space;
1891:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1892:   current_space_lvl = free_space_lvl;

1894:   for (i = 0; i < n; i++) {
1895:     nzi = 0;
1896:     /* copy current row into linked list */
1897:     nnz = ai[r[i] + 1] - ai[r[i]];
1898:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1899:     cols   = aj + ai[r[i]];
1900:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1901:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1902:     nzi += nlnk;

1904:     /* make sure diagonal entry is included */
1905:     if (diagonal_fill && lnk[i] == -1) {
1906:       fm = n;
1907:       while (lnk[fm] < i) fm = lnk[fm];
1908:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1909:       lnk[fm]    = i;
1910:       lnk_lvl[i] = 0;
1911:       nzi++;
1912:       dcount++;
1913:     }

1915:     /* add pivot rows into the active row */
1916:     nzbd = 0;
1917:     prow = lnk[n];
1918:     while (prow < i) {
1919:       nnz      = bdiag[prow];
1920:       cols     = bj_ptr[prow] + nnz + 1;
1921:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1922:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1923:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1924:       nzi += nlnk;
1925:       prow = lnk[prow];
1926:       nzbd++;
1927:     }
1928:     bdiag[i]  = nzbd;
1929:     bi[i + 1] = bi[i] + nzi;

1931:     /* if free space is not available, make more free space */
1932:     if (current_space->local_remaining < nzi) {
1933:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1934:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1935:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1936:       reallocs++;
1937:     }

1939:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1940:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1941:     bj_ptr[i]    = current_space->array;
1942:     bjlvl_ptr[i] = current_space_lvl->array;

1944:     /* make sure the active row i has diagonal entry */
1945:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1947:     current_space->array += nzi;
1948:     current_space->local_used += nzi;
1949:     current_space->local_remaining -= nzi;
1950:     current_space_lvl->array += nzi;
1951:     current_space_lvl->local_used += nzi;
1952:     current_space_lvl->local_remaining -= nzi;
1953:   }

1955:   PetscCall(ISRestoreIndices(isrow, &r));
1956:   PetscCall(ISRestoreIndices(isicol, &ic));

1958:   /* destroy list of free space and other temporary arrays */
1959:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1960:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
1961:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1962:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1963:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1965:   #if defined(PETSC_USE_INFO)
1966:   {
1967:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1968:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1969:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1970:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1971:     PetscCall(PetscInfo(A, "for best performance.\n"));
1972:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1973:   }
1974:   #endif

1976:   /* put together the new matrix */
1977:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1978:   b = (Mat_SeqAIJ *)(fact)->data;

1980:   b->free_a       = PETSC_TRUE;
1981:   b->free_ij      = PETSC_TRUE;
1982:   b->singlemalloc = PETSC_FALSE;

1984:   PetscCall(PetscMalloc1(bi[n], &b->a));
1985:   b->j = bj;
1986:   b->i = bi;
1987:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1988:   b->diag = bdiag;
1989:   b->ilen = NULL;
1990:   b->imax = NULL;
1991:   b->row  = isrow;
1992:   b->col  = iscol;
1993:   PetscCall(PetscObjectReference((PetscObject)isrow));
1994:   PetscCall(PetscObjectReference((PetscObject)iscol));
1995:   b->icol = isicol;
1996:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1997:   /* In b structure:  Free imax, ilen, old a, old j.
1998:      Allocate bdiag, solve_work, new a, new j */
1999:   b->maxnz = b->nz = bi[n];

2001:   (fact)->info.factor_mallocs    = reallocs;
2002:   (fact)->info.fill_ratio_given  = f;
2003:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2004:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2005:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2006:   PetscFunctionReturn(PETSC_SUCCESS);
2007: }
2008: #endif

2010: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2011: {
2012:   Mat             C  = B;
2013:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2014:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2015:   IS              ip = b->row, iip = b->icol;
2016:   const PetscInt *rip, *riip;
2017:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2018:   PetscInt       *ai = a->i, *aj = a->j;
2019:   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2020:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2021:   PetscBool       perm_identity;
2022:   FactorShiftCtx  sctx;
2023:   PetscReal       rs;
2024:   MatScalar       d, *v;

2026:   PetscFunctionBegin;
2027:   /* MatPivotSetUp(): initialize shift context sctx */
2028:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2030:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2031:     sctx.shift_top = info->zeropivot;
2032:     for (i = 0; i < mbs; i++) {
2033:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2034:       d  = (aa)[a->diag[i]];
2035:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2036:       v  = aa + ai[i];
2037:       nz = ai[i + 1] - ai[i];
2038:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2039:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2040:     }
2041:     sctx.shift_top *= 1.1;
2042:     sctx.nshift_max = 5;
2043:     sctx.shift_lo   = 0.;
2044:     sctx.shift_hi   = 1.;
2045:   }

2047:   PetscCall(ISGetIndices(ip, &rip));
2048:   PetscCall(ISGetIndices(iip, &riip));

2050:   /* allocate working arrays
2051:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2052:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2053:   */
2054:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

2056:   do {
2057:     sctx.newshift = PETSC_FALSE;

2059:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2060:     if (mbs) il[0] = 0;

2062:     for (k = 0; k < mbs; k++) {
2063:       /* zero rtmp */
2064:       nz    = bi[k + 1] - bi[k];
2065:       bjtmp = bj + bi[k];
2066:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2068:       /* load in initial unfactored row */
2069:       bval = ba + bi[k];
2070:       jmin = ai[rip[k]];
2071:       jmax = ai[rip[k] + 1];
2072:       for (j = jmin; j < jmax; j++) {
2073:         col = riip[aj[j]];
2074:         if (col >= k) { /* only take upper triangular entry */
2075:           rtmp[col] = aa[j];
2076:           *bval++   = 0.0; /* for in-place factorization */
2077:         }
2078:       }
2079:       /* shift the diagonal of the matrix: ZeropivotApply() */
2080:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2082:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2083:       dk = rtmp[k];
2084:       i  = c2r[k]; /* first row to be added to k_th row  */

2086:       while (i < k) {
2087:         nexti = c2r[i]; /* next row to be added to k_th row */

2089:         /* compute multiplier, update diag(k) and U(i,k) */
2090:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2091:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2092:         dk += uikdi * ba[ili];           /* update diag[k] */
2093:         ba[ili] = uikdi;                 /* -U(i,k) */

2095:         /* add multiple of row i to k-th row */
2096:         jmin = ili + 1;
2097:         jmax = bi[i + 1];
2098:         if (jmin < jmax) {
2099:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2100:           /* update il and c2r for row i */
2101:           il[i]  = jmin;
2102:           j      = bj[jmin];
2103:           c2r[i] = c2r[j];
2104:           c2r[j] = i;
2105:         }
2106:         i = nexti;
2107:       }

2109:       /* copy data into U(k,:) */
2110:       rs   = 0.0;
2111:       jmin = bi[k];
2112:       jmax = bi[k + 1] - 1;
2113:       if (jmin < jmax) {
2114:         for (j = jmin; j < jmax; j++) {
2115:           col   = bj[j];
2116:           ba[j] = rtmp[col];
2117:           rs += PetscAbsScalar(ba[j]);
2118:         }
2119:         /* add the k-th row into il and c2r */
2120:         il[k]  = jmin;
2121:         i      = bj[jmin];
2122:         c2r[k] = c2r[i];
2123:         c2r[i] = k;
2124:       }

2126:       /* MatPivotCheck() */
2127:       sctx.rs = rs;
2128:       sctx.pv = dk;
2129:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2130:       if (sctx.newshift) break;
2131:       dk = sctx.pv;

2133:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2134:     }
2135:   } while (sctx.newshift);

2137:   PetscCall(PetscFree3(rtmp, il, c2r));
2138:   PetscCall(ISRestoreIndices(ip, &rip));
2139:   PetscCall(ISRestoreIndices(iip, &riip));

2141:   PetscCall(ISIdentity(ip, &perm_identity));
2142:   if (perm_identity) {
2143:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2144:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2145:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2146:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2147:   } else {
2148:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2149:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2150:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2151:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2152:   }

2154:   C->assembled    = PETSC_TRUE;
2155:   C->preallocated = PETSC_TRUE;

2157:   PetscCall(PetscLogFlops(C->rmap->n));

2159:   /* MatPivotView() */
2160:   if (sctx.nshift) {
2161:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2162:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2163:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2164:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2165:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2166:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2167:     }
2168:   }
2169:   PetscFunctionReturn(PETSC_SUCCESS);
2170: }

2172: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2173: {
2174:   Mat             C  = B;
2175:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2176:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2177:   IS              ip = b->row, iip = b->icol;
2178:   const PetscInt *rip, *riip;
2179:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2180:   PetscInt       *ai = a->i, *aj = a->j;
2181:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2182:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2183:   PetscBool       perm_identity;
2184:   FactorShiftCtx  sctx;
2185:   PetscReal       rs;
2186:   MatScalar       d, *v;

2188:   PetscFunctionBegin;
2189:   /* MatPivotSetUp(): initialize shift context sctx */
2190:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2192:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2193:     sctx.shift_top = info->zeropivot;
2194:     for (i = 0; i < mbs; i++) {
2195:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2196:       d  = (aa)[a->diag[i]];
2197:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2198:       v  = aa + ai[i];
2199:       nz = ai[i + 1] - ai[i];
2200:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2201:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2202:     }
2203:     sctx.shift_top *= 1.1;
2204:     sctx.nshift_max = 5;
2205:     sctx.shift_lo   = 0.;
2206:     sctx.shift_hi   = 1.;
2207:   }

2209:   PetscCall(ISGetIndices(ip, &rip));
2210:   PetscCall(ISGetIndices(iip, &riip));

2212:   /* initialization */
2213:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

2215:   do {
2216:     sctx.newshift = PETSC_FALSE;

2218:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2219:     il[0] = 0;

2221:     for (k = 0; k < mbs; k++) {
2222:       /* zero rtmp */
2223:       nz    = bi[k + 1] - bi[k];
2224:       bjtmp = bj + bi[k];
2225:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2227:       bval = ba + bi[k];
2228:       /* initialize k-th row by the perm[k]-th row of A */
2229:       jmin = ai[rip[k]];
2230:       jmax = ai[rip[k] + 1];
2231:       for (j = jmin; j < jmax; j++) {
2232:         col = riip[aj[j]];
2233:         if (col >= k) { /* only take upper triangular entry */
2234:           rtmp[col] = aa[j];
2235:           *bval++   = 0.0; /* for in-place factorization */
2236:         }
2237:       }
2238:       /* shift the diagonal of the matrix */
2239:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2241:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2242:       dk = rtmp[k];
2243:       i  = jl[k]; /* first row to be added to k_th row  */

2245:       while (i < k) {
2246:         nexti = jl[i]; /* next row to be added to k_th row */

2248:         /* compute multiplier, update diag(k) and U(i,k) */
2249:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2250:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2251:         dk += uikdi * ba[ili];
2252:         ba[ili] = uikdi; /* -U(i,k) */

2254:         /* add multiple of row i to k-th row */
2255:         jmin = ili + 1;
2256:         jmax = bi[i + 1];
2257:         if (jmin < jmax) {
2258:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2259:           /* update il and jl for row i */
2260:           il[i] = jmin;
2261:           j     = bj[jmin];
2262:           jl[i] = jl[j];
2263:           jl[j] = i;
2264:         }
2265:         i = nexti;
2266:       }

2268:       /* shift the diagonals when zero pivot is detected */
2269:       /* compute rs=sum of abs(off-diagonal) */
2270:       rs   = 0.0;
2271:       jmin = bi[k] + 1;
2272:       nz   = bi[k + 1] - jmin;
2273:       bcol = bj + jmin;
2274:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2276:       sctx.rs = rs;
2277:       sctx.pv = dk;
2278:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2279:       if (sctx.newshift) break;
2280:       dk = sctx.pv;

2282:       /* copy data into U(k,:) */
2283:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2284:       jmin      = bi[k] + 1;
2285:       jmax      = bi[k + 1];
2286:       if (jmin < jmax) {
2287:         for (j = jmin; j < jmax; j++) {
2288:           col   = bj[j];
2289:           ba[j] = rtmp[col];
2290:         }
2291:         /* add the k-th row into il and jl */
2292:         il[k] = jmin;
2293:         i     = bj[jmin];
2294:         jl[k] = jl[i];
2295:         jl[i] = k;
2296:       }
2297:     }
2298:   } while (sctx.newshift);

2300:   PetscCall(PetscFree3(rtmp, il, jl));
2301:   PetscCall(ISRestoreIndices(ip, &rip));
2302:   PetscCall(ISRestoreIndices(iip, &riip));

2304:   PetscCall(ISIdentity(ip, &perm_identity));
2305:   if (perm_identity) {
2306:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2307:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2308:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2309:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2310:   } else {
2311:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2312:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2313:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2314:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2315:   }

2317:   C->assembled    = PETSC_TRUE;
2318:   C->preallocated = PETSC_TRUE;

2320:   PetscCall(PetscLogFlops(C->rmap->n));
2321:   if (sctx.nshift) {
2322:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2323:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2324:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2325:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2326:     }
2327:   }
2328:   PetscFunctionReturn(PETSC_SUCCESS);
2329: }

2331: /*
2332:    icc() under revised new data structure.
2333:    Factored arrays bj and ba are stored as
2334:      U(0,:),...,U(i,:),U(n-1,:)

2336:    ui=fact->i is an array of size n+1, in which
2337:    ui+
2338:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2339:      ui[n]:  points to U(n-1,n-1)+1

2341:   udiag=fact->diag is an array of size n,in which
2342:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2344:    U(i,:) contains udiag[i] as its last entry, i.e.,
2345:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2346: */

2348: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2349: {
2350:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2351:   Mat_SeqSBAIJ      *b;
2352:   PetscBool          perm_identity, missing;
2353:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2354:   const PetscInt    *rip, *riip;
2355:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2356:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2357:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2358:   PetscReal          fill = info->fill, levels = info->levels;
2359:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2360:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2361:   PetscBT            lnkbt;
2362:   IS                 iperm;

2364:   PetscFunctionBegin;
2365:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2366:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2367:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2368:   PetscCall(ISIdentity(perm, &perm_identity));
2369:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2371:   PetscCall(PetscMalloc1(am + 1, &ui));
2372:   PetscCall(PetscMalloc1(am + 1, &udiag));
2373:   ui[0] = 0;

2375:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2376:   if (!levels && perm_identity) {
2377:     for (i = 0; i < am; i++) {
2378:       ncols     = ai[i + 1] - a->diag[i];
2379:       ui[i + 1] = ui[i] + ncols;
2380:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2381:     }
2382:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2383:     cols = uj;
2384:     for (i = 0; i < am; i++) {
2385:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2386:       ncols = ai[i + 1] - a->diag[i] - 1;
2387:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2388:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2389:     }
2390:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2391:     PetscCall(ISGetIndices(iperm, &riip));
2392:     PetscCall(ISGetIndices(perm, &rip));

2394:     /* initialization */
2395:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2397:     /* jl: linked list for storing indices of the pivot rows
2398:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2399:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2400:     for (i = 0; i < am; i++) {
2401:       jl[i] = am;
2402:       il[i] = 0;
2403:     }

2405:     /* create and initialize a linked list for storing column indices of the active row k */
2406:     nlnk = am + 1;
2407:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2409:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2410:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2411:     current_space = free_space;
2412:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2413:     current_space_lvl = free_space_lvl;

2415:     for (k = 0; k < am; k++) { /* for each active row k */
2416:       /* initialize lnk by the column indices of row rip[k] of A */
2417:       nzk   = 0;
2418:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2419:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2420:       ncols_upper = 0;
2421:       for (j = 0; j < ncols; j++) {
2422:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2423:         if (riip[i] >= k) {         /* only take upper triangular entry */
2424:           ajtmp[ncols_upper] = i;
2425:           ncols_upper++;
2426:         }
2427:       }
2428:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2429:       nzk += nlnk;

2431:       /* update lnk by computing fill-in for each pivot row to be merged in */
2432:       prow = jl[k]; /* 1st pivot row */

2434:       while (prow < k) {
2435:         nextprow = jl[prow];

2437:         /* merge prow into k-th row */
2438:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2439:         jmax  = ui[prow + 1];
2440:         ncols = jmax - jmin;
2441:         i     = jmin - ui[prow];
2442:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2443:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2444:         j     = *(uj - 1);
2445:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2446:         nzk += nlnk;

2448:         /* update il and jl for prow */
2449:         if (jmin < jmax) {
2450:           il[prow] = jmin;
2451:           j        = *cols;
2452:           jl[prow] = jl[j];
2453:           jl[j]    = prow;
2454:         }
2455:         prow = nextprow;
2456:       }

2458:       /* if free space is not available, make more free space */
2459:       if (current_space->local_remaining < nzk) {
2460:         i = am - k + 1;                                    /* num of unfactored rows */
2461:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2462:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2463:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2464:         reallocs++;
2465:       }

2467:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2468:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2469:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2471:       /* add the k-th row into il and jl */
2472:       if (nzk > 1) {
2473:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2474:         jl[k] = jl[i];
2475:         jl[i] = k;
2476:         il[k] = ui[k] + 1;
2477:       }
2478:       uj_ptr[k]     = current_space->array;
2479:       uj_lvl_ptr[k] = current_space_lvl->array;

2481:       current_space->array += nzk;
2482:       current_space->local_used += nzk;
2483:       current_space->local_remaining -= nzk;

2485:       current_space_lvl->array += nzk;
2486:       current_space_lvl->local_used += nzk;
2487:       current_space_lvl->local_remaining -= nzk;

2489:       ui[k + 1] = ui[k] + nzk;
2490:     }

2492:     PetscCall(ISRestoreIndices(perm, &rip));
2493:     PetscCall(ISRestoreIndices(iperm, &riip));
2494:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2495:     PetscCall(PetscFree(ajtmp));

2497:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2498:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2499:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2500:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2501:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2503:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2505:   /* put together the new matrix in MATSEQSBAIJ format */
2506:   b               = (Mat_SeqSBAIJ *)(fact)->data;
2507:   b->singlemalloc = PETSC_FALSE;

2509:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2511:   b->j         = uj;
2512:   b->i         = ui;
2513:   b->diag      = udiag;
2514:   b->free_diag = PETSC_TRUE;
2515:   b->ilen      = NULL;
2516:   b->imax      = NULL;
2517:   b->row       = perm;
2518:   b->col       = perm;
2519:   PetscCall(PetscObjectReference((PetscObject)perm));
2520:   PetscCall(PetscObjectReference((PetscObject)perm));
2521:   b->icol          = iperm;
2522:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2524:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2526:   b->maxnz = b->nz = ui[am];
2527:   b->free_a        = PETSC_TRUE;
2528:   b->free_ij       = PETSC_TRUE;

2530:   fact->info.factor_mallocs   = reallocs;
2531:   fact->info.fill_ratio_given = fill;
2532:   if (ai[am] != 0) {
2533:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2534:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2535:   } else {
2536:     fact->info.fill_ratio_needed = 0.0;
2537:   }
2538: #if defined(PETSC_USE_INFO)
2539:   if (ai[am] != 0) {
2540:     PetscReal af = fact->info.fill_ratio_needed;
2541:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2542:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2543:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2544:   } else {
2545:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2546:   }
2547: #endif
2548:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2549:   PetscFunctionReturn(PETSC_SUCCESS);
2550: }

2552: #if 0
2553: // unused
2554: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2555: {
2556:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2557:   Mat_SeqSBAIJ      *b;
2558:   PetscBool          perm_identity, missing;
2559:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2560:   const PetscInt    *rip, *riip;
2561:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2562:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2563:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2564:   PetscReal          fill = info->fill, levels = info->levels;
2565:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2566:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2567:   PetscBT            lnkbt;
2568:   IS                 iperm;

2570:   PetscFunctionBegin;
2571:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2572:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2573:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2574:   PetscCall(ISIdentity(perm, &perm_identity));
2575:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2577:   PetscCall(PetscMalloc1(am + 1, &ui));
2578:   PetscCall(PetscMalloc1(am + 1, &udiag));
2579:   ui[0] = 0;

2581:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2582:   if (!levels && perm_identity) {
2583:     for (i = 0; i < am; i++) {
2584:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2585:       udiag[i]  = ui[i];
2586:     }
2587:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2588:     cols = uj;
2589:     for (i = 0; i < am; i++) {
2590:       aj    = a->j + a->diag[i];
2591:       ncols = ui[i + 1] - ui[i];
2592:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2593:     }
2594:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2595:     PetscCall(ISGetIndices(iperm, &riip));
2596:     PetscCall(ISGetIndices(perm, &rip));

2598:     /* initialization */
2599:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2601:     /* jl: linked list for storing indices of the pivot rows
2602:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2603:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2604:     for (i = 0; i < am; i++) {
2605:       jl[i] = am;
2606:       il[i] = 0;
2607:     }

2609:     /* create and initialize a linked list for storing column indices of the active row k */
2610:     nlnk = am + 1;
2611:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2613:     /* initial FreeSpace size is fill*(ai[am]+1) */
2614:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2615:     current_space = free_space;
2616:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2617:     current_space_lvl = free_space_lvl;

2619:     for (k = 0; k < am; k++) { /* for each active row k */
2620:       /* initialize lnk by the column indices of row rip[k] of A */
2621:       nzk   = 0;
2622:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2623:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2624:       ncols_upper = 0;
2625:       for (j = 0; j < ncols; j++) {
2626:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2627:         if (riip[i] >= k) {         /* only take upper triangular entry */
2628:           ajtmp[ncols_upper] = i;
2629:           ncols_upper++;
2630:         }
2631:       }
2632:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2633:       nzk += nlnk;

2635:       /* update lnk by computing fill-in for each pivot row to be merged in */
2636:       prow = jl[k]; /* 1st pivot row */

2638:       while (prow < k) {
2639:         nextprow = jl[prow];

2641:         /* merge prow into k-th row */
2642:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2643:         jmax  = ui[prow + 1];
2644:         ncols = jmax - jmin;
2645:         i     = jmin - ui[prow];
2646:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2647:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2648:         j     = *(uj - 1);
2649:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2650:         nzk += nlnk;

2652:         /* update il and jl for prow */
2653:         if (jmin < jmax) {
2654:           il[prow] = jmin;
2655:           j        = *cols;
2656:           jl[prow] = jl[j];
2657:           jl[j]    = prow;
2658:         }
2659:         prow = nextprow;
2660:       }

2662:       /* if free space is not available, make more free space */
2663:       if (current_space->local_remaining < nzk) {
2664:         i = am - k + 1;                                    /* num of unfactored rows */
2665:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2666:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2667:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2668:         reallocs++;
2669:       }

2671:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2672:       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2673:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2675:       /* add the k-th row into il and jl */
2676:       if (nzk > 1) {
2677:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2678:         jl[k] = jl[i];
2679:         jl[i] = k;
2680:         il[k] = ui[k] + 1;
2681:       }
2682:       uj_ptr[k]     = current_space->array;
2683:       uj_lvl_ptr[k] = current_space_lvl->array;

2685:       current_space->array += nzk;
2686:       current_space->local_used += nzk;
2687:       current_space->local_remaining -= nzk;

2689:       current_space_lvl->array += nzk;
2690:       current_space_lvl->local_used += nzk;
2691:       current_space_lvl->local_remaining -= nzk;

2693:       ui[k + 1] = ui[k] + nzk;
2694:     }

2696:   #if defined(PETSC_USE_INFO)
2697:     if (ai[am] != 0) {
2698:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2699:       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2700:       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2701:       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2702:     } else {
2703:       PetscCall(PetscInfo(A, "Empty matrix\n"));
2704:     }
2705:   #endif

2707:     PetscCall(ISRestoreIndices(perm, &rip));
2708:     PetscCall(ISRestoreIndices(iperm, &riip));
2709:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2710:     PetscCall(PetscFree(ajtmp));

2712:     /* destroy list of free space and other temporary array(s) */
2713:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2714:     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2715:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2716:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2718:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2720:   /* put together the new matrix in MATSEQSBAIJ format */

2722:   b               = (Mat_SeqSBAIJ *)fact->data;
2723:   b->singlemalloc = PETSC_FALSE;

2725:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2727:   b->j         = uj;
2728:   b->i         = ui;
2729:   b->diag      = udiag;
2730:   b->free_diag = PETSC_TRUE;
2731:   b->ilen      = NULL;
2732:   b->imax      = NULL;
2733:   b->row       = perm;
2734:   b->col       = perm;

2736:   PetscCall(PetscObjectReference((PetscObject)perm));
2737:   PetscCall(PetscObjectReference((PetscObject)perm));

2739:   b->icol          = iperm;
2740:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2741:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2742:   b->maxnz = b->nz = ui[am];
2743:   b->free_a        = PETSC_TRUE;
2744:   b->free_ij       = PETSC_TRUE;

2746:   fact->info.factor_mallocs   = reallocs;
2747:   fact->info.fill_ratio_given = fill;
2748:   if (ai[am] != 0) {
2749:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2750:   } else {
2751:     fact->info.fill_ratio_needed = 0.0;
2752:   }
2753:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2754:   PetscFunctionReturn(PETSC_SUCCESS);
2755: }
2756: #endif

2758: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2759: {
2760:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2761:   Mat_SeqSBAIJ      *b;
2762:   PetscBool          perm_identity, missing;
2763:   PetscReal          fill = info->fill;
2764:   const PetscInt    *rip, *riip;
2765:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2766:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2767:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2768:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2769:   PetscBT            lnkbt;
2770:   IS                 iperm;

2772:   PetscFunctionBegin;
2773:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2774:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2775:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2777:   /* check whether perm is the identity mapping */
2778:   PetscCall(ISIdentity(perm, &perm_identity));
2779:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2780:   PetscCall(ISGetIndices(iperm, &riip));
2781:   PetscCall(ISGetIndices(perm, &rip));

2783:   /* initialization */
2784:   PetscCall(PetscMalloc1(am + 1, &ui));
2785:   PetscCall(PetscMalloc1(am + 1, &udiag));
2786:   ui[0] = 0;

2788:   /* jl: linked list for storing indices of the pivot rows
2789:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2790:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2791:   for (i = 0; i < am; i++) {
2792:     jl[i] = am;
2793:     il[i] = 0;
2794:   }

2796:   /* create and initialize a linked list for storing column indices of the active row k */
2797:   nlnk = am + 1;
2798:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2800:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2801:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2802:   current_space = free_space;

2804:   for (k = 0; k < am; k++) { /* for each active row k */
2805:     /* initialize lnk by the column indices of row rip[k] of A */
2806:     nzk   = 0;
2807:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2808:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2809:     ncols_upper = 0;
2810:     for (j = 0; j < ncols; j++) {
2811:       i = riip[*(aj + ai[rip[k]] + j)];
2812:       if (i >= k) { /* only take upper triangular entry */
2813:         cols[ncols_upper] = i;
2814:         ncols_upper++;
2815:       }
2816:     }
2817:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2818:     nzk += nlnk;

2820:     /* update lnk by computing fill-in for each pivot row to be merged in */
2821:     prow = jl[k]; /* 1st pivot row */

2823:     while (prow < k) {
2824:       nextprow = jl[prow];
2825:       /* merge prow into k-th row */
2826:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2827:       jmax   = ui[prow + 1];
2828:       ncols  = jmax - jmin;
2829:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2830:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2831:       nzk += nlnk;

2833:       /* update il and jl for prow */
2834:       if (jmin < jmax) {
2835:         il[prow] = jmin;
2836:         j        = *uj_ptr;
2837:         jl[prow] = jl[j];
2838:         jl[j]    = prow;
2839:       }
2840:       prow = nextprow;
2841:     }

2843:     /* if free space is not available, make more free space */
2844:     if (current_space->local_remaining < nzk) {
2845:       i = am - k + 1;                                    /* num of unfactored rows */
2846:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2847:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2848:       reallocs++;
2849:     }

2851:     /* copy data into free space, then initialize lnk */
2852:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2854:     /* add the k-th row into il and jl */
2855:     if (nzk > 1) {
2856:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2857:       jl[k] = jl[i];
2858:       jl[i] = k;
2859:       il[k] = ui[k] + 1;
2860:     }
2861:     ui_ptr[k] = current_space->array;

2863:     current_space->array += nzk;
2864:     current_space->local_used += nzk;
2865:     current_space->local_remaining -= nzk;

2867:     ui[k + 1] = ui[k] + nzk;
2868:   }

2870:   PetscCall(ISRestoreIndices(perm, &rip));
2871:   PetscCall(ISRestoreIndices(iperm, &riip));
2872:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2874:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2875:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2876:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2877:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2879:   /* put together the new matrix in MATSEQSBAIJ format */

2881:   b               = (Mat_SeqSBAIJ *)fact->data;
2882:   b->singlemalloc = PETSC_FALSE;
2883:   b->free_a       = PETSC_TRUE;
2884:   b->free_ij      = PETSC_TRUE;

2886:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2888:   b->j         = uj;
2889:   b->i         = ui;
2890:   b->diag      = udiag;
2891:   b->free_diag = PETSC_TRUE;
2892:   b->ilen      = NULL;
2893:   b->imax      = NULL;
2894:   b->row       = perm;
2895:   b->col       = perm;

2897:   PetscCall(PetscObjectReference((PetscObject)perm));
2898:   PetscCall(PetscObjectReference((PetscObject)perm));

2900:   b->icol          = iperm;
2901:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2903:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2905:   b->maxnz = b->nz = ui[am];

2907:   fact->info.factor_mallocs   = reallocs;
2908:   fact->info.fill_ratio_given = fill;
2909:   if (ai[am] != 0) {
2910:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2911:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2912:   } else {
2913:     fact->info.fill_ratio_needed = 0.0;
2914:   }
2915: #if defined(PETSC_USE_INFO)
2916:   if (ai[am] != 0) {
2917:     PetscReal af = fact->info.fill_ratio_needed;
2918:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2919:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2920:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2921:   } else {
2922:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2923:   }
2924: #endif
2925:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2926:   PetscFunctionReturn(PETSC_SUCCESS);
2927: }

2929: #if 0
2930: // unused
2931: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2932: {
2933:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2934:   Mat_SeqSBAIJ      *b;
2935:   PetscBool          perm_identity, missing;
2936:   PetscReal          fill = info->fill;
2937:   const PetscInt    *rip, *riip;
2938:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2939:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2940:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2941:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2942:   PetscBT            lnkbt;
2943:   IS                 iperm;

2945:   PetscFunctionBegin;
2946:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2947:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2948:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2950:   /* check whether perm is the identity mapping */
2951:   PetscCall(ISIdentity(perm, &perm_identity));
2952:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2953:   PetscCall(ISGetIndices(iperm, &riip));
2954:   PetscCall(ISGetIndices(perm, &rip));

2956:   /* initialization */
2957:   PetscCall(PetscMalloc1(am + 1, &ui));
2958:   ui[0] = 0;

2960:   /* jl: linked list for storing indices of the pivot rows
2961:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2962:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2963:   for (i = 0; i < am; i++) {
2964:     jl[i] = am;
2965:     il[i] = 0;
2966:   }

2968:   /* create and initialize a linked list for storing column indices of the active row k */
2969:   nlnk = am + 1;
2970:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2972:   /* initial FreeSpace size is fill*(ai[am]+1) */
2973:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2974:   current_space = free_space;

2976:   for (k = 0; k < am; k++) { /* for each active row k */
2977:     /* initialize lnk by the column indices of row rip[k] of A */
2978:     nzk   = 0;
2979:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2980:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2981:     ncols_upper = 0;
2982:     for (j = 0; j < ncols; j++) {
2983:       i = riip[*(aj + ai[rip[k]] + j)];
2984:       if (i >= k) { /* only take upper triangular entry */
2985:         cols[ncols_upper] = i;
2986:         ncols_upper++;
2987:       }
2988:     }
2989:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2990:     nzk += nlnk;

2992:     /* update lnk by computing fill-in for each pivot row to be merged in */
2993:     prow = jl[k]; /* 1st pivot row */

2995:     while (prow < k) {
2996:       nextprow = jl[prow];
2997:       /* merge prow into k-th row */
2998:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2999:       jmax   = ui[prow + 1];
3000:       ncols  = jmax - jmin;
3001:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3002:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3003:       nzk += nlnk;

3005:       /* update il and jl for prow */
3006:       if (jmin < jmax) {
3007:         il[prow] = jmin;
3008:         j        = *uj_ptr;
3009:         jl[prow] = jl[j];
3010:         jl[j]    = prow;
3011:       }
3012:       prow = nextprow;
3013:     }

3015:     /* if free space is not available, make more free space */
3016:     if (current_space->local_remaining < nzk) {
3017:       i = am - k + 1;                     /* num of unfactored rows */
3018:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3019:       PetscCall(PetscFreeSpaceGet(i, &current_space));
3020:       reallocs++;
3021:     }

3023:     /* copy data into free space, then initialize lnk */
3024:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

3026:     /* add the k-th row into il and jl */
3027:     if (nzk - 1 > 0) {
3028:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3029:       jl[k] = jl[i];
3030:       jl[i] = k;
3031:       il[k] = ui[k] + 1;
3032:     }
3033:     ui_ptr[k] = current_space->array;

3035:     current_space->array += nzk;
3036:     current_space->local_used += nzk;
3037:     current_space->local_remaining -= nzk;

3039:     ui[k + 1] = ui[k] + nzk;
3040:   }

3042:   #if defined(PETSC_USE_INFO)
3043:   if (ai[am] != 0) {
3044:     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3045:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3046:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3047:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3048:   } else {
3049:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3050:   }
3051:   #endif

3053:   PetscCall(ISRestoreIndices(perm, &rip));
3054:   PetscCall(ISRestoreIndices(iperm, &riip));
3055:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

3057:   /* destroy list of free space and other temporary array(s) */
3058:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3059:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3060:   PetscCall(PetscLLDestroy(lnk, lnkbt));

3062:   /* put together the new matrix in MATSEQSBAIJ format */

3064:   b               = (Mat_SeqSBAIJ *)fact->data;
3065:   b->singlemalloc = PETSC_FALSE;
3066:   b->free_a       = PETSC_TRUE;
3067:   b->free_ij      = PETSC_TRUE;

3069:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

3071:   b->j    = uj;
3072:   b->i    = ui;
3073:   b->diag = NULL;
3074:   b->ilen = NULL;
3075:   b->imax = NULL;
3076:   b->row  = perm;
3077:   b->col  = perm;

3079:   PetscCall(PetscObjectReference((PetscObject)perm));
3080:   PetscCall(PetscObjectReference((PetscObject)perm));

3082:   b->icol          = iperm;
3083:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3085:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3086:   b->maxnz = b->nz = ui[am];

3088:   fact->info.factor_mallocs   = reallocs;
3089:   fact->info.fill_ratio_given = fill;
3090:   if (ai[am] != 0) {
3091:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3092:   } else {
3093:     fact->info.fill_ratio_needed = 0.0;
3094:   }
3095:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3096:   PetscFunctionReturn(PETSC_SUCCESS);
3097: }
3098: #endif

3100: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3101: {
3102:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3103:   PetscInt           n  = A->rmap->n;
3104:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3105:   PetscScalar       *x, sum;
3106:   const PetscScalar *b;
3107:   const MatScalar   *aa = a->a, *v;
3108:   PetscInt           i, nz;

3110:   PetscFunctionBegin;
3111:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3113:   PetscCall(VecGetArrayRead(bb, &b));
3114:   PetscCall(VecGetArrayWrite(xx, &x));

3116:   /* forward solve the lower triangular */
3117:   x[0] = b[0];
3118:   v    = aa;
3119:   vi   = aj;
3120:   for (i = 1; i < n; i++) {
3121:     nz  = ai[i + 1] - ai[i];
3122:     sum = b[i];
3123:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3124:     v += nz;
3125:     vi += nz;
3126:     x[i] = sum;
3127:   }

3129:   /* backward solve the upper triangular */
3130:   for (i = n - 1; i >= 0; i--) {
3131:     v   = aa + adiag[i + 1] + 1;
3132:     vi  = aj + adiag[i + 1] + 1;
3133:     nz  = adiag[i] - adiag[i + 1] - 1;
3134:     sum = x[i];
3135:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3136:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3137:   }

3139:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3140:   PetscCall(VecRestoreArrayRead(bb, &b));
3141:   PetscCall(VecRestoreArrayWrite(xx, &x));
3142:   PetscFunctionReturn(PETSC_SUCCESS);
3143: }

3145: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3146: {
3147:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3148:   IS                 iscol = a->col, isrow = a->row;
3149:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3150:   const PetscInt    *rout, *cout, *r, *c;
3151:   PetscScalar       *x, *tmp, sum;
3152:   const PetscScalar *b;
3153:   const MatScalar   *aa = a->a, *v;

3155:   PetscFunctionBegin;
3156:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3158:   PetscCall(VecGetArrayRead(bb, &b));
3159:   PetscCall(VecGetArrayWrite(xx, &x));
3160:   tmp = a->solve_work;

3162:   PetscCall(ISGetIndices(isrow, &rout));
3163:   r = rout;
3164:   PetscCall(ISGetIndices(iscol, &cout));
3165:   c = cout;

3167:   /* forward solve the lower triangular */
3168:   tmp[0] = b[r[0]];
3169:   v      = aa;
3170:   vi     = aj;
3171:   for (i = 1; i < n; i++) {
3172:     nz  = ai[i + 1] - ai[i];
3173:     sum = b[r[i]];
3174:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3175:     tmp[i] = sum;
3176:     v += nz;
3177:     vi += nz;
3178:   }

3180:   /* backward solve the upper triangular */
3181:   for (i = n - 1; i >= 0; i--) {
3182:     v   = aa + adiag[i + 1] + 1;
3183:     vi  = aj + adiag[i + 1] + 1;
3184:     nz  = adiag[i] - adiag[i + 1] - 1;
3185:     sum = tmp[i];
3186:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3187:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3188:   }

3190:   PetscCall(ISRestoreIndices(isrow, &rout));
3191:   PetscCall(ISRestoreIndices(iscol, &cout));
3192:   PetscCall(VecRestoreArrayRead(bb, &b));
3193:   PetscCall(VecRestoreArrayWrite(xx, &x));
3194:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3195:   PetscFunctionReturn(PETSC_SUCCESS);
3196: }

3198: #if 0
3199: // unused
3200: /*
3201:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3202: */
3203: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3204: {
3205:   Mat             B = *fact;
3206:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3207:   IS              isicol;
3208:   const PetscInt *r, *ic;
3209:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3210:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3211:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3212:   PetscInt        nlnk, *lnk;
3213:   PetscBT         lnkbt;
3214:   PetscBool       row_identity, icol_identity;
3215:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3216:   const PetscInt *ics;
3217:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3218:   PetscReal       dt = info->dt, shift = info->shiftamount;
3219:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3220:   PetscBool       missing;

3222:   PetscFunctionBegin;
3223:   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3224:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3226:   /* symbolic factorization, can be reused */
3227:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3228:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3229:   adiag = a->diag;

3231:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

3233:   /* bdiag is location of diagonal in factor */
3234:   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3235:   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */

3237:   /* allocate row pointers bi */
3238:   PetscCall(PetscMalloc1(2 * n + 2, &bi));

3240:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3241:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3242:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3244:   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3245:   PetscCall(PetscMalloc1(nnz_max + 1, &ba));

3247:   /* put together the new matrix */
3248:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3249:   b = (Mat_SeqAIJ *)B->data;

3251:   b->free_a       = PETSC_TRUE;
3252:   b->free_ij      = PETSC_TRUE;
3253:   b->singlemalloc = PETSC_FALSE;

3255:   b->a    = ba;
3256:   b->j    = bj;
3257:   b->i    = bi;
3258:   b->diag = bdiag;
3259:   b->ilen = NULL;
3260:   b->imax = NULL;
3261:   b->row  = isrow;
3262:   b->col  = iscol;
3263:   PetscCall(PetscObjectReference((PetscObject)isrow));
3264:   PetscCall(PetscObjectReference((PetscObject)iscol));
3265:   b->icol = isicol;

3267:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3268:   b->maxnz = nnz_max;

3270:   B->factortype            = MAT_FACTOR_ILUDT;
3271:   B->info.factor_mallocs   = 0;
3272:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3273:   /*  end of symbolic factorization */

3275:   PetscCall(ISGetIndices(isrow, &r));
3276:   PetscCall(ISGetIndices(isicol, &ic));
3277:   ics = ic;

3279:   /* linked list for storing column indices of the active row */
3280:   nlnk = n + 1;
3281:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

3283:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3284:   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3285:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3286:   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3287:   PetscCall(PetscArrayzero(rtmp, n));

3289:   bi[0]         = 0;
3290:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3291:   bdiag_rev[n]  = bdiag[0];
3292:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3293:   for (i = 0; i < n; i++) {
3294:     /* copy initial fill into linked list */
3295:     nzi = ai[r[i] + 1] - ai[r[i]];
3296:     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3297:     nzi_al = adiag[r[i]] - ai[r[i]];
3298:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3299:     ajtmp  = aj + ai[r[i]];
3300:     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));

3302:     /* load in initial (unfactored row) */
3303:     aatmp = a->a + ai[r[i]];
3304:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3306:     /* add pivot rows into linked list */
3307:     row = lnk[n];
3308:     while (row < i) {
3309:       nzi_bl = bi[row + 1] - bi[row] + 1;
3310:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3311:       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3312:       nzi += nlnk;
3313:       row = lnk[row];
3314:     }

3316:     /* copy data from lnk into jtmp, then initialize lnk */
3317:     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));

3319:     /* numerical factorization */
3320:     bjtmp = jtmp;
3321:     row   = *bjtmp++; /* 1st pivot row */
3322:     while (row < i) {
3323:       pc         = rtmp + row;
3324:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3325:       multiplier = (*pc) * (*pv);
3326:       *pc        = multiplier;
3327:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3328:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3329:         pv = ba + bdiag[row + 1] + 1;
3330:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3331:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3332:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3333:       }
3334:       row = *bjtmp++;
3335:     }

3337:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3338:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3339:     nzi_bl   = 0;
3340:     j        = 0;
3341:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3342:       vtmp[j]       = rtmp[jtmp[j]];
3343:       rtmp[jtmp[j]] = 0.0;
3344:       nzi_bl++;
3345:       j++;
3346:     }
3347:     nzi_bu = nzi - nzi_bl - 1;
3348:     while (j < nzi) {
3349:       vtmp[j]       = rtmp[jtmp[j]];
3350:       rtmp[jtmp[j]] = 0.0;
3351:       j++;
3352:     }

3354:     bjtmp = bj + bi[i];
3355:     batmp = ba + bi[i];
3356:     /* apply level dropping rule to L part */
3357:     ncut = nzi_al + dtcount;
3358:     if (ncut < nzi_bl) {
3359:       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3360:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3361:     } else {
3362:       ncut = nzi_bl;
3363:     }
3364:     for (j = 0; j < ncut; j++) {
3365:       bjtmp[j] = jtmp[j];
3366:       batmp[j] = vtmp[j];
3367:     }
3368:     bi[i + 1] = bi[i] + ncut;
3369:     nzi       = ncut + 1;

3371:     /* apply level dropping rule to U part */
3372:     ncut = nzi_au + dtcount;
3373:     if (ncut < nzi_bu) {
3374:       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3375:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3376:     } else {
3377:       ncut = nzi_bu;
3378:     }
3379:     nzi += ncut;

3381:     /* mark bdiagonal */
3382:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3383:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3384:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3385:     bjtmp                = bj + bdiag[i];
3386:     batmp                = ba + bdiag[i];
3387:     *bjtmp               = i;
3388:     *batmp               = diag_tmp; /* rtmp[i]; */
3389:     if (*batmp == 0.0) *batmp = dt + shift;
3390:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3392:     bjtmp = bj + bdiag[i + 1] + 1;
3393:     batmp = ba + bdiag[i + 1] + 1;
3394:     for (k = 0; k < ncut; k++) {
3395:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3396:       batmp[k] = vtmp[nzi_bl + 1 + k];
3397:     }

3399:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3400:   }              /* for (i=0; i<n; i++) */
3401:   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);

3403:   PetscCall(ISRestoreIndices(isrow, &r));
3404:   PetscCall(ISRestoreIndices(isicol, &ic));

3406:   PetscCall(PetscLLDestroy(lnk, lnkbt));
3407:   PetscCall(PetscFree2(im, jtmp));
3408:   PetscCall(PetscFree2(rtmp, vtmp));
3409:   PetscCall(PetscFree(bdiag_rev));

3411:   PetscCall(PetscLogFlops(B->cmap->n));
3412:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3414:   PetscCall(ISIdentity(isrow, &row_identity));
3415:   PetscCall(ISIdentity(isicol, &icol_identity));
3416:   if (row_identity && icol_identity) {
3417:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3418:   } else {
3419:     B->ops->solve = MatSolve_SeqAIJ;
3420:   }

3422:   B->ops->solveadd          = NULL;
3423:   B->ops->solvetranspose    = NULL;
3424:   B->ops->solvetransposeadd = NULL;
3425:   B->ops->matsolve          = NULL;
3426:   B->assembled              = PETSC_TRUE;
3427:   B->preallocated           = PETSC_TRUE;
3428:   PetscFunctionReturn(PETSC_SUCCESS);
3429: }

3431: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3432: /*
3433:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3434: */

3436: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3437: {
3438:   PetscFunctionBegin;
3439:   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3440:   PetscFunctionReturn(PETSC_SUCCESS);
3441: }

3443: /*
3444:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3445:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3446: */
3447: /*
3448:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3449: */

3451: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3452: {
3453:   Mat             C = fact;
3454:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3455:   IS              isrow = b->row, isicol = b->icol;
3456:   const PetscInt *r, *ic, *ics;
3457:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3458:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3459:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3460:   PetscReal       dt = info->dt, shift = info->shiftamount;
3461:   PetscBool       row_identity, col_identity;

3463:   PetscFunctionBegin;
3464:   PetscCall(ISGetIndices(isrow, &r));
3465:   PetscCall(ISGetIndices(isicol, &ic));
3466:   PetscCall(PetscMalloc1(n + 1, &rtmp));
3467:   ics = ic;

3469:   for (i = 0; i < n; i++) {
3470:     /* initialize rtmp array */
3471:     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3472:     bjtmp = bj + bi[i];
3473:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3474:     rtmp[i] = 0.0;
3475:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3476:     bjtmp   = bj + bdiag[i + 1] + 1;
3477:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3479:     /* load in initial unfactored row of A */
3480:     nz    = ai[r[i] + 1] - ai[r[i]];
3481:     ajtmp = aj + ai[r[i]];
3482:     v     = aa + ai[r[i]];
3483:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3485:     /* numerical factorization */
3486:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3487:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3488:     k     = 0;
3489:     while (k < nzl) {
3490:       row        = *bjtmp++;
3491:       pc         = rtmp + row;
3492:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3493:       multiplier = (*pc) * (*pv);
3494:       *pc        = multiplier;
3495:       if (PetscAbsScalar(multiplier) > dt) {
3496:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3497:         pv = b->a + bdiag[row + 1] + 1;
3498:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3499:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3500:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3501:       }
3502:       k++;
3503:     }

3505:     /* finished row so stick it into b->a */
3506:     /* L-part */
3507:     pv  = b->a + bi[i];
3508:     pj  = bj + bi[i];
3509:     nzl = bi[i + 1] - bi[i];
3510:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3512:     /* diagonal: invert diagonal entries for simpler triangular solves */
3513:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3514:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3516:     /* U-part */
3517:     pv  = b->a + bdiag[i + 1] + 1;
3518:     pj  = bj + bdiag[i + 1] + 1;
3519:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3520:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3521:   }

3523:   PetscCall(PetscFree(rtmp));
3524:   PetscCall(ISRestoreIndices(isicol, &ic));
3525:   PetscCall(ISRestoreIndices(isrow, &r));

3527:   PetscCall(ISIdentity(isrow, &row_identity));
3528:   PetscCall(ISIdentity(isicol, &col_identity));
3529:   if (row_identity && col_identity) {
3530:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3531:   } else {
3532:     C->ops->solve = MatSolve_SeqAIJ;
3533:   }
3534:   C->ops->solveadd          = NULL;
3535:   C->ops->solvetranspose    = NULL;
3536:   C->ops->solvetransposeadd = NULL;
3537:   C->ops->matsolve          = NULL;
3538:   C->assembled              = PETSC_TRUE;
3539:   C->preallocated           = PETSC_TRUE;

3541:   PetscCall(PetscLogFlops(C->cmap->n));
3542:   PetscFunctionReturn(PETSC_SUCCESS);
3543: }
3544: #endif