Actual source code: kaij.c


  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26: #include <../src/mat/impls/kaij/kaij.h>
 27: #include <../src/mat/utils/freespace.h>
 28: #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

 33:    Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 35:    Input Parameter:
 36: .  A - the `MATKAIJ` matrix

 38:    Output Parameter:
 39: .  B - the `MATAIJ` matrix

 41:    Level: advanced

 43:    Note:
 44:    The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

 46: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
 47: @*/
 48: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
 49: {
 50:   PetscBool ismpikaij, isseqkaij;

 52:   PetscFunctionBegin;
 53:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
 54:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
 55:   if (ismpikaij) {
 56:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

 58:     *B = b->A;
 59:   } else if (isseqkaij) {
 60:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

 62:     *B = b->AIJ;
 63:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: /*@C
 68:    MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix

 70:    Not Collective; the entire `S` is stored and returned independently on all processes.

 72:    Input Parameter:
 73: .  A - the `MATKAIJ` matrix

 75:    Output Parameters:
 76: +  m - the number of rows in `S`
 77: .  n - the number of columns in `S`
 78: -  S - the S matrix, in form of a scalar array in column-major format

 80:    Level: advanced

 82:    Note:
 83:    All output parameters are optional (pass `NULL` if not desired)

 85: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
 86: @*/
 87: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
 88: {
 89:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
 90:   PetscFunctionBegin;
 91:   if (m) *m = b->p;
 92:   if (n) *n = b->q;
 93:   if (S) *S = b->S;
 94:   PetscFunctionReturn(PETSC_SUCCESS);
 95: }

 97: /*@C
 98:    MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix

100:    Not Collective; the entire `S` is stored and returned independently on all processes.

102:    Input Parameter:
103: .  A - the `MATKAIJ` matrix

105:    Output Parameters:
106: +  m - the number of rows in `S`
107: .  n - the number of columns in `S`
108: -  S - the S matrix, in form of a scalar array in column-major format

110:    Level: advanced

112:    Note:
113:    All output parameters are optional (pass `NULL` if not desired)

115: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116: @*/
117: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
118: {
119:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120:   PetscFunctionBegin;
121:   if (m) *m = b->p;
122:   if (n) *n = b->q;
123:   if (S) *S = b->S;
124:   PetscFunctionReturn(PETSC_SUCCESS);
125: }

127: /*@C
128:   MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

130:   Not Collective

132:   Input Parameters:
133: + A - the `MATKAIJ` matrix
134: - S - location of pointer to array obtained with `MatKAIJGetS()`

136:   Level: advanced

138:   Note:
139:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
140:   If `NULL` is passed, it will not attempt to zero the array pointer.

142: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
143: @*/
144: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
145: {
146:   PetscFunctionBegin;
147:   if (S) *S = NULL;
148:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
149:   PetscFunctionReturn(PETSC_SUCCESS);
150: }

152: /*@C
153:   MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

155:   Not Collective

157:   Input Parameters:
158: + A - the `MATKAIJ` matrix
159: - S - location of pointer to array obtained with `MatKAIJGetS()`

161:   Level: advanced

163:   Note:
164:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
165:   If `NULL` is passed, it will not attempt to zero the array pointer.

167: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
170: {
171:   PetscFunctionBegin;
172:   if (S) *S = NULL;
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: /*@C
177:    MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix

179:    Not Collective; the entire `T` is stored and returned independently on all processes

181:    Input Parameter:
182: .  A - the `MATKAIJ` matrix

184:    Output Parameters:
185: +  m - the number of rows in `T`
186: .  n - the number of columns in `T`
187: -  T - the T matrix, in form of a scalar array in column-major format

189:    Level: advanced

191:    Note:
192:    All output parameters are optional (pass `NULL` if not desired)

194: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
195: @*/
196: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
197: {
198:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
199:   PetscFunctionBegin;
200:   if (m) *m = b->p;
201:   if (n) *n = b->q;
202:   if (T) *T = b->T;
203:   PetscFunctionReturn(PETSC_SUCCESS);
204: }

206: /*@C
207:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix

209:    Not Collective; the entire `T` is stored and returned independently on all processes

211:    Input Parameter:
212: .  A - the `MATKAIJ` matrix

214:    Output Parameters:
215: +  m - the number of rows in `T`
216: .  n - the number of columns in `T`
217: -  T - the T matrix, in form of a scalar array in column-major format

219:    Level: advanced

221:    Note:
222:    All output parameters are optional (pass `NULL` if not desired)

224: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
225: @*/
226: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
227: {
228:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
229:   PetscFunctionBegin;
230:   if (m) *m = b->p;
231:   if (n) *n = b->q;
232:   if (T) *T = b->T;
233:   PetscFunctionReturn(PETSC_SUCCESS);
234: }

236: /*@C
237:   MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

239:   Not Collective

241:   Input Parameters:
242: + A - the `MATKAIJ` matrix
243: - T - location of pointer to array obtained with `MatKAIJGetS()`

245:   Level: advanced

247:   Note:
248:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
249:   If `NULL` is passed, it will not attempt to zero the array pointer.

251: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
252: @*/
253: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
254: {
255:   PetscFunctionBegin;
256:   if (T) *T = NULL;
257:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
258:   PetscFunctionReturn(PETSC_SUCCESS);
259: }

261: /*@C
262:   MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

264:   Not Collective

266:   Input Parameters:
267: + A - the `MATKAIJ` matrix
268: - T - location of pointer to array obtained with `MatKAIJGetS()`

270:   Level: advanced

272:   Note:
273:   This routine zeros the array pointer to prevent accidental reuse after it has been restored.
274:   If `NULL` is passed, it will not attempt to zero the array pointer.

276: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
279: {
280:   PetscFunctionBegin;
281:   if (T) *T = NULL;
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: /*@
286:    MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

288:    Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

290:    Input Parameters:
291: +  A - the `MATKAIJ` matrix
292: -  B - the `MATAIJ` matrix

294:    Level: advanced

296:    Notes:
297:    This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

299:    Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

301: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
302: @*/
303: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
304: {
305:   PetscMPIInt size;
306:   PetscBool   flg;

308:   PetscFunctionBegin;
309:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
310:   if (size == 1) {
311:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
312:     PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
313:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
314:     a->AIJ         = B;
315:   } else {
316:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
317:     a->A           = B;
318:   }
319:   PetscCall(PetscObjectReference((PetscObject)B));
320:   PetscFunctionReturn(PETSC_SUCCESS);
321: }

323: /*@C
324:    MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix

326:    Logically Collective; the entire `S` is stored independently on all processes.

328:    Input Parameters:
329: +  A - the `MATKAIJ` matrix
330: .  p - the number of rows in `S`
331: .  q - the number of columns in `S`
332: -  S - the S matrix, in form of a scalar array in column-major format

334:    Level: advanced

336:    Notes:
337:    The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.

339:    The `S` matrix is copied, so the user can destroy this array.

341: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
342: @*/
343: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
344: {
345:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

347:   PetscFunctionBegin;
348:   PetscCall(PetscFree(a->S));
349:   if (S) {
350:     PetscCall(PetscMalloc1(p * q, &a->S));
351:     PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
352:   } else a->S = NULL;

354:   a->p = p;
355:   a->q = q;
356:   PetscFunctionReturn(PETSC_SUCCESS);
357: }

359: /*@C
360:    MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.

362:    Logically Collective.

364:    Input Parameter:
365: .  A - the `MATKAIJ` matrix

367:   Output Parameter:
368: .  identity - the Boolean value

370:    Level: Advanced

372: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
373: @*/
374: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
375: {
376:   Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
377:   PetscInt     i, j;

379:   PetscFunctionBegin;
380:   if (a->p != a->q) {
381:     *identity = PETSC_FALSE;
382:     PetscFunctionReturn(PETSC_SUCCESS);
383:   } else *identity = PETSC_TRUE;
384:   if (!a->isTI || a->S) {
385:     for (i = 0; i < a->p && *identity; i++) {
386:       for (j = 0; j < a->p && *identity; j++) {
387:         if (i != j) {
388:           if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389:           if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
390:         } else {
391:           if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392:           if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
393:         }
394:       }
395:     }
396:   }
397:   PetscFunctionReturn(PETSC_SUCCESS);
398: }

400: /*@C
401:    MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix

403:    Logically Collective; the entire `T` is stored independently on all processes.

405:    Input Parameters:
406: +  A - the `MATKAIJ` matrix
407: .  p - the number of rows in `S`
408: .  q - the number of columns in `S`
409: -  T - the `T` matrix, in form of a scalar array in column-major format

411:    Level: Advanced

413:    Notes:
414:    The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.

416:    The `T` matrix is copied, so the user can destroy this array.

418: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
419: @*/
420: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
421: {
422:   PetscInt     i, j;
423:   Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
424:   PetscBool    isTI = PETSC_FALSE;

426:   PetscFunctionBegin;
427:   /* check if T is an identity matrix */
428:   if (T && (p == q)) {
429:     isTI = PETSC_TRUE;
430:     for (i = 0; i < p; i++) {
431:       for (j = 0; j < q; j++) {
432:         if (i == j) {
433:           /* diagonal term must be 1 */
434:           if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
435:         } else {
436:           /* off-diagonal term must be 0 */
437:           if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
438:         }
439:       }
440:     }
441:   }
442:   a->isTI = isTI;

444:   PetscCall(PetscFree(a->T));
445:   if (T && (!isTI)) {
446:     PetscCall(PetscMalloc1(p * q, &a->T));
447:     PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
448:   } else a->T = NULL;

450:   a->p = p;
451:   a->q = q;
452:   PetscFunctionReturn(PETSC_SUCCESS);
453: }

455: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
456: {
457:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

459:   PetscFunctionBegin;
460:   PetscCall(MatDestroy(&b->AIJ));
461:   PetscCall(PetscFree(b->S));
462:   PetscCall(PetscFree(b->T));
463:   PetscCall(PetscFree(b->ibdiag));
464:   PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
465:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
466:   PetscCall(PetscFree(A->data));
467:   PetscFunctionReturn(PETSC_SUCCESS);
468: }

470: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
471: {
472:   Mat_MPIKAIJ     *a;
473:   Mat_MPIAIJ      *mpiaij;
474:   PetscScalar     *T;
475:   PetscInt         i, j;
476:   PetscObjectState state;

478:   PetscFunctionBegin;
479:   a      = (Mat_MPIKAIJ *)A->data;
480:   mpiaij = (Mat_MPIAIJ *)a->A->data;

482:   PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
483:   if (state == a->state) {
484:     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
485:     PetscFunctionReturn(PETSC_SUCCESS);
486:   } else {
487:     PetscCall(MatDestroy(&a->AIJ));
488:     PetscCall(MatDestroy(&a->OAIJ));
489:     if (a->isTI) {
490:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
491:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
492:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
493:        * to pass in. */
494:       PetscCall(PetscMalloc1(a->p * a->q, &T));
495:       for (i = 0; i < a->p; i++) {
496:         for (j = 0; j < a->q; j++) {
497:           if (i == j) T[i + j * a->p] = 1.0;
498:           else T[i + j * a->p] = 0.0;
499:         }
500:       }
501:     } else T = a->T;
502:     PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
503:     PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
504:     if (a->isTI) PetscCall(PetscFree(T));
505:     a->state = state;
506:   }

508:   PetscFunctionReturn(PETSC_SUCCESS);
509: }

511: static PetscErrorCode MatSetUp_KAIJ(Mat A)
512: {
513:   PetscInt     n;
514:   PetscMPIInt  size;
515:   Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

517:   PetscFunctionBegin;
518:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
519:   if (size == 1) {
520:     PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
521:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
522:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
523:     PetscCall(PetscLayoutSetUp(A->rmap));
524:     PetscCall(PetscLayoutSetUp(A->cmap));
525:   } else {
526:     Mat_MPIKAIJ *a;
527:     Mat_MPIAIJ  *mpiaij;
528:     IS           from, to;
529:     Vec          gvec;

531:     a      = (Mat_MPIKAIJ *)A->data;
532:     mpiaij = (Mat_MPIAIJ *)a->A->data;
533:     PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
534:     PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
535:     PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
536:     PetscCall(PetscLayoutSetUp(A->rmap));
537:     PetscCall(PetscLayoutSetUp(A->cmap));

539:     PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

541:     PetscCall(VecGetSize(mpiaij->lvec, &n));
542:     PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
543:     PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
544:     PetscCall(VecSetBlockSize(a->w, a->q));
545:     PetscCall(VecSetType(a->w, VECSEQ));

547:     /* create two temporary Index sets for build scatter gather */
548:     PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
549:     PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

551:     /* create temporary global vector to generate scatter context */
552:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

554:     /* generate the scatter context */
555:     PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

557:     PetscCall(ISDestroy(&from));
558:     PetscCall(ISDestroy(&to));
559:     PetscCall(VecDestroy(&gvec));
560:   }

562:   A->assembled = PETSC_TRUE;
563:   PetscFunctionReturn(PETSC_SUCCESS);
564: }

566: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
567: {
568:   PetscViewerFormat format;
569:   Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
570:   Mat               B;
571:   PetscInt          i;
572:   PetscBool         ismpikaij;

574:   PetscFunctionBegin;
575:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
576:   PetscCall(PetscViewerGetFormat(viewer, &format));
577:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578:     PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

580:     /* Print appropriate details for S. */
581:     if (!a->S) {
582:       PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
583:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
585:       for (i = 0; i < (a->p * a->q); i++) {
586: #if defined(PETSC_USE_COMPLEX)
587:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
588: #else
589:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
590: #endif
591:       }
592:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
593:     }

595:     /* Print appropriate details for T. */
596:     if (a->isTI) {
597:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
598:     } else if (!a->T) {
599:       PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
600:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601:       PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
602:       for (i = 0; i < (a->p * a->q); i++) {
603: #if defined(PETSC_USE_COMPLEX)
604:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
605: #else
606:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
607: #endif
608:       }
609:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
610:     }

612:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
613:     PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
614:     if (ismpikaij) {
615:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
616:       PetscCall(MatView(b->A, viewer));
617:     } else {
618:       PetscCall(MatView(a->AIJ, viewer));
619:     }

621:   } else {
622:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
624:     PetscCall(MatView(B, viewer));
625:     PetscCall(MatDestroy(&B));
626:   }
627:   PetscFunctionReturn(PETSC_SUCCESS);
628: }

630: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
631: {
632:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

634:   PetscFunctionBegin;
635:   PetscCall(MatDestroy(&b->AIJ));
636:   PetscCall(MatDestroy(&b->OAIJ));
637:   PetscCall(MatDestroy(&b->A));
638:   PetscCall(VecScatterDestroy(&b->ctx));
639:   PetscCall(VecDestroy(&b->w));
640:   PetscCall(PetscFree(b->S));
641:   PetscCall(PetscFree(b->T));
642:   PetscCall(PetscFree(b->ibdiag));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
644:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
645:   PetscCall(PetscFree(A->data));
646:   PetscFunctionReturn(PETSC_SUCCESS);
647: }

649: /* zz = yy + Axx */
650: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
651: {
652:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
653:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
654:   const PetscScalar *s = b->S, *t = b->T;
655:   const PetscScalar *x, *v, *bx;
656:   PetscScalar       *y, *sums;
657:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
658:   PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

660:   PetscFunctionBegin;
661:   if (!yy) {
662:     PetscCall(VecSet(zz, 0.0));
663:   } else {
664:     PetscCall(VecCopy(yy, zz));
665:   }
666:   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

668:   PetscCall(VecGetArrayRead(xx, &x));
669:   PetscCall(VecGetArray(zz, &y));
670:   idx = a->j;
671:   v   = a->a;
672:   ii  = a->i;

674:   if (b->isTI) {
675:     for (i = 0; i < m; i++) {
676:       jrow = ii[i];
677:       n    = ii[i + 1] - jrow;
678:       sums = y + p * i;
679:       for (j = 0; j < n; j++) {
680:         for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
681:       }
682:     }
683:     PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
684:   } else if (t) {
685:     for (i = 0; i < m; i++) {
686:       jrow = ii[i];
687:       n    = ii[i + 1] - jrow;
688:       sums = y + p * i;
689:       for (j = 0; j < n; j++) {
690:         for (k = 0; k < p; k++) {
691:           for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
692:         }
693:       }
694:     }
695:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
696:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
697:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
698:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
699:     PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
700:   }
701:   if (s) {
702:     for (i = 0; i < m; i++) {
703:       sums = y + p * i;
704:       bx   = x + q * i;
705:       if (i < b->AIJ->cmap->n) {
706:         for (j = 0; j < q; j++) {
707:           for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
708:         }
709:       }
710:     }
711:     PetscCall(PetscLogFlops(2.0 * m * p * q));
712:   }

714:   PetscCall(VecRestoreArrayRead(xx, &x));
715:   PetscCall(VecRestoreArray(zz, &y));
716:   PetscFunctionReturn(PETSC_SUCCESS);
717: }

719: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
720: {
721:   PetscFunctionBegin;
722:   PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
723:   PetscFunctionReturn(PETSC_SUCCESS);
724: }

726: #include <petsc/private/kernels/blockinvert.h>

728: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
729: {
730:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
731:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
732:   const PetscScalar *S = b->S;
733:   const PetscScalar *T = b->T;
734:   const PetscScalar *v = a->a;
735:   const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
736:   PetscInt           i, j, *v_pivots, dof, dof2;
737:   PetscScalar       *diag, aval, *v_work;

739:   PetscFunctionBegin;
740:   PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
741:   PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

743:   dof  = p;
744:   dof2 = dof * dof;

746:   if (b->ibdiagvalid) {
747:     if (values) *values = b->ibdiag;
748:     PetscFunctionReturn(PETSC_SUCCESS);
749:   }
750:   if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
751:   if (values) *values = b->ibdiag;
752:   diag = b->ibdiag;

754:   PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
755:   for (i = 0; i < m; i++) {
756:     if (S) {
757:       PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
758:     } else {
759:       PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
760:     }
761:     if (b->isTI) {
762:       aval = 0;
763:       for (j = ii[i]; j < ii[i + 1]; j++)
764:         if (idx[j] == i) aval = v[j];
765:       for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
766:     } else if (T) {
767:       aval = 0;
768:       for (j = ii[i]; j < ii[i + 1]; j++)
769:         if (idx[j] == i) aval = v[j];
770:       for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
771:     }
772:     PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
773:     diag += dof2;
774:   }
775:   PetscCall(PetscFree2(v_work, v_pivots));

777:   b->ibdiagvalid = PETSC_TRUE;
778:   PetscFunctionReturn(PETSC_SUCCESS);
779: }

781: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
782: {
783:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

785:   PetscFunctionBegin;
786:   *B = kaij->AIJ;
787:   PetscFunctionReturn(PETSC_SUCCESS);
788: }

790: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
791: {
792:   Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
793:   Mat            AIJ, OAIJ, B;
794:   PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
795:   const PetscInt p = a->p, q = a->q;
796:   PetscBool      ismpikaij, missing;

798:   PetscFunctionBegin;
799:   if (reuse != MAT_REUSE_MATRIX) {
800:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
801:     if (ismpikaij) {
802:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
803:       AIJ            = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
804:       OAIJ           = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
805:     } else {
806:       AIJ  = a->AIJ;
807:       OAIJ = NULL;
808:     }
809:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
810:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
811:     PetscCall(MatSetType(B, MATAIJ));
812:     PetscCall(MatGetSize(AIJ, &m, NULL));
813:     PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
814:     if (!missing || !a->S) d = m;
815:     PetscCall(PetscMalloc1(m * p, &d_nnz));
816:     for (i = 0; i < m; ++i) {
817:       PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
818:       for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
819:       PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820:     }
821:     if (OAIJ) {
822:       PetscCall(PetscMalloc1(m * p, &o_nnz));
823:       for (i = 0; i < m; ++i) {
824:         PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
825:         for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
826:         PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827:       }
828:       PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
829:     } else {
830:       PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
831:     }
832:     PetscCall(PetscFree(d_nnz));
833:     PetscCall(PetscFree(o_nnz));
834:   } else B = *newmat;
835:   PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
836:   if (reuse == MAT_INPLACE_MATRIX) {
837:     PetscCall(MatHeaderReplace(A, &B));
838:   } else *newmat = B;
839:   PetscFunctionReturn(PETSC_SUCCESS);
840: }

842: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
843: {
844:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
845:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
846:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
847:   const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
848:   const PetscScalar *b, *xb, *idiag;
849:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
850:   PetscInt           i, j, k, i2, bs, bs2, nz;

852:   PetscFunctionBegin;
853:   its = its * lits;
854:   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
855:   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
856:   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
857:   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
858:   PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
859:   bs  = p;
860:   bs2 = bs * bs;

862:   if (!m) PetscFunctionReturn(PETSC_SUCCESS);

864:   if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
865:   idiag = kaij->ibdiag;
866:   diag  = a->diag;

868:   if (!kaij->sor.setup) {
869:     PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
870:     kaij->sor.setup = PETSC_TRUE;
871:   }
872:   y    = kaij->sor.y;
873:   w    = kaij->sor.w;
874:   work = kaij->sor.work;
875:   t    = kaij->sor.t;
876:   arr  = kaij->sor.arr;

878:   PetscCall(VecGetArray(xx, &x));
879:   PetscCall(VecGetArrayRead(bb, &b));

881:   if (flag & SOR_ZERO_INITIAL_GUESS) {
882:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
883:       PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
884:       PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
885:       i2 = bs;
886:       idiag += bs2;
887:       for (i = 1; i < m; i++) {
888:         v  = aa + ai[i];
889:         vi = aj + ai[i];
890:         nz = diag[i] - ai[i];

892:         if (T) { /* b - T (Arow * x) */
893:           PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
894:           for (j = 0; j < nz; j++) {
895:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
896:           }
897:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
898:           for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
899:         } else if (kaij->isTI) {
900:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
901:           for (j = 0; j < nz; j++) {
902:             for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
903:           }
904:         } else {
905:           PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
906:         }

908:         PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
909:         for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];

911:         idiag += bs2;
912:         i2 += bs;
913:       }
914:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
915:       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
916:       xb = t;
917:     } else xb = b;
918:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
919:       idiag = kaij->ibdiag + bs2 * (m - 1);
920:       i2    = bs * (m - 1);
921:       PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
922:       PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
923:       i2 -= bs;
924:       idiag -= bs2;
925:       for (i = m - 2; i >= 0; i--) {
926:         v  = aa + diag[i] + 1;
927:         vi = aj + diag[i] + 1;
928:         nz = ai[i + 1] - diag[i] - 1;

930:         if (T) { /* FIXME: This branch untested */
931:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
932:           /* copy all rows of x that are needed into contiguous space */
933:           workt = work;
934:           for (j = 0; j < nz; j++) {
935:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
936:             workt += bs;
937:           }
938:           arrt = arr;
939:           for (j = 0; j < nz; j++) {
940:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
941:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
942:             arrt += bs2;
943:           }
944:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
945:         } else if (kaij->isTI) {
946:           PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
947:           for (j = 0; j < nz; j++) {
948:             for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
949:           }
950:         }

952:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
953:         for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];

955:         idiag -= bs2;
956:         i2 -= bs;
957:       }
958:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
959:     }
960:     its--;
961:   }
962:   while (its--) { /* FIXME: This branch not updated */
963:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
964:       i2    = 0;
965:       idiag = kaij->ibdiag;
966:       for (i = 0; i < m; i++) {
967:         PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

969:         v     = aa + ai[i];
970:         vi    = aj + ai[i];
971:         nz    = diag[i] - ai[i];
972:         workt = work;
973:         for (j = 0; j < nz; j++) {
974:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
975:           workt += bs;
976:         }
977:         arrt = arr;
978:         if (T) {
979:           for (j = 0; j < nz; j++) {
980:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
981:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
982:             arrt += bs2;
983:           }
984:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
985:         } else if (kaij->isTI) {
986:           for (j = 0; j < nz; j++) {
987:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
988:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
989:             arrt += bs2;
990:           }
991:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
992:         }
993:         PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));

995:         v     = aa + diag[i] + 1;
996:         vi    = aj + diag[i] + 1;
997:         nz    = ai[i + 1] - diag[i] - 1;
998:         workt = work;
999:         for (j = 0; j < nz; j++) {
1000:           PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1001:           workt += bs;
1002:         }
1003:         arrt = arr;
1004:         if (T) {
1005:           for (j = 0; j < nz; j++) {
1006:             PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1007:             for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1008:             arrt += bs2;
1009:           }
1010:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1011:         } else if (kaij->isTI) {
1012:           for (j = 0; j < nz; j++) {
1013:             PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1014:             for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1015:             arrt += bs2;
1016:           }
1017:           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1018:         }

1020:         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1021:         for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);

1023:         idiag += bs2;
1024:         i2 += bs;
1025:       }
1026:       xb = t;
1027:     } else xb = b;
1028:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029:       idiag = kaij->ibdiag + bs2 * (m - 1);
1030:       i2    = bs * (m - 1);
1031:       if (xb == b) {
1032:         for (i = m - 1; i >= 0; i--) {
1033:           PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));

1035:           v     = aa + ai[i];
1036:           vi    = aj + ai[i];
1037:           nz    = diag[i] - ai[i];
1038:           workt = work;
1039:           for (j = 0; j < nz; j++) {
1040:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1041:             workt += bs;
1042:           }
1043:           arrt = arr;
1044:           if (T) {
1045:             for (j = 0; j < nz; j++) {
1046:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1047:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1048:               arrt += bs2;
1049:             }
1050:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1051:           } else if (kaij->isTI) {
1052:             for (j = 0; j < nz; j++) {
1053:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1054:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1055:               arrt += bs2;
1056:             }
1057:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1058:           }

1060:           v     = aa + diag[i] + 1;
1061:           vi    = aj + diag[i] + 1;
1062:           nz    = ai[i + 1] - diag[i] - 1;
1063:           workt = work;
1064:           for (j = 0; j < nz; j++) {
1065:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1066:             workt += bs;
1067:           }
1068:           arrt = arr;
1069:           if (T) {
1070:             for (j = 0; j < nz; j++) {
1071:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1072:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1073:               arrt += bs2;
1074:             }
1075:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1076:           } else if (kaij->isTI) {
1077:             for (j = 0; j < nz; j++) {
1078:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1079:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1080:               arrt += bs2;
1081:             }
1082:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1083:           }

1085:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1086:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1087:         }
1088:       } else {
1089:         for (i = m - 1; i >= 0; i--) {
1090:           PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1091:           v     = aa + diag[i] + 1;
1092:           vi    = aj + diag[i] + 1;
1093:           nz    = ai[i + 1] - diag[i] - 1;
1094:           workt = work;
1095:           for (j = 0; j < nz; j++) {
1096:             PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1097:             workt += bs;
1098:           }
1099:           arrt = arr;
1100:           if (T) {
1101:             for (j = 0; j < nz; j++) {
1102:               PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1103:               for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1104:               arrt += bs2;
1105:             }
1106:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1107:           } else if (kaij->isTI) {
1108:             for (j = 0; j < nz; j++) {
1109:               PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1110:               for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1111:               arrt += bs2;
1112:             }
1113:             PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1114:           }
1115:           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1116:           for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1117:         }
1118:       }
1119:       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1120:     }
1121:   }

1123:   PetscCall(VecRestoreArray(xx, &x));
1124:   PetscCall(VecRestoreArrayRead(bb, &b));
1125:   PetscFunctionReturn(PETSC_SUCCESS);
1126: }

1128: /*===================================================================================*/

1130: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1131: {
1132:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1134:   PetscFunctionBegin;
1135:   if (!yy) {
1136:     PetscCall(VecSet(zz, 0.0));
1137:   } else {
1138:     PetscCall(VecCopy(yy, zz));
1139:   }
1140:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1141:   /* start the scatter */
1142:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1143:   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1144:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1146:   PetscFunctionReturn(PETSC_SUCCESS);
1147: }

1149: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1150: {
1151:   PetscFunctionBegin;
1152:   PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1153:   PetscFunctionReturn(PETSC_SUCCESS);
1154: }

1156: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1157: {
1158:   Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

1160:   PetscFunctionBegin;
1161:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1162:   PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1163:   PetscFunctionReturn(PETSC_SUCCESS);
1164: }

1166: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1167: {
1168:   Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
1169:   PetscBool    diag = PETSC_FALSE;
1170:   PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1171:   PetscScalar *vaij, *v, *S = b->S, *T = b->T;

1173:   PetscFunctionBegin;
1174:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1175:   b->getrowactive = PETSC_TRUE;
1176:   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

1178:   if ((!S) && (!T) && (!b->isTI)) {
1179:     if (ncols) *ncols = 0;
1180:     if (cols) *cols = NULL;
1181:     if (values) *values = NULL;
1182:     PetscFunctionReturn(PETSC_SUCCESS);
1183:   }

1185:   if (T || b->isTI) {
1186:     PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1187:     c = nzaij;
1188:     for (i = 0; i < nzaij; i++) {
1189:       /* check if this row contains a diagonal entry */
1190:       if (colsaij[i] == r) {
1191:         diag = PETSC_TRUE;
1192:         c    = i;
1193:       }
1194:     }
1195:   } else nzaij = c = 0;

1197:   /* calculate size of row */
1198:   nz = 0;
1199:   if (S) nz += q;
1200:   if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

1202:   if (cols || values) {
1203:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1204:     for (i = 0; i < q; i++) {
1205:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1206:       v[i] = 0.0;
1207:     }
1208:     if (b->isTI) {
1209:       for (i = 0; i < nzaij; i++) {
1210:         for (j = 0; j < q; j++) {
1211:           idx[i * q + j] = colsaij[i] * q + j;
1212:           v[i * q + j]   = (j == s ? vaij[i] : 0);
1213:         }
1214:       }
1215:     } else if (T) {
1216:       for (i = 0; i < nzaij; i++) {
1217:         for (j = 0; j < q; j++) {
1218:           idx[i * q + j] = colsaij[i] * q + j;
1219:           v[i * q + j]   = vaij[i] * T[s + j * p];
1220:         }
1221:       }
1222:     }
1223:     if (S) {
1224:       for (j = 0; j < q; j++) {
1225:         idx[c * q + j] = r * q + j;
1226:         v[c * q + j] += S[s + j * p];
1227:       }
1228:     }
1229:   }

1231:   if (ncols) *ncols = nz;
1232:   if (cols) *cols = idx;
1233:   if (values) *values = v;
1234:   PetscFunctionReturn(PETSC_SUCCESS);
1235: }

1237: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1238: {
1239:   PetscFunctionBegin;
1240:   if (nz) *nz = 0;
1241:   PetscCall(PetscFree2(*idx, *v));
1242:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1243:   PetscFunctionReturn(PETSC_SUCCESS);
1244: }

1246: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1247: {
1248:   Mat_MPIKAIJ   *b    = (Mat_MPIKAIJ *)A->data;
1249:   Mat            AIJ  = b->A;
1250:   PetscBool      diag = PETSC_FALSE;
1251:   Mat            MatAIJ, MatOAIJ;
1252:   const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1253:   PetscInt       nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1254:   PetscScalar   *v, *vals, *ovals, *S = b->S, *T = b->T;

1256:   PetscFunctionBegin;
1257:   PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1258:   MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1259:   MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1260:   PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1261:   b->getrowactive = PETSC_TRUE;
1262:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1263:   lrow = row - rstart;

1265:   if ((!S) && (!T) && (!b->isTI)) {
1266:     if (ncols) *ncols = 0;
1267:     if (cols) *cols = NULL;
1268:     if (values) *values = NULL;
1269:     PetscFunctionReturn(PETSC_SUCCESS);
1270:   }

1272:   r = lrow / p;
1273:   s = lrow % p;

1275:   if (T || b->isTI) {
1276:     PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1277:     PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1278:     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

1280:     c = ncolsaij + ncolsoaij;
1281:     for (i = 0; i < ncolsaij; i++) {
1282:       /* check if this row contains a diagonal entry */
1283:       if (colsaij[i] == r) {
1284:         diag = PETSC_TRUE;
1285:         c    = i;
1286:       }
1287:     }
1288:   } else c = 0;

1290:   /* calculate size of row */
1291:   nz = 0;
1292:   if (S) nz += q;
1293:   if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

1295:   if (cols || values) {
1296:     PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1297:     for (i = 0; i < q; i++) {
1298:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1299:       v[i] = 0.0;
1300:     }
1301:     if (b->isTI) {
1302:       for (i = 0; i < ncolsaij; i++) {
1303:         for (j = 0; j < q; j++) {
1304:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1305:           v[i * q + j]   = (j == s ? vals[i] : 0.0);
1306:         }
1307:       }
1308:       for (i = 0; i < ncolsoaij; i++) {
1309:         for (j = 0; j < q; j++) {
1310:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1311:           v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
1312:         }
1313:       }
1314:     } else if (T) {
1315:       for (i = 0; i < ncolsaij; i++) {
1316:         for (j = 0; j < q; j++) {
1317:           idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1318:           v[i * q + j]   = vals[i] * T[s + j * p];
1319:         }
1320:       }
1321:       for (i = 0; i < ncolsoaij; i++) {
1322:         for (j = 0; j < q; j++) {
1323:           idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1324:           v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
1325:         }
1326:       }
1327:     }
1328:     if (S) {
1329:       for (j = 0; j < q; j++) {
1330:         idx[c * q + j] = (r + rstart / p) * q + j;
1331:         v[c * q + j] += S[s + j * p];
1332:       }
1333:     }
1334:   }

1336:   if (ncols) *ncols = nz;
1337:   if (cols) *cols = idx;
1338:   if (values) *values = v;
1339:   PetscFunctionReturn(PETSC_SUCCESS);
1340: }

1342: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1343: {
1344:   PetscFunctionBegin;
1345:   PetscCall(PetscFree2(*idx, *v));
1346:   ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1347:   PetscFunctionReturn(PETSC_SUCCESS);
1348: }

1350: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1351: {
1352:   Mat A;

1354:   PetscFunctionBegin;
1355:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1356:   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1357:   PetscCall(MatDestroy(&A));
1358:   PetscFunctionReturn(PETSC_SUCCESS);
1359: }

1361: /*@C
1362:   MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form
1363: .vb
1364:     [I \otimes S + A \otimes T]
1365: .ve
1366:   where
1367: .vb
1368:     S is a dense (p \times q) matrix
1369:     T is a dense (p \times q) matrix
1370:     A is a `MATAIJ`  (n \times n) matrix
1371:     I is the identity matrix
1372: .ve
1373:   The resulting matrix is (np \times nq)

1375:   `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1377:   Collective

1379:   Input Parameters:
1380: + A - the `MATAIJ` matrix
1381: . p - number of rows in `S` and `T`
1382: . q - number of columns in `S` and `T`
1383: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1384: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)

1386:   Output Parameter:
1387: . kaij - the new `MATKAIJ` matrix

1389:   Level: advanced

1391:   Notes:
1392:   This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

1394:   Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

1396:   Developer Note:
1397:   In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1398:   of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1399:   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1400:   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

1402: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1403: @*/
1404: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1405: {
1406:   PetscFunctionBegin;
1407:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1408:   PetscCall(MatSetType(*kaij, MATKAIJ));
1409:   PetscCall(MatKAIJSetAIJ(*kaij, A));
1410:   PetscCall(MatKAIJSetS(*kaij, p, q, S));
1411:   PetscCall(MatKAIJSetT(*kaij, p, q, T));
1412:   PetscCall(MatSetUp(*kaij));
1413:   PetscFunctionReturn(PETSC_SUCCESS);
1414: }

1416: /*MC
1417:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1418:     [I \otimes S + A \otimes T],
1419:   where
1420: .vb
1421:     S is a dense (p \times q) matrix,
1422:     T is a dense (p \times q) matrix,
1423:     A is an AIJ  (n \times n) matrix,
1424:     and I is the identity matrix.
1425: .ve
1426:   The resulting matrix is (np \times nq).

1428:   S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

1430:   Level: advanced

1432:   Note:
1433:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1434:   where x and b are column vectors containing the row-major representations of X and B.

1436: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1437: M*/

1439: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1440: {
1441:   Mat_MPIKAIJ *b;
1442:   PetscMPIInt  size;

1444:   PetscFunctionBegin;
1445:   PetscCall(PetscNew(&b));
1446:   A->data = (void *)b;

1448:   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

1450:   b->w = NULL;
1451:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1452:   if (size == 1) {
1453:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1454:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1455:     A->ops->mult                = MatMult_SeqKAIJ;
1456:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1457:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1458:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1459:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1460:     A->ops->sor                 = MatSOR_SeqKAIJ;
1461:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1462:   } else {
1463:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1464:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1465:     A->ops->mult                = MatMult_MPIKAIJ;
1466:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1467:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1468:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1469:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1470:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1471:     PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1472:   }
1473:   A->ops->setup           = MatSetUp_KAIJ;
1474:   A->ops->view            = MatView_KAIJ;
1475:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1476:   PetscFunctionReturn(PETSC_SUCCESS);
1477: }