Actual source code: maij.c


  2: #include <../src/mat/impls/maij/maij.h>
  3: #include <../src/mat/utils/freespace.h>

  5: /*@
  6:    MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  8:    Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

 10:    Input Parameter:
 11: .  A - the `MATMAIJ` matrix

 13:    Output Parameter:
 14: .  B - the `MATAIJ` matrix

 16:    Level: advanced

 18:    Note:
 19:     The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

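   Example Usage:
   A minimal sketch (the name `mA` is a placeholder for an existing `MATMAIJ` matrix):
.vb
   Mat aij;

   PetscCall(MatMAIJGetAIJ(mA, &aij));
   /* use aij here, but do not call MatDestroy() on it */
.ve
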
 21: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
 22: @*/
 23: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
 24: {
 25:   PetscBool ismpimaij, isseqmaij;

 27:   PetscFunctionBegin;
 28:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
 29:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
 30:   if (ismpimaij) {
 31:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

 33:     *B = b->A;
 34:   } else if (isseqmaij) {
 35:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 37:     *B = b->AIJ;
 38:   } else {
 39:     *B = A;
 40:   }
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }

 44: /*@
 45:    MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

 47:    Logically Collective

 49:    Input Parameters:
 50: +  A - the `MATMAIJ` matrix
 51: -  dof - the block size for the new matrix

 53:    Output Parameter:
 54: .  B - the new `MATMAIJ` matrix

 56:    Level: advanced

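   Example Usage:
   A minimal sketch (the name `mA` is a placeholder for an existing `MATMAIJ` matrix):
.vb
   Mat mB;

   PetscCall(MatMAIJRedimension(mA, 5, &mB)); /* same blockwise action, now with block size 5 */
   PetscCall(MatDestroy(&mB));
.ve
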
 58: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
 59: @*/
 60: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
 61: {
 62:   Mat Aij = NULL;

 64:   PetscFunctionBegin;
 65:   PetscValidLogicalCollectiveInt(A, dof, 2);
 66:   PetscCall(MatMAIJGetAIJ(A, &Aij));
 67:   PetscCall(MatCreateMAIJ(Aij, dof, B));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 72: {
 73:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

 75:   PetscFunctionBegin;
 76:   PetscCall(MatDestroy(&b->AIJ));
 77:   PetscCall(PetscFree(A->data));
 78:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
 79:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
 80:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }

 84: static PetscErrorCode MatSetUp_MAIJ(Mat A)
 85: {
 86:   PetscFunctionBegin;
 87:   SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
 88: }

 90: static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
 91: {
 92:   Mat B;

 94:   PetscFunctionBegin;
 95:   PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
 96:   PetscCall(MatView(B, viewer));
 97:   PetscCall(MatDestroy(&B));
 98:   PetscFunctionReturn(PETSC_SUCCESS);
 99: }

101: static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
102: {
103:   Mat B;

105:   PetscFunctionBegin;
106:   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
107:   PetscCall(MatView(B, viewer));
108:   PetscCall(MatDestroy(&B));
109:   PetscFunctionReturn(PETSC_SUCCESS);
110: }

112: static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
113: {
114:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

116:   PetscFunctionBegin;
117:   PetscCall(MatDestroy(&b->AIJ));
118:   PetscCall(MatDestroy(&b->OAIJ));
119:   PetscCall(MatDestroy(&b->A));
120:   PetscCall(VecScatterDestroy(&b->ctx));
121:   PetscCall(VecDestroy(&b->w));
122:   PetscCall(PetscFree(A->data));
123:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
124:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
125:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
126:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
127:   PetscFunctionReturn(PETSC_SUCCESS);
128: }

130: /*MC
131:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
132:   multicomponent problems, interpolating or restricting each component the same way independently.
133:   The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

135:   Operations provided:
136: .vb
137:     MatMult()
138:     MatMultTranspose()
139:     MatMultAdd()
140:     MatMultTransposeAdd()
141: .ve

143:   Level: advanced

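  Note:
  If the underlying `MATAIJ` matrix is A and the block size is dof, the action on a vector whose dof components per
  node are stored contiguously (interleaved) is y[dof*i + k] = sum_j A(i,j) x[dof*j + k] for k = 0, ..., dof-1,
  i.e. A is applied to each component independently.
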
145: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
146: M*/

148: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
149: {
150:   Mat_MPIMAIJ *b;
151:   PetscMPIInt  size;

153:   PetscFunctionBegin;
154:   PetscCall(PetscNew(&b));
155:   A->data = (void *)b;

157:   PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

159:   A->ops->setup = MatSetUp_MAIJ;

161:   b->AIJ  = NULL;
162:   b->dof  = 0;
163:   b->OAIJ = NULL;
164:   b->ctx  = NULL;
165:   b->w    = NULL;
166:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
167:   if (size == 1) {
168:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
169:   } else {
170:     PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
171:   }
172:   A->preallocated = PETSC_TRUE;
173:   A->assembled    = PETSC_TRUE;
174:   PetscFunctionReturn(PETSC_SUCCESS);
175: }

177: #if PetscHasAttribute(always_inline)
178:   #define PETSC_FORCE_INLINE __attribute__((always_inline))
179: #else
180:   #define PETSC_FORCE_INLINE
181: #endif

183: #if defined(__clang__)
184:   #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
185: #else
186:   #define PETSC_PRAGMA_UNROLL
187: #endif

189: enum {
190:   MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
191: };

193: // try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
194: // keyword into account for these...
195: PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
196: {
197:   const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
198:   const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
199:   const Mat          baij       = b->AIJ;
200:   const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
201:   const PetscInt     m          = baij->rmap->n;
202:   const PetscInt     nz         = a->nz;
203:   const PetscInt    *idx        = a->j;
204:   const PetscInt    *ii         = a->i;
205:   const PetscScalar *v          = a->a;
206:   PetscInt           nonzerorow = 0;
207:   const PetscScalar *x;
208:   PetscScalar       *z;

210:   PetscFunctionBegin;
211:   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
212:   if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
213:   PetscCall(VecGetArrayRead(xx, &x));
214:   if (mult_add) {
215:     PetscCall(VecGetArray(zz, &z));
216:   } else {
217:     PetscCall(VecGetArrayWrite(zz, &z));
218:   }

220:   for (PetscInt i = 0; i < m; ++i) {
221:     PetscInt       jrow = ii[i];
222:     const PetscInt n    = ii[i + 1] - jrow;
223:     // leave a line so clang-format does not align these decls
224:     PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

226:     nonzerorow += n > 0;
227:     for (PetscInt j = 0; j < n; ++j, ++jrow) {
228:       const PetscScalar v_jrow     = v[jrow];
229:       const PetscInt    N_idx_jrow = N * idx[jrow];

231:       PETSC_PRAGMA_UNROLL
232:       for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
233:     }

235:     PETSC_PRAGMA_UNROLL
236:     for (int k = 0; k < N; ++k) {
237:       const PetscInt z_idx = N * i + k;

239:       if (mult_add) {
240:         z[z_idx] += sum[k];
241:       } else {
242:         z[z_idx] = sum[k];
243:       }
244:     }
245:   }
246:   PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
247:   PetscCall(VecRestoreArrayRead(xx, &x));
248:   if (mult_add) {
249:     PetscCall(VecRestoreArray(zz, &z));
250:   } else {
251:     PetscCall(VecRestoreArrayWrite(zz, &z));
252:   }
253:   PetscFunctionReturn(PETSC_SUCCESS);
254: }

256: PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
257: {
258:   const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
259:   const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
260:   const Mat          baij     = b->AIJ;
261:   const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
262:   const PetscInt     m        = baij->rmap->n;
263:   const PetscInt     nz       = a->nz;
264:   const PetscInt    *a_j      = a->j;
265:   const PetscInt    *a_i      = a->i;
266:   const PetscScalar *a_a      = a->a;
267:   const PetscScalar *x;
268:   PetscScalar       *z;

270:   PetscFunctionBegin;
271:   PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
272:   if (mult_add) {
273:     if (yy != zz) PetscCall(VecCopy(yy, zz));
274:   } else {
275:     PetscCall(VecSet(zz, 0.0));
276:   }
277:   PetscCall(VecGetArrayRead(xx, &x));
278:   PetscCall(VecGetArray(zz, &z));

280:   for (PetscInt i = 0; i < m; i++) {
281:     const PetscInt     a_ii = a_i[i];
282:     const PetscInt    *idx  = a_j + a_ii;
283:     const PetscScalar *v    = a_a + a_ii;
284:     const PetscInt     n    = a_i[i + 1] - a_ii;
285:     PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

287:     PETSC_PRAGMA_UNROLL
288:     for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
289:     for (PetscInt j = 0; j < n; ++j) {
290:       const PetscInt    N_idx_j = N * idx[j];
291:       const PetscScalar v_j     = v[j];

293:       PETSC_PRAGMA_UNROLL
294:       for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
295:     }
296:   }

298:   PetscCall(PetscLogFlops(2 * N * nz));
299:   PetscCall(VecRestoreArrayRead(xx, &x));
300:   PetscCall(VecRestoreArray(zz, &z));
301:   PetscFunctionReturn(PETSC_SUCCESS);
302: }

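// The macro below stamps out MatMult/MatMultTranspose/MatMultAdd/MatMultTransposeAdd for one fixed block size N by
// calling the inline "template"s above with a compile-time constant, so that the compiler can fully unroll the
// k-loops of length N.
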
304: #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
305:   static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
306:   { \
307:     PetscFunctionBegin; \
308:     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
309:     PetscFunctionReturn(PETSC_SUCCESS); \
310:   } \
311:   static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
312:   { \
313:     PetscFunctionBegin; \
314:     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
315:     PetscFunctionReturn(PETSC_SUCCESS); \
316:   } \
317:   static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
318:   { \
319:     PetscFunctionBegin; \
320:     PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
321:     PetscFunctionReturn(PETSC_SUCCESS); \
322:   } \
323:   static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
324:   { \
325:     PetscFunctionBegin; \
326:     PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
327:     PetscFunctionReturn(PETSC_SUCCESS); \
328:   }

330: // clang-format off
331: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
332: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
333: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
334: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
335: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
336: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
337: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
338: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
339: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
340: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
341: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
342: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
343: // clang-format on

345: #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE

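/*
  Fallback kernels used when the block size has no specialized instantiation above. For the underlying AIJ matrix a
  and the interleaved MAIJ layout they compute

     y[dof*i + k] = sum_j a(i,j) * x[dof*j + k],   k = 0, ..., dof-1

  (plus the corresponding add and transpose variants), with dof a runtime value rather than a compile-time constant.
*/
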
347: static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
348: {
349:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
350:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
351:   const PetscScalar *x, *v;
352:   PetscScalar       *y, *sums;
353:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
354:   PetscInt           n, i, jrow, j, dof = b->dof, k;

356:   PetscFunctionBegin;
357:   PetscCall(VecGetArrayRead(xx, &x));
358:   PetscCall(VecSet(yy, 0.0));
359:   PetscCall(VecGetArray(yy, &y));
360:   idx = a->j;
361:   v   = a->a;
362:   ii  = a->i;

364:   for (i = 0; i < m; i++) {
365:     jrow = ii[i];
366:     n    = ii[i + 1] - jrow;
367:     sums = y + dof * i;
368:     for (j = 0; j < n; j++) {
369:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
370:       jrow++;
371:     }
372:   }

374:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
375:   PetscCall(VecRestoreArrayRead(xx, &x));
376:   PetscCall(VecRestoreArray(yy, &y));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
381: {
382:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
383:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
384:   const PetscScalar *x, *v;
385:   PetscScalar       *y, *sums;
386:   const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
387:   PetscInt           n, i, jrow, j, dof = b->dof, k;

389:   PetscFunctionBegin;
390:   if (yy != zz) PetscCall(VecCopy(yy, zz));
391:   PetscCall(VecGetArrayRead(xx, &x));
392:   PetscCall(VecGetArray(zz, &y));
393:   idx = a->j;
394:   v   = a->a;
395:   ii  = a->i;

397:   for (i = 0; i < m; i++) {
398:     jrow = ii[i];
399:     n    = ii[i + 1] - jrow;
400:     sums = y + dof * i;
401:     for (j = 0; j < n; j++) {
402:       for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
403:       jrow++;
404:     }
405:   }

407:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
408:   PetscCall(VecRestoreArrayRead(xx, &x));
409:   PetscCall(VecRestoreArray(zz, &y));
410:   PetscFunctionReturn(PETSC_SUCCESS);
411: }

413: static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
414: {
415:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
416:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
417:   const PetscScalar *x, *v, *alpha;
418:   PetscScalar       *y;
419:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
420:   PetscInt           n, i, k;

422:   PetscFunctionBegin;
423:   PetscCall(VecGetArrayRead(xx, &x));
424:   PetscCall(VecSet(yy, 0.0));
425:   PetscCall(VecGetArray(yy, &y));
426:   for (i = 0; i < m; i++) {
427:     idx   = a->j + a->i[i];
428:     v     = a->a + a->i[i];
429:     n     = a->i[i + 1] - a->i[i];
430:     alpha = x + dof * i;
431:     while (n-- > 0) {
432:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
433:       idx++;
434:       v++;
435:     }
436:   }
437:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
438:   PetscCall(VecRestoreArrayRead(xx, &x));
439:   PetscCall(VecRestoreArray(yy, &y));
440:   PetscFunctionReturn(PETSC_SUCCESS);
441: }

443: static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
444: {
445:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
446:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
447:   const PetscScalar *x, *v, *alpha;
448:   PetscScalar       *y;
449:   const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
450:   PetscInt           n, i, k;

452:   PetscFunctionBegin;
453:   if (yy != zz) PetscCall(VecCopy(yy, zz));
454:   PetscCall(VecGetArrayRead(xx, &x));
455:   PetscCall(VecGetArray(zz, &y));
456:   for (i = 0; i < m; i++) {
457:     idx   = a->j + a->i[i];
458:     v     = a->a + a->i[i];
459:     n     = a->i[i + 1] - a->i[i];
460:     alpha = x + dof * i;
461:     while (n-- > 0) {
462:       for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
463:       idx++;
464:       v++;
465:     }
466:   }
467:   PetscCall(PetscLogFlops(2.0 * dof * a->nz));
468:   PetscCall(VecRestoreArrayRead(xx, &x));
469:   PetscCall(VecRestoreArray(zz, &y));
470:   PetscFunctionReturn(PETSC_SUCCESS);
471: }

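/*
  The parallel kernels below follow the usual MPIAIJ split: the "diagonal" part b->AIJ acts on the locally owned
  entries of x, while the scatter b->ctx gathers the needed ghost values into the work vector b->w, which is then
  applied through the "off-diagonal" part b->OAIJ. Starting the scatter before the local multiply overlaps
  communication with computation.
*/
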
473: static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
474: {
475:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

477:   PetscFunctionBegin;
478:   /* start the scatter */
479:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
480:   PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
481:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
482:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
483:   PetscFunctionReturn(PETSC_SUCCESS);
484: }

486: static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
487: {
488:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

490:   PetscFunctionBegin;
491:   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
492:   PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
493:   PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
494:   PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
495:   PetscFunctionReturn(PETSC_SUCCESS);
496: }

498: static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
499: {
500:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

502:   PetscFunctionBegin;
503:   /* start the scatter */
504:   PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
505:   PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
506:   PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
507:   PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
508:   PetscFunctionReturn(PETSC_SUCCESS);
509: }

511: static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
512: {
513:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

515:   PetscFunctionBegin;
516:   PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
517:   PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
518:   PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
519:   PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
520:   PetscFunctionReturn(PETSC_SUCCESS);
521: }

523: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
524: {
525:   Mat_Product *product = C->product;

527:   PetscFunctionBegin;
528:   if (product->type == MATPRODUCT_PtAP) {
529:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
530:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
531:   PetscFunctionReturn(PETSC_SUCCESS);
532: }

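/*
  Only PtAP products are supported here. The algorithm may be selected at runtime, e.g. with
  -mat_product_algorithm allatonce; for algorithms other than "allatonce" and "allatonce_merged" the MAIJ matrix P
  is converted to MPIAIJ and the generic product machinery is used instead.
*/
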
534: static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
535: {
536:   Mat_Product *product = C->product;
537:   PetscBool    flg     = PETSC_FALSE;
538:   Mat          A = product->A, P = product->B;
539:   PetscInt     alg = 1; /* set default algorithm */
540: #if !defined(PETSC_HAVE_HYPRE)
541:   const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
542:   PetscInt    nalg        = 4;
543: #else
544:   const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
545:   PetscInt    nalg        = 5;
546: #endif

548:   PetscFunctionBegin;
549:   PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

551:   /* PtAP */
552:   /* Check matrix local sizes */
553:   PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
554:              A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
555:   PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
556:              A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

558:   /* Set the default algorithm */
559:   PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
560:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

562:   /* Get runtime option */
563:   PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
564:   PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
565:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
566:   PetscOptionsEnd();

568:   PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
569:   if (flg) {
570:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
571:     PetscFunctionReturn(PETSC_SUCCESS);
572:   }

574:   PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
575:   if (flg) {
576:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
577:     PetscFunctionReturn(PETSC_SUCCESS);
578:   }

580:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
581:   PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
582:   PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
583:   PetscCall(MatProductSetFromOptions(C));
584:   PetscFunctionReturn(PETSC_SUCCESS);
585: }

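/*
  Numeric phase of C = P^T * A * P with P stored as SeqMAIJ: for each row i of A a sparse row of A*P is accumulated
  in a dense work array and then added into C as the outer product (P^T)[:,i] * (A*P)[i,:], traversing the same
  compressed row of P 'dof' times with the appropriate component shift.
*/
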
587: static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
588: {
589:   /* This routine requires testing -- first draft only */
590:   Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
591:   Mat              P  = pp->AIJ;
592:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
593:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
594:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
595:   const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
596:   const PetscInt  *ci = c->i, *cj = c->j, *cjj;
597:   const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
598:   PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
599:   const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
600:   MatScalar       *ca = c->a, *caj, *apa;

602:   PetscFunctionBegin;
603:   /* Allocate temporary array for storage of one row of A*P */
604:   PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

606:   /* Clear old values in C */
607:   PetscCall(PetscArrayzero(ca, ci[cm]));

609:   for (i = 0; i < am; i++) {
610:     /* Form sparse row of A*P */
611:     anzi  = ai[i + 1] - ai[i];
612:     apnzj = 0;
613:     for (j = 0; j < anzi; j++) {
614:       /* Get offset within block of P */
615:       pshift = *aj % ppdof;
616:       /* Get block row of P */
617:       prow = *aj++ / ppdof; /* integer division */
618:       pnzj = pi[prow + 1] - pi[prow];
619:       pjj  = pj + pi[prow];
620:       paj  = pa + pi[prow];
621:       for (k = 0; k < pnzj; k++) {
622:         poffset = pjj[k] * ppdof + pshift;
623:         if (!apjdense[poffset]) {
624:           apjdense[poffset] = -1;
625:           apj[apnzj++]      = poffset;
626:         }
627:         apa[poffset] += (*aa) * paj[k];
628:       }
629:       PetscCall(PetscLogFlops(2.0 * pnzj));
630:       aa++;
631:     }

633:     /* Sort the j index array for quick sparse axpy. */
634:     /* Note: a array does not need sorting as it is in dense storage locations. */
635:     PetscCall(PetscSortInt(apnzj, apj));

637:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
638:     prow    = i / ppdof; /* integer division */
639:     pshift  = i % ppdof;
640:     poffset = pi[prow];
641:     pnzi    = pi[prow + 1] - poffset;
642:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
643:     pJ = pj + poffset;
644:     pA = pa + poffset;
645:     for (j = 0; j < pnzi; j++) {
646:       crow = (*pJ) * ppdof + pshift;
647:       cjj  = cj + ci[crow];
648:       caj  = ca + ci[crow];
649:       pJ++;
650:       /* Perform sparse axpy operation.  Note cjj includes apj. */
651:       for (k = 0, nextap = 0; nextap < apnzj; k++) {
652:         if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
653:       }
654:       PetscCall(PetscLogFlops(2.0 * apnzj));
655:       pA++;
656:     }

658:     /* Zero the current row info for A*P */
659:     for (j = 0; j < apnzj; j++) {
660:       apa[apj[j]]      = 0.;
661:       apjdense[apj[j]] = 0;
662:     }
663:   }

665:   /* Assemble the final matrix and clean up */
666:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
667:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
668:   PetscCall(PetscFree3(apa, apj, apjdense));
669:   PetscFunctionReturn(PETSC_SUCCESS);
670: }

672: static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
673: {
674:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
675:   Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
676:   Mat                P  = pp->AIJ;
677:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
678:   PetscInt          *pti, *ptj, *ptJ;
679:   PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
680:   const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
681:   PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
682:   MatScalar         *ca;
683:   const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

685:   PetscFunctionBegin;
686:   /* Get ij structure of P^T */
687:   PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

689:   cn = pn * ppdof;
690:   /* Allocate ci array, arrays for fill computation and */
691:   /* free space for accumulating nonzero column info */
692:   PetscCall(PetscMalloc1(cn + 1, &ci));
693:   ci[0] = 0;

695:   /* Work arrays for rows of P^T*A */
696:   PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
697:   PetscCall(PetscArrayzero(ptadenserow, an));
698:   PetscCall(PetscArrayzero(denserow, cn));

700:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
701:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
702:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
703:   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
704:   current_space = free_space;

706:   /* Determine symbolic info for each row of C: */
707:   for (i = 0; i < pn; i++) {
708:     ptnzi = pti[i + 1] - pti[i];
709:     ptJ   = ptj + pti[i];
710:     for (dof = 0; dof < ppdof; dof++) {
711:       ptanzi = 0;
712:       /* Determine symbolic row of PtA: */
713:       for (j = 0; j < ptnzi; j++) {
714:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
715:         arow = ptJ[j] * ppdof + dof;
716:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
717:         anzj = ai[arow + 1] - ai[arow];
718:         ajj  = aj + ai[arow];
719:         for (k = 0; k < anzj; k++) {
720:           if (!ptadenserow[ajj[k]]) {
721:             ptadenserow[ajj[k]]    = -1;
722:             ptasparserow[ptanzi++] = ajj[k];
723:           }
724:         }
725:       }
726:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
727:       ptaj = ptasparserow;
728:       cnzi = 0;
729:       for (j = 0; j < ptanzi; j++) {
730:         /* Get offset within block of P */
731:         pshift = *ptaj % ppdof;
732:         /* Get block row of P */
733:         prow = (*ptaj++) / ppdof; /* integer division */
734:         /* P has same number of nonzeros per row as the compressed form */
735:         pnzj = pi[prow + 1] - pi[prow];
736:         pjj  = pj + pi[prow];
737:         for (k = 0; k < pnzj; k++) {
738:           /* Locations in C are shifted by the offset within the block */
739:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
740:           if (!denserow[pjj[k] * ppdof + pshift]) {
741:             denserow[pjj[k] * ppdof + pshift] = -1;
742:             sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
743:           }
744:         }
745:       }

747:       /* sort sparserow */
748:       PetscCall(PetscSortInt(cnzi, sparserow));

750:       /* If free space is not available, make more free space */
751:       /* Double the amount of total space in the list */
752:       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

754:       /* Copy data into free space, and zero out denserows */
755:       PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

757:       current_space->array += cnzi;
758:       current_space->local_used += cnzi;
759:       current_space->local_remaining -= cnzi;

761:       for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
762:       for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

764:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
765:       /*        For now, we will recompute what is needed. */
766:       ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
767:     }
768:   }
769:   /* nnz is now stored in ci[cn], column indices are in the list of free space */
770:   /* Allocate space for cj, initialize cj, and */
771:   /* destroy list of free space and other temporary array(s) */
772:   PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
773:   PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
774:   PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

776:   /* Allocate space for ca */
777:   PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

779:   /* put together the new matrix */
780:   PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
781:   PetscCall(MatSetBlockSize(C, pp->dof));

783:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
784:   /* Since these are PETSc arrays, change flags to free them as necessary. */
785:   c          = (Mat_SeqAIJ *)(C->data);
786:   c->free_a  = PETSC_TRUE;
787:   c->free_ij = PETSC_TRUE;
788:   c->nonew   = 0;

790:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
791:   C->ops->productnumeric = MatProductNumeric_PtAP;

793:   /* Clean up. */
794:   PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
795:   PetscFunctionReturn(PETSC_SUCCESS);
796: }

798: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
799: {
800:   Mat_Product *product = C->product;
801:   Mat          A = product->A, P = product->B;

803:   PetscFunctionBegin;
804:   PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
805:   PetscFunctionReturn(PETSC_SUCCESS);
806: }

808: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

810: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
811: {
812:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

814:   PetscFunctionBegin;

816:   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
817:   PetscFunctionReturn(PETSC_SUCCESS);
818: }

820: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

822: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
823: {
824:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

826:   PetscFunctionBegin;
827:   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
828:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
829:   PetscFunctionReturn(PETSC_SUCCESS);
830: }

832: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

834: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
835: {
836:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

838:   PetscFunctionBegin;

840:   PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

846: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
847: {
848:   Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

850:   PetscFunctionBegin;

852:   PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
853:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
854:   PetscFunctionReturn(PETSC_SUCCESS);
855: }

857: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
858: {
859:   Mat_Product *product = C->product;
860:   Mat          A = product->A, P = product->B;
861:   PetscBool    flg;

863:   PetscFunctionBegin;
864:   PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
865:   if (flg) {
866:     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
867:     C->ops->productnumeric = MatProductNumeric_PtAP;
868:     PetscFunctionReturn(PETSC_SUCCESS);
869:   }

871:   PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
872:   if (flg) {
873:     PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
874:     C->ops->productnumeric = MatProductNumeric_PtAP;
875:     PetscFunctionReturn(PETSC_SUCCESS);
876:   }

878:   SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
879: }

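/*
  Expand a SeqMAIJ matrix into an explicit SeqAIJ matrix: each nonzero a(i,j) of the underlying AIJ matrix produces
  'dof' nonzeros with the same value, one per component, at row dof*i + c and column dof*j + c for c = 0, ..., dof-1.
*/
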
881: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
882: {
883:   Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
884:   Mat          a   = b->AIJ, B;
885:   Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
886:   PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
887:   PetscInt    *cols;
888:   PetscScalar *vals;

890:   PetscFunctionBegin;
891:   PetscCall(MatGetSize(a, &m, &n));
892:   PetscCall(PetscMalloc1(dof * m, &ilen));
893:   for (i = 0; i < m; i++) {
894:     nmax = PetscMax(nmax, aij->ilen[i]);
895:     for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
896:   }
897:   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
898:   PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
899:   PetscCall(MatSetType(B, newtype));
900:   PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
901:   PetscCall(PetscFree(ilen));
902:   PetscCall(PetscMalloc1(nmax, &icols));
903:   ii = 0;
904:   for (i = 0; i < m; i++) {
905:     PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
906:     for (j = 0; j < dof; j++) {
907:       for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
908:       PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
909:       ii++;
910:     }
911:     PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
912:   }
913:   PetscCall(PetscFree(icols));
914:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
915:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

917:   if (reuse == MAT_INPLACE_MATRIX) {
918:     PetscCall(MatHeaderReplace(A, &B));
919:   } else {
920:     *newmat = B;
921:   }
922:   PetscFunctionReturn(PETSC_SUCCESS);
923: }

925: #include <../src/mat/impls/aij/mpi/mpiaij.h>

927: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
928: {
929:   Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
930:   Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
931:   Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
932:   Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
933:   Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
934:   Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
935:   PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
936:   PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
937:   PetscInt     rstart, cstart, *garray, ii, k;
938:   PetscScalar *vals, *ovals;

940:   PetscFunctionBegin;
941:   PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
942:   for (i = 0; i < A->rmap->n / dof; i++) {
943:     nmax  = PetscMax(nmax, AIJ->ilen[i]);
944:     onmax = PetscMax(onmax, OAIJ->ilen[i]);
945:     for (j = 0; j < dof; j++) {
946:       dnz[dof * i + j] = AIJ->ilen[i];
947:       onz[dof * i + j] = OAIJ->ilen[i];
948:     }
949:   }
950:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
951:   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
952:   PetscCall(MatSetType(B, newtype));
953:   PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
954:   PetscCall(MatSetBlockSize(B, dof));
955:   PetscCall(PetscFree2(dnz, onz));

957:   PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
958:   rstart = dof * maij->A->rmap->rstart;
959:   cstart = dof * maij->A->cmap->rstart;
960:   garray = mpiaij->garray;

962:   ii = rstart;
963:   for (i = 0; i < A->rmap->n / dof; i++) {
964:     PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
965:     PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
966:     for (j = 0; j < dof; j++) {
967:       for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
968:       for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
969:       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
970:       PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
971:       ii++;
972:     }
973:     PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
974:     PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
975:   }
976:   PetscCall(PetscFree2(icols, oicols));

978:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
979:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

981:   if (reuse == MAT_INPLACE_MATRIX) {
982:     PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
983:     ((PetscObject)A)->refct = 1;

985:     PetscCall(MatHeaderReplace(A, &B));

987:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
988:   } else {
989:     *newmat = B;
990:   }
991:   PetscFunctionReturn(PETSC_SUCCESS);
992: }

994: static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
995: {
996:   Mat A;

998:   PetscFunctionBegin;
999:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1000:   PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1001:   PetscCall(MatDestroy(&A));
1002:   PetscFunctionReturn(PETSC_SUCCESS);
1003: }

1005: static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1006: {
1007:   Mat A;

1009:   PetscFunctionBegin;
1010:   PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1011:   PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1012:   PetscCall(MatDestroy(&A));
1013:   PetscFunctionReturn(PETSC_SUCCESS);
1014: }

1016: /*@
1017:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1018:   operations for multicomponent problems.  It interpolates each component the same
1019:   way independently.  The matrix type is based on `MATSEQAIJ` for sequential matrices,
1020:   and `MATMPIAIJ` for distributed matrices.

1022:   Collective

1024:   Input Parameters:
1025: + A - the `MATAIJ` matrix describing the action on blocks
1026: - dof - the block size (number of components per node)

1028:   Output Parameter:
1029: . maij - the new `MATMAIJ` matrix

1031:   Level: advanced

1033:   Operations provided:
1034: .vb
1035:     MatMult()
1036:     MatMultTranspose()
1037:     MatMultAdd()
1038:     MatMultTransposeAdd()
1039:     MatView()
1040: .ve

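  Example Usage:
  A minimal sketch (`A` is an existing assembled `MATAIJ` matrix; `x` and `y` are placeholder vectors with
  compatible interleaved layouts):
.vb
  Mat maij;

  PetscCall(MatCreateMAIJ(A, 3, &maij)); /* acts on vectors with 3 interleaved components per node */
  PetscCall(MatMult(maij, x, y));
  PetscCall(MatDestroy(&maij));
.ve
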
1042: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1043: @*/
1044: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1045: {
1046:   PetscInt  n;
1047:   Mat       B;
1048:   PetscBool flg;
1049: #if defined(PETSC_HAVE_CUDA)
1050:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1051:   PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1052: #endif

1054:   PetscFunctionBegin;
1055:   dof = PetscAbs(dof);
1056:   PetscCall(PetscObjectReference((PetscObject)A));

1058:   if (dof == 1) *maij = A;
1059:   else {
1060:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1061:     /* propagate vec type */
1062:     PetscCall(MatSetVecType(B, A->defaultvectype));
1063:     PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1064:     PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1065:     PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1066:     PetscCall(PetscLayoutSetUp(B->rmap));
1067:     PetscCall(PetscLayoutSetUp(B->cmap));

1069:     B->assembled = PETSC_TRUE;

1071:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1072:     if (flg) {
1073:       Mat_SeqMAIJ *b;

1075:       PetscCall(MatSetType(B, MATSEQMAIJ));

1077:       B->ops->setup   = NULL;
1078:       B->ops->destroy = MatDestroy_SeqMAIJ;
1079:       B->ops->view    = MatView_SeqMAIJ;

1081:       b      = (Mat_SeqMAIJ *)B->data;
1082:       b->dof = dof;
1083:       b->AIJ = A;

1085:       if (dof == 2) {
1086:         B->ops->mult             = MatMult_SeqMAIJ_2;
1087:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1088:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1089:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1090:       } else if (dof == 3) {
1091:         B->ops->mult             = MatMult_SeqMAIJ_3;
1092:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1093:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1094:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1095:       } else if (dof == 4) {
1096:         B->ops->mult             = MatMult_SeqMAIJ_4;
1097:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1098:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1099:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1100:       } else if (dof == 5) {
1101:         B->ops->mult             = MatMult_SeqMAIJ_5;
1102:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1103:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1104:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1105:       } else if (dof == 6) {
1106:         B->ops->mult             = MatMult_SeqMAIJ_6;
1107:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1108:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1109:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1110:       } else if (dof == 7) {
1111:         B->ops->mult             = MatMult_SeqMAIJ_7;
1112:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
1113:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
1114:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1115:       } else if (dof == 8) {
1116:         B->ops->mult             = MatMult_SeqMAIJ_8;
1117:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1118:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1119:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1120:       } else if (dof == 9) {
1121:         B->ops->mult             = MatMult_SeqMAIJ_9;
1122:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1123:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1124:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1125:       } else if (dof == 10) {
1126:         B->ops->mult             = MatMult_SeqMAIJ_10;
1127:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
1128:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
1129:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1130:       } else if (dof == 11) {
1131:         B->ops->mult             = MatMult_SeqMAIJ_11;
1132:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
1133:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
1134:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1135:       } else if (dof == 16) {
1136:         B->ops->mult             = MatMult_SeqMAIJ_16;
1137:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1138:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1139:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1140:       } else if (dof == 18) {
1141:         B->ops->mult             = MatMult_SeqMAIJ_18;
1142:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
1143:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
1144:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1145:       } else {
1146:         B->ops->mult             = MatMult_SeqMAIJ_N;
1147:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
1148:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
1149:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1150:       }
1151: #if defined(PETSC_HAVE_CUDA)
1152:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1153: #endif
1154:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1155:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1156:     } else {
1157:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
1158:       Mat_MPIMAIJ *b;
1159:       IS           from, to;
1160:       Vec          gvec;

1162:       PetscCall(MatSetType(B, MATMPIMAIJ));

1164:       B->ops->setup   = NULL;
1165:       B->ops->destroy = MatDestroy_MPIMAIJ;
1166:       B->ops->view    = MatView_MPIMAIJ;

1168:       b      = (Mat_MPIMAIJ *)B->data;
1169:       b->dof = dof;
1170:       b->A   = A;

1172:       PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1173:       PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

1175:       PetscCall(VecGetSize(mpiaij->lvec, &n));
1176:       PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1177:       PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1178:       PetscCall(VecSetBlockSize(b->w, dof));
1179:       PetscCall(VecSetType(b->w, VECSEQ));

1181:       /* create two temporary index sets used to build the scatter context */
1182:       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1183:       PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

1185:       /* create temporary global vector to generate scatter context */
1186:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

1188:       /* generate the scatter context */
1189:       PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

1191:       PetscCall(ISDestroy(&from));
1192:       PetscCall(ISDestroy(&to));
1193:       PetscCall(VecDestroy(&gvec));

1195:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1196:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1197:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1198:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

1200: #if defined(PETSC_HAVE_CUDA)
1201:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1202: #endif
1203:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1204:       PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1205:     }
1206:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
1207:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1208:     PetscCall(MatSetUp(B));
1209: #if defined(PETSC_HAVE_CUDA)
1210:     /* temporary until we have CUDA implementation of MAIJ */
1211:     {
1212:       PetscBool flg;
1213:       if (convert) {
1214:         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1215:         if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1216:       }
1217:     }
1218: #endif
1219:     *maij = B;
1220:   }
1221:   PetscFunctionReturn(PETSC_SUCCESS);
1222: }