#include <../src/mat/impls/maij/maij.h>
#include <../src/mat/utils/freespace.h>

/*@
  MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  Input Parameter:
. A - the `MATMAIJ` matrix

  Output Parameter:
. B - the `MATAIJ` matrix

  Level: advanced

  Note:
  The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpimaij, isseqmaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
  if (ismpimaij) {
    Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

    *B = b->A;
  } else if (isseqmaij) {
    Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

    *B = b->AIJ;
  } else {
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
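/*
  A minimal usage sketch (illustrative only; the variable names below are not from this file):
  given an existing MATMAIJ matrix M, retrieve the underlying MATAIJ matrix that defines its
  blockwise action.  The returned matrix is borrowed, so it must not be destroyed by the caller.

    Mat      aij;
    PetscInt m, n;

    PetscCall(MatMAIJGetAIJ(M, &aij));
    PetscCall(MatGetSize(aij, &m, &n)); // sizes of the per-component (unexpanded) operator
*/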
/*@
  MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

  Logically Collective

  Input Parameters:
+ A   - the `MATMAIJ` matrix
- dof - the block size for the new matrix

  Output Parameter:
. B - the new `MATMAIJ` matrix

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
{
  Mat Aij = NULL;

  PetscFunctionBegin;

  PetscCall(MatMAIJGetAIJ(A, &Aij));
  PetscCall(MatCreateMAIJ(Aij, dof, B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
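/*
  A minimal usage sketch (illustrative; the names below are not from this file): take the
  per-component interpolation defined by a MATMAIJ matrix built for 3 components and rebuild
  it for 5 components acting on the same underlying MATAIJ matrix.

    Mat M3, M5;

    PetscCall(MatCreateMAIJ(aij, 3, &M3));
    PetscCall(MatMAIJRedimension(M3, 5, &M5));
    // M5 applies the same per-component interpolation as M3, but to 5 components per node
*/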
static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
{
  Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}

static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*MC
  MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
  multicomponent problems, interpolating or restricting each component the same way independently.
  The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
.ve

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
{
  Mat_MPIMAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  A->ops->setup = MatSetUp_MAIJ;

  b->AIJ  = NULL;
  b->dof  = 0;
  b->OAIJ = NULL;
  b->ctx  = NULL;
  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
  }
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
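/*
  The action of a MAIJ matrix built from an AIJ matrix a(i,j) with block size dof is, for each
  component k = 0, ..., dof-1 (components are interlaced within each node):

      y[dof*i + k] = sum_j a(i,j) * x[dof*j + k]

  i.e. the same scalar operator is applied independently to every component, which is exactly
  what the MatMult/MatMultTranspose kernels below implement.
*/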
#if PetscHasAttribute(always_inline)
  #define PETSC_FORCE_INLINE __attribute__((always_inline))
#else
  #define PETSC_FORCE_INLINE
#endif

#if defined(__clang__)
  #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
#else
  #define PETSC_PRAGMA_UNROLL
#endif

enum {
  MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
};

// try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
// keyword into account for these...
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
  const Mat          baij       = b->AIJ;
  const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m          = baij->rmap->n;
  const PetscInt     nz         = a->nz;
  const PetscInt    *idx        = a->j;
  const PetscInt    *ii         = a->i;
  const PetscScalar *v          = a->a;
  PetscInt           nonzerorow = 0;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add) {
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
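/*
  Note on the convention above: the same body implements both MatMult (yy == NULL, so zz is
  overwritten) and MatMultAdd (zz accumulates A*xx after an initial copy of yy).  Because every
  caller generated by the instantiation macro below passes a literal N, the PETSC_PRAGMA_UNROLL
  loops over k run over a compile-time constant and can be fully unrolled once the template is
  inlined.
*/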
PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add) {
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    const PetscInt    *idx  = a_j + a_ii;
    const PetscScalar *v    = a_a + a_ii;
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

// clang-format off
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
// clang-format on

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
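/*
  For example, MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) above defines
  MatMult_SeqMAIJ_2(), MatMultTranspose_SeqMAIJ_2(), MatMultAdd_SeqMAIJ_2() and
  MatMultTransposeAdd_SeqMAIJ_2(), each a thin wrapper that calls the corresponding template
  with the compile-time constant N = 2; MatCreateMAIJ() installs these as the operations of a
  MATSEQMAIJ matrix with dof = 2.
*/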
static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
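/*
  The pattern above is the usual MPIAIJ-style overlap of communication and computation: the
  scatter of off-process entries of xx into the work vector b->w is started, the purely local
  product with the diagonal block (b->AIJ) is computed while messages are in flight, and only
  then is the scatter completed and the off-diagonal block (b->OAIJ) applied with a MultAdd to
  accumulate the remote contributions into yy.
*/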
static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_PtAP) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
  PetscOptionsEnd();

  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
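/*
  A sketch of how the algorithm choice above is typically exercised (the option name comes from
  the PetscOptionsEList() call; the matrix names are illustrative): when C = P^T A P is
  requested with an MPIAIJ matrix A and an MPIMAIJ matrix P, e.g.

    PetscCall(MatPtAP(A, P, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));

  running with -mat_product_algorithm allatonce (or allatonce_merged) keeps P in MAIJ form and
  uses the symbolic/numeric routines below, while the other algorithms first convert P to MPIAIJ.
*/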
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}
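/*
  In the routine above (and its symbolic counterpart below), PP is never expanded explicitly.
  A MAIJ matrix with block size dof stands for the expanded interpolation

      P_full[dof*i + s, dof*j + s] = p(i, j)   for s = 0, ..., dof-1,

  so a column index col of A maps to block row col / ppdof of the compressed p and to component
  shift col % ppdof.  That is exactly the prow/pshift arithmetic used when accumulating rows of
  A*P and then the rows of C = P^T*A*P.
*/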
static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  cn = pn * ppdof;
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca */
  PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)(C->data);
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;

  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;

  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;

  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
}
PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
  Mat          a   = b->AIJ, B;
  Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
  PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
  PetscInt    *cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  PetscCall(MatGetSize(a, &m, &n));
  PetscCall(PetscMalloc1(dof * m, &ilen));
  for (i = 0; i < m; i++) {
    nmax = PetscMax(nmax, aij->ilen[i]);
    for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
  PetscCall(PetscFree(ilen));
  PetscCall(PetscMalloc1(nmax, &icols));
  ii = 0;
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
      PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
  }
  PetscCall(PetscFree(icols));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof     = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols  = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  rstart = dof * maij->A->rmap->rstart;
  cstart = dof * maij->A->cmap->rstart;
  garray = mpiaij->garray;

  ii = rstart;
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscInt refct          = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
    ((PetscObject)A)->refct = 1;

    PetscCall(MatHeaderReplace(A, &B));

    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@
  MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
  operations for multicomponent problems. It interpolates each component the same
  way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
  and `MATMPIAIJ` for distributed matrices.

  Collective

  Input Parameters:
+ A   - the `MATAIJ` matrix describing the action on blocks
- dof - the block size (number of components per node)

  Output Parameter:
. maij - the new `MATMAIJ` matrix

  Level: advanced

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
    MatView()
.ve

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
@*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  dof = PetscAbs(dof);
  PetscCall(PetscObjectReference((PetscObject)A));

  if (dof == 1) *maij = A;
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary Index sets for build scatter gather */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
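/*
  A minimal usage sketch for MatCreateMAIJ() (illustrative; the interpolation matrix named
  interp below is an assumption, e.g. one obtained from DMCreateInterpolation()): wrap a scalar
  interpolation so that it acts on 4 interlaced components per node.

    Mat interp, interp4;
    Vec xc, xf; // coarse and fine vectors with 4 components per node

    PetscCall(MatCreateMAIJ(interp, 4, &interp4));
    PetscCall(MatMult(interp4, xc, xf)); // interpolates each of the 4 components identically
    PetscCall(MatDestroy(&interp4));
*/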