Actual source code: kaij.c
2: /*
3: Defines the basic matrix operations for the KAIJ matrix storage format.
4: This format is used to evaluate matrices of the form:
6: [I \otimes S + A \otimes T]
8: where
9: S is a dense (p \times q) matrix
10: T is a dense (p \times q) matrix
11: A is an AIJ (n \times n) matrix
12: I is the identity matrix
14: The resulting matrix is (np \times nq)
16: We provide:
17: MatMult()
18: MatMultAdd()
19: MatInvertBlockDiagonal()
20: and
21: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
23: This single directory handles both the sequential and parallel codes
24: */
26: #include <../src/mat/impls/kaij/kaij.h>
27: #include <../src/mat/utils/freespace.h>
28: #include <petsc/private/vecimpl.h>
30: /*@C
31: MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
33: Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
35: Input Parameter:
36: . A - the `MATKAIJ` matrix
38: Output Parameter:
39: . B - the `MATAIJ` matrix
41: Level: advanced
43: Note:
44: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
46: .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47: @*/
48: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
49: {
50: PetscBool ismpikaij, isseqkaij;
52: PetscFunctionBegin;
53: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
54: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
55: if (ismpikaij) {
56: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
58: *B = b->A;
59: } else if (isseqkaij) {
60: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
62: *B = b->AIJ;
63: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
64: PetscFunctionReturn(PETSC_SUCCESS);
65: }
67: /*@C
68: MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix
70: Not Collective; the entire `S` is stored and returned independently on all processes.
72: Input Parameter:
73: . A - the `MATKAIJ` matrix
75: Output Parameters:
76: + m - the number of rows in `S`
77: . n - the number of columns in `S`
78: - S - the S matrix, in form of a scalar array in column-major format
80: Level: advanced
82: Note:
83: All output parameters are optional (pass `NULL` if not desired)
85: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
86: @*/
87: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
88: {
89: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
90: PetscFunctionBegin;
91: if (m) *m = b->p;
92: if (n) *n = b->q;
93: if (S) *S = b->S;
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: /*@C
98: MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix
100: Not Collective; the entire `S` is stored and returned independently on all processes.
102: Input Parameter:
103: . A - the `MATKAIJ` matrix
105: Output Parameters:
106: + m - the number of rows in `S`
107: . n - the number of columns in `S`
108: - S - the S matrix, in form of a scalar array in column-major format
110: Level: advanced
112: Note:
113: All output parameters are optional (pass `NULL` if not desired)
115: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
116: @*/
117: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
118: {
119: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
120: PetscFunctionBegin;
121: if (m) *m = b->p;
122: if (n) *n = b->q;
123: if (S) *S = b->S;
124: PetscFunctionReturn(PETSC_SUCCESS);
125: }
127: /*@C
128: MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
130: Not Collective
132: Input Parameters:
133: + A - the `MATKAIJ` matrix
134: - S - location of pointer to array obtained with `MatKAIJGetS()`
136: Level: advanced
138: Note:
139: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
140: If `NULL` is passed, it will not attempt to zero the array pointer.
142: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
143: @*/
144: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
145: {
146: PetscFunctionBegin;
147: if (S) *S = NULL;
148: PetscCall(PetscObjectStateIncrease((PetscObject)A));
149: PetscFunctionReturn(PETSC_SUCCESS);
150: }
152: /*@C
153: MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
155: Not Collective
157: Input Parameters:
158: + A - the `MATKAIJ` matrix
159: - S - location of pointer to array obtained with `MatKAIJGetS()`
161: Level: advanced
163: Note:
164: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
165: If `NULL` is passed, it will not attempt to zero the array pointer.
167: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
170: {
171: PetscFunctionBegin;
172: if (S) *S = NULL;
173: PetscFunctionReturn(PETSC_SUCCESS);
174: }
176: /*@C
177: MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix
179: Not Collective; the entire `T` is stored and returned independently on all processes
181: Input Parameter:
182: . A - the `MATKAIJ` matrix
184: Output Parameters:
185: + m - the number of rows in `T`
186: . n - the number of columns in `T`
187: - T - the T matrix, in form of a scalar array in column-major format
189: Level: advanced
191: Note:
192: All output parameters are optional (pass `NULL` if not desired)
194: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
195: @*/
196: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
197: {
198: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
199: PetscFunctionBegin;
200: if (m) *m = b->p;
201: if (n) *n = b->q;
202: if (T) *T = b->T;
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: /*@C
207: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix
209: Not Collective; the entire `T` is stored and returned independently on all processes
211: Input Parameter:
212: . A - the `MATKAIJ` matrix
214: Output Parameters:
215: + m - the number of rows in `T`
216: . n - the number of columns in `T`
217: - T - the T matrix, in form of a scalar array in column-major format
219: Level: advanced
221: Note:
222: All output parameters are optional (pass `NULL` if not desired)
224: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
225: @*/
226: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
227: {
228: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
229: PetscFunctionBegin;
230: if (m) *m = b->p;
231: if (n) *n = b->q;
232: if (T) *T = b->T;
233: PetscFunctionReturn(PETSC_SUCCESS);
234: }
236: /*@C
237: MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
239: Not Collective
241: Input Parameters:
242: + A - the `MATKAIJ` matrix
243: - T - location of pointer to array obtained with `MatKAIJGetS()`
245: Level: advanced
247: Note:
248: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
249: If `NULL` is passed, it will not attempt to zero the array pointer.
251: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
252: @*/
253: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
254: {
255: PetscFunctionBegin;
256: if (T) *T = NULL;
257: PetscCall(PetscObjectStateIncrease((PetscObject)A));
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: /*@C
262: MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
264: Not Collective
266: Input Parameters:
267: + A - the `MATKAIJ` matrix
268: - T - location of pointer to array obtained with `MatKAIJGetS()`
270: Level: advanced
272: Note:
273: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
274: If `NULL` is passed, it will not attempt to zero the array pointer.
276: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
279: {
280: PetscFunctionBegin;
281: if (T) *T = NULL;
282: PetscFunctionReturn(PETSC_SUCCESS);
283: }
285: /*@
286: MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
288: Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
290: Input Parameters:
291: + A - the `MATKAIJ` matrix
292: - B - the `MATAIJ` matrix
294: Level: advanced
296: Notes:
297: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
299: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
301: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
302: @*/
303: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
304: {
305: PetscMPIInt size;
306: PetscBool flg;
308: PetscFunctionBegin;
309: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
310: if (size == 1) {
311: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
312: PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
313: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
314: a->AIJ = B;
315: } else {
316: Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
317: a->A = B;
318: }
319: PetscCall(PetscObjectReference((PetscObject)B));
320: PetscFunctionReturn(PETSC_SUCCESS);
321: }
323: /*@C
324: MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix
326: Logically Collective; the entire `S` is stored independently on all processes.
328: Input Parameters:
329: + A - the `MATKAIJ` matrix
330: . p - the number of rows in `S`
331: . q - the number of columns in `S`
332: - S - the S matrix, in form of a scalar array in column-major format
334: Level: advanced
336: Notes:
337: The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.
339: The `S` matrix is copied, so the user can destroy this array.
341: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
342: @*/
343: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
344: {
345: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
347: PetscFunctionBegin;
348: PetscCall(PetscFree(a->S));
349: if (S) {
350: PetscCall(PetscMalloc1(p * q, &a->S));
351: PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
352: } else a->S = NULL;
354: a->p = p;
355: a->q = q;
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: /*@C
360: MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.
362: Logically Collective.
364: Input Parameter:
365: . A - the `MATKAIJ` matrix
367: Output Parameter:
368: . identity - the Boolean value
370: Level: Advanced
372: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
373: @*/
374: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
375: {
376: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
377: PetscInt i, j;
379: PetscFunctionBegin;
380: if (a->p != a->q) {
381: *identity = PETSC_FALSE;
382: PetscFunctionReturn(PETSC_SUCCESS);
383: } else *identity = PETSC_TRUE;
384: if (!a->isTI || a->S) {
385: for (i = 0; i < a->p && *identity; i++) {
386: for (j = 0; j < a->p && *identity; j++) {
387: if (i != j) {
388: if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389: if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
390: } else {
391: if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392: if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
393: }
394: }
395: }
396: }
397: PetscFunctionReturn(PETSC_SUCCESS);
398: }
400: /*@C
401: MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix
403: Logically Collective; the entire `T` is stored independently on all processes.
405: Input Parameters:
406: + A - the `MATKAIJ` matrix
407: . p - the number of rows in `S`
408: . q - the number of columns in `S`
409: - T - the `T` matrix, in form of a scalar array in column-major format
411: Level: Advanced
413: Notes:
414: The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.
416: The `T` matrix is copied, so the user can destroy this array.
418: .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
419: @*/
420: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
421: {
422: PetscInt i, j;
423: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
424: PetscBool isTI = PETSC_FALSE;
426: PetscFunctionBegin;
427: /* check if T is an identity matrix */
428: if (T && (p == q)) {
429: isTI = PETSC_TRUE;
430: for (i = 0; i < p; i++) {
431: for (j = 0; j < q; j++) {
432: if (i == j) {
433: /* diagonal term must be 1 */
434: if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
435: } else {
436: /* off-diagonal term must be 0 */
437: if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
438: }
439: }
440: }
441: }
442: a->isTI = isTI;
444: PetscCall(PetscFree(a->T));
445: if (T && (!isTI)) {
446: PetscCall(PetscMalloc1(p * q, &a->T));
447: PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
448: } else a->T = NULL;
450: a->p = p;
451: a->q = q;
452: PetscFunctionReturn(PETSC_SUCCESS);
453: }
455: static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
456: {
457: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
459: PetscFunctionBegin;
460: PetscCall(MatDestroy(&b->AIJ));
461: PetscCall(PetscFree(b->S));
462: PetscCall(PetscFree(b->T));
463: PetscCall(PetscFree(b->ibdiag));
464: PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
465: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
466: PetscCall(PetscFree(A->data));
467: PetscFunctionReturn(PETSC_SUCCESS);
468: }
470: static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
471: {
472: Mat_MPIKAIJ *a;
473: Mat_MPIAIJ *mpiaij;
474: PetscScalar *T;
475: PetscInt i, j;
476: PetscObjectState state;
478: PetscFunctionBegin;
479: a = (Mat_MPIKAIJ *)A->data;
480: mpiaij = (Mat_MPIAIJ *)a->A->data;
482: PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
483: if (state == a->state) {
484: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
485: PetscFunctionReturn(PETSC_SUCCESS);
486: } else {
487: PetscCall(MatDestroy(&a->AIJ));
488: PetscCall(MatDestroy(&a->OAIJ));
489: if (a->isTI) {
490: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
491: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
492: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
493: * to pass in. */
494: PetscCall(PetscMalloc1(a->p * a->q, &T));
495: for (i = 0; i < a->p; i++) {
496: for (j = 0; j < a->q; j++) {
497: if (i == j) T[i + j * a->p] = 1.0;
498: else T[i + j * a->p] = 0.0;
499: }
500: }
501: } else T = a->T;
502: PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
503: PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
504: if (a->isTI) PetscCall(PetscFree(T));
505: a->state = state;
506: }
508: PetscFunctionReturn(PETSC_SUCCESS);
509: }
511: static PetscErrorCode MatSetUp_KAIJ(Mat A)
512: {
513: PetscInt n;
514: PetscMPIInt size;
515: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
517: PetscFunctionBegin;
518: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
519: if (size == 1) {
520: PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
521: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
522: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
523: PetscCall(PetscLayoutSetUp(A->rmap));
524: PetscCall(PetscLayoutSetUp(A->cmap));
525: } else {
526: Mat_MPIKAIJ *a;
527: Mat_MPIAIJ *mpiaij;
528: IS from, to;
529: Vec gvec;
531: a = (Mat_MPIKAIJ *)A->data;
532: mpiaij = (Mat_MPIAIJ *)a->A->data;
533: PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
534: PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
535: PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
536: PetscCall(PetscLayoutSetUp(A->rmap));
537: PetscCall(PetscLayoutSetUp(A->cmap));
539: PetscCall(MatKAIJ_build_AIJ_OAIJ(A));
541: PetscCall(VecGetSize(mpiaij->lvec, &n));
542: PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
543: PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
544: PetscCall(VecSetBlockSize(a->w, a->q));
545: PetscCall(VecSetType(a->w, VECSEQ));
547: /* create two temporary Index sets for build scatter gather */
548: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
549: PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));
551: /* create temporary global vector to generate scatter context */
552: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));
554: /* generate the scatter context */
555: PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));
557: PetscCall(ISDestroy(&from));
558: PetscCall(ISDestroy(&to));
559: PetscCall(VecDestroy(&gvec));
560: }
562: A->assembled = PETSC_TRUE;
563: PetscFunctionReturn(PETSC_SUCCESS);
564: }
566: static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
567: {
568: PetscViewerFormat format;
569: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
570: Mat B;
571: PetscInt i;
572: PetscBool ismpikaij;
574: PetscFunctionBegin;
575: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
576: PetscCall(PetscViewerGetFormat(viewer, &format));
577: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578: PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));
580: /* Print appropriate details for S. */
581: if (!a->S) {
582: PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
583: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
585: for (i = 0; i < (a->p * a->q); i++) {
586: #if defined(PETSC_USE_COMPLEX)
587: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
588: #else
589: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
590: #endif
591: }
592: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
593: }
595: /* Print appropriate details for T. */
596: if (a->isTI) {
597: PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
598: } else if (!a->T) {
599: PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
600: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601: PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
602: for (i = 0; i < (a->p * a->q); i++) {
603: #if defined(PETSC_USE_COMPLEX)
604: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
605: #else
606: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
607: #endif
608: }
609: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
610: }
612: /* Now print details for the AIJ matrix, using the AIJ viewer. */
613: PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
614: if (ismpikaij) {
615: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
616: PetscCall(MatView(b->A, viewer));
617: } else {
618: PetscCall(MatView(a->AIJ, viewer));
619: }
621: } else {
622: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623: PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
624: PetscCall(MatView(B, viewer));
625: PetscCall(MatDestroy(&B));
626: }
627: PetscFunctionReturn(PETSC_SUCCESS);
628: }
630: static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
631: {
632: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
634: PetscFunctionBegin;
635: PetscCall(MatDestroy(&b->AIJ));
636: PetscCall(MatDestroy(&b->OAIJ));
637: PetscCall(MatDestroy(&b->A));
638: PetscCall(VecScatterDestroy(&b->ctx));
639: PetscCall(VecDestroy(&b->w));
640: PetscCall(PetscFree(b->S));
641: PetscCall(PetscFree(b->T));
642: PetscCall(PetscFree(b->ibdiag));
643: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
644: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
645: PetscCall(PetscFree(A->data));
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
649: /* zz = yy + Axx */
650: static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
651: {
652: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
653: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
654: const PetscScalar *s = b->S, *t = b->T;
655: const PetscScalar *x, *v, *bx;
656: PetscScalar *y, *sums;
657: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
658: PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k;
660: PetscFunctionBegin;
661: if (!yy) {
662: PetscCall(VecSet(zz, 0.0));
663: } else {
664: PetscCall(VecCopy(yy, zz));
665: }
666: if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);
668: PetscCall(VecGetArrayRead(xx, &x));
669: PetscCall(VecGetArray(zz, &y));
670: idx = a->j;
671: v = a->a;
672: ii = a->i;
674: if (b->isTI) {
675: for (i = 0; i < m; i++) {
676: jrow = ii[i];
677: n = ii[i + 1] - jrow;
678: sums = y + p * i;
679: for (j = 0; j < n; j++) {
680: for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
681: }
682: }
683: PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
684: } else if (t) {
685: for (i = 0; i < m; i++) {
686: jrow = ii[i];
687: n = ii[i + 1] - jrow;
688: sums = y + p * i;
689: for (j = 0; j < n; j++) {
690: for (k = 0; k < p; k++) {
691: for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
692: }
693: }
694: }
695: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
696: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
697: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
698: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
699: PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
700: }
701: if (s) {
702: for (i = 0; i < m; i++) {
703: sums = y + p * i;
704: bx = x + q * i;
705: if (i < b->AIJ->cmap->n) {
706: for (j = 0; j < q; j++) {
707: for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
708: }
709: }
710: }
711: PetscCall(PetscLogFlops(2.0 * m * p * q));
712: }
714: PetscCall(VecRestoreArrayRead(xx, &x));
715: PetscCall(VecRestoreArray(zz, &y));
716: PetscFunctionReturn(PETSC_SUCCESS);
717: }
719: static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
720: {
721: PetscFunctionBegin;
722: PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
723: PetscFunctionReturn(PETSC_SUCCESS);
724: }
726: #include <petsc/private/kernels/blockinvert.h>
728: static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
729: {
730: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
731: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
732: const PetscScalar *S = b->S;
733: const PetscScalar *T = b->T;
734: const PetscScalar *v = a->a;
735: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
736: PetscInt i, j, *v_pivots, dof, dof2;
737: PetscScalar *diag, aval, *v_work;
739: PetscFunctionBegin;
740: PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
741: PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");
743: dof = p;
744: dof2 = dof * dof;
746: if (b->ibdiagvalid) {
747: if (values) *values = b->ibdiag;
748: PetscFunctionReturn(PETSC_SUCCESS);
749: }
750: if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
751: if (values) *values = b->ibdiag;
752: diag = b->ibdiag;
754: PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
755: for (i = 0; i < m; i++) {
756: if (S) {
757: PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
758: } else {
759: PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
760: }
761: if (b->isTI) {
762: aval = 0;
763: for (j = ii[i]; j < ii[i + 1]; j++)
764: if (idx[j] == i) aval = v[j];
765: for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
766: } else if (T) {
767: aval = 0;
768: for (j = ii[i]; j < ii[i + 1]; j++)
769: if (idx[j] == i) aval = v[j];
770: for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
771: }
772: PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
773: diag += dof2;
774: }
775: PetscCall(PetscFree2(v_work, v_pivots));
777: b->ibdiagvalid = PETSC_TRUE;
778: PetscFunctionReturn(PETSC_SUCCESS);
779: }
781: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
782: {
783: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
785: PetscFunctionBegin;
786: *B = kaij->AIJ;
787: PetscFunctionReturn(PETSC_SUCCESS);
788: }
790: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
791: {
792: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
793: Mat AIJ, OAIJ, B;
794: PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
795: const PetscInt p = a->p, q = a->q;
796: PetscBool ismpikaij, missing;
798: PetscFunctionBegin;
799: if (reuse != MAT_REUSE_MATRIX) {
800: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
801: if (ismpikaij) {
802: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
803: AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
804: OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
805: } else {
806: AIJ = a->AIJ;
807: OAIJ = NULL;
808: }
809: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
810: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
811: PetscCall(MatSetType(B, MATAIJ));
812: PetscCall(MatGetSize(AIJ, &m, NULL));
813: PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
814: if (!missing || !a->S) d = m;
815: PetscCall(PetscMalloc1(m * p, &d_nnz));
816: for (i = 0; i < m; ++i) {
817: PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
818: for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
819: PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
820: }
821: if (OAIJ) {
822: PetscCall(PetscMalloc1(m * p, &o_nnz));
823: for (i = 0; i < m; ++i) {
824: PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
825: for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
826: PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
827: }
828: PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
829: } else {
830: PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
831: }
832: PetscCall(PetscFree(d_nnz));
833: PetscCall(PetscFree(o_nnz));
834: } else B = *newmat;
835: PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
836: if (reuse == MAT_INPLACE_MATRIX) {
837: PetscCall(MatHeaderReplace(A, &B));
838: } else *newmat = B;
839: PetscFunctionReturn(PETSC_SUCCESS);
840: }
842: static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
843: {
844: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data;
845: Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data;
846: const PetscScalar *aa = a->a, *T = kaij->T, *v;
847: const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
848: const PetscScalar *b, *xb, *idiag;
849: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
850: PetscInt i, j, k, i2, bs, bs2, nz;
852: PetscFunctionBegin;
853: its = its * lits;
854: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
855: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
856: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
857: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
858: PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks");
859: bs = p;
860: bs2 = bs * bs;
862: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
864: if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL));
865: idiag = kaij->ibdiag;
866: diag = a->diag;
868: if (!kaij->sor.setup) {
869: PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr));
870: kaij->sor.setup = PETSC_TRUE;
871: }
872: y = kaij->sor.y;
873: w = kaij->sor.w;
874: work = kaij->sor.work;
875: t = kaij->sor.t;
876: arr = kaij->sor.arr;
878: PetscCall(VecGetArray(xx, &x));
879: PetscCall(VecGetArrayRead(bb, &b));
881: if (flag & SOR_ZERO_INITIAL_GUESS) {
882: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
883: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
884: PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar)));
885: i2 = bs;
886: idiag += bs2;
887: for (i = 1; i < m; i++) {
888: v = aa + ai[i];
889: vi = aj + ai[i];
890: nz = diag[i] - ai[i];
892: if (T) { /* b - T (Arow * x) */
893: PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar)));
894: for (j = 0; j < nz; j++) {
895: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
896: }
897: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
898: for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
899: } else if (kaij->isTI) {
900: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
901: for (j = 0; j < nz; j++) {
902: for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
903: }
904: } else {
905: PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar)));
906: }
908: PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
909: for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
911: idiag += bs2;
912: i2 += bs;
913: }
914: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
915: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
916: xb = t;
917: } else xb = b;
918: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
919: idiag = kaij->ibdiag + bs2 * (m - 1);
920: i2 = bs * (m - 1);
921: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
922: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
923: i2 -= bs;
924: idiag -= bs2;
925: for (i = m - 2; i >= 0; i--) {
926: v = aa + diag[i] + 1;
927: vi = aj + diag[i] + 1;
928: nz = ai[i + 1] - diag[i] - 1;
930: if (T) { /* FIXME: This branch untested */
931: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
932: /* copy all rows of x that are needed into contiguous space */
933: workt = work;
934: for (j = 0; j < nz; j++) {
935: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
936: workt += bs;
937: }
938: arrt = arr;
939: for (j = 0; j < nz; j++) {
940: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
941: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
942: arrt += bs2;
943: }
944: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
945: } else if (kaij->isTI) {
946: PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar)));
947: for (j = 0; j < nz; j++) {
948: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
949: }
950: }
952: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
953: for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
955: idiag -= bs2;
956: i2 -= bs;
957: }
958: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
959: }
960: its--;
961: }
962: while (its--) { /* FIXME: This branch not updated */
963: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
964: i2 = 0;
965: idiag = kaij->ibdiag;
966: for (i = 0; i < m; i++) {
967: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
969: v = aa + ai[i];
970: vi = aj + ai[i];
971: nz = diag[i] - ai[i];
972: workt = work;
973: for (j = 0; j < nz; j++) {
974: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
975: workt += bs;
976: }
977: arrt = arr;
978: if (T) {
979: for (j = 0; j < nz; j++) {
980: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
981: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
982: arrt += bs2;
983: }
984: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
985: } else if (kaij->isTI) {
986: for (j = 0; j < nz; j++) {
987: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
988: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
989: arrt += bs2;
990: }
991: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
992: }
993: PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar)));
995: v = aa + diag[i] + 1;
996: vi = aj + diag[i] + 1;
997: nz = ai[i + 1] - diag[i] - 1;
998: workt = work;
999: for (j = 0; j < nz; j++) {
1000: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1001: workt += bs;
1002: }
1003: arrt = arr;
1004: if (T) {
1005: for (j = 0; j < nz; j++) {
1006: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1007: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1008: arrt += bs2;
1009: }
1010: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1011: } else if (kaij->isTI) {
1012: for (j = 0; j < nz; j++) {
1013: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1014: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1015: arrt += bs2;
1016: }
1017: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1018: }
1020: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1021: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1023: idiag += bs2;
1024: i2 += bs;
1025: }
1026: xb = t;
1027: } else xb = b;
1028: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1029: idiag = kaij->ibdiag + bs2 * (m - 1);
1030: i2 = bs * (m - 1);
1031: if (xb == b) {
1032: for (i = m - 1; i >= 0; i--) {
1033: PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar)));
1035: v = aa + ai[i];
1036: vi = aj + ai[i];
1037: nz = diag[i] - ai[i];
1038: workt = work;
1039: for (j = 0; j < nz; j++) {
1040: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1041: workt += bs;
1042: }
1043: arrt = arr;
1044: if (T) {
1045: for (j = 0; j < nz; j++) {
1046: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1047: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1048: arrt += bs2;
1049: }
1050: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1051: } else if (kaij->isTI) {
1052: for (j = 0; j < nz; j++) {
1053: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1054: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1055: arrt += bs2;
1056: }
1057: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1058: }
1060: v = aa + diag[i] + 1;
1061: vi = aj + diag[i] + 1;
1062: nz = ai[i + 1] - diag[i] - 1;
1063: workt = work;
1064: for (j = 0; j < nz; j++) {
1065: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1066: workt += bs;
1067: }
1068: arrt = arr;
1069: if (T) {
1070: for (j = 0; j < nz; j++) {
1071: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1072: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1073: arrt += bs2;
1074: }
1075: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1076: } else if (kaij->isTI) {
1077: for (j = 0; j < nz; j++) {
1078: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1079: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1080: arrt += bs2;
1081: }
1082: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1083: }
1085: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1086: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1087: }
1088: } else {
1089: for (i = m - 1; i >= 0; i--) {
1090: PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
1091: v = aa + diag[i] + 1;
1092: vi = aj + diag[i] + 1;
1093: nz = ai[i + 1] - diag[i] - 1;
1094: workt = work;
1095: for (j = 0; j < nz; j++) {
1096: PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
1097: workt += bs;
1098: }
1099: arrt = arr;
1100: if (T) {
1101: for (j = 0; j < nz; j++) {
1102: PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
1103: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1104: arrt += bs2;
1105: }
1106: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1107: } else if (kaij->isTI) {
1108: for (j = 0; j < nz; j++) {
1109: PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
1110: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1111: arrt += bs2;
1112: }
1113: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1114: }
1115: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1116: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1117: }
1118: }
1119: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
1120: }
1121: }
1123: PetscCall(VecRestoreArray(xx, &x));
1124: PetscCall(VecRestoreArrayRead(bb, &b));
1125: PetscFunctionReturn(PETSC_SUCCESS);
1126: }
1128: /*===================================================================================*/
1130: static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1131: {
1132: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1134: PetscFunctionBegin;
1135: if (!yy) {
1136: PetscCall(VecSet(zz, 0.0));
1137: } else {
1138: PetscCall(VecCopy(yy, zz));
1139: }
1140: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1141: /* start the scatter */
1142: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1143: PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
1144: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
1145: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
1146: PetscFunctionReturn(PETSC_SUCCESS);
1147: }
1149: static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1150: {
1151: PetscFunctionBegin;
1152: PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
1153: PetscFunctionReturn(PETSC_SUCCESS);
1154: }
1156: static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1157: {
1158: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1160: PetscFunctionBegin;
1161: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1162: PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
1163: PetscFunctionReturn(PETSC_SUCCESS);
1164: }
1166: static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1167: {
1168: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
1169: PetscBool diag = PETSC_FALSE;
1170: PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1171: PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1173: PetscFunctionBegin;
1174: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1175: b->getrowactive = PETSC_TRUE;
1176: PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1178: if ((!S) && (!T) && (!b->isTI)) {
1179: if (ncols) *ncols = 0;
1180: if (cols) *cols = NULL;
1181: if (values) *values = NULL;
1182: PetscFunctionReturn(PETSC_SUCCESS);
1183: }
1185: if (T || b->isTI) {
1186: PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
1187: c = nzaij;
1188: for (i = 0; i < nzaij; i++) {
1189: /* check if this row contains a diagonal entry */
1190: if (colsaij[i] == r) {
1191: diag = PETSC_TRUE;
1192: c = i;
1193: }
1194: }
1195: } else nzaij = c = 0;
1197: /* calculate size of row */
1198: nz = 0;
1199: if (S) nz += q;
1200: if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1202: if (cols || values) {
1203: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1204: for (i = 0; i < q; i++) {
1205: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1206: v[i] = 0.0;
1207: }
1208: if (b->isTI) {
1209: for (i = 0; i < nzaij; i++) {
1210: for (j = 0; j < q; j++) {
1211: idx[i * q + j] = colsaij[i] * q + j;
1212: v[i * q + j] = (j == s ? vaij[i] : 0);
1213: }
1214: }
1215: } else if (T) {
1216: for (i = 0; i < nzaij; i++) {
1217: for (j = 0; j < q; j++) {
1218: idx[i * q + j] = colsaij[i] * q + j;
1219: v[i * q + j] = vaij[i] * T[s + j * p];
1220: }
1221: }
1222: }
1223: if (S) {
1224: for (j = 0; j < q; j++) {
1225: idx[c * q + j] = r * q + j;
1226: v[c * q + j] += S[s + j * p];
1227: }
1228: }
1229: }
1231: if (ncols) *ncols = nz;
1232: if (cols) *cols = idx;
1233: if (values) *values = v;
1234: PetscFunctionReturn(PETSC_SUCCESS);
1235: }
1237: static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1238: {
1239: PetscFunctionBegin;
1240: if (nz) *nz = 0;
1241: PetscCall(PetscFree2(*idx, *v));
1242: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1243: PetscFunctionReturn(PETSC_SUCCESS);
1244: }
1246: static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1247: {
1248: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1249: Mat AIJ = b->A;
1250: PetscBool diag = PETSC_FALSE;
1251: Mat MatAIJ, MatOAIJ;
1252: const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1253: PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1254: PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T;
1256: PetscFunctionBegin;
1257: PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1258: MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1259: MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1260: PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1261: b->getrowactive = PETSC_TRUE;
1262: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1263: lrow = row - rstart;
1265: if ((!S) && (!T) && (!b->isTI)) {
1266: if (ncols) *ncols = 0;
1267: if (cols) *cols = NULL;
1268: if (values) *values = NULL;
1269: PetscFunctionReturn(PETSC_SUCCESS);
1270: }
1272: r = lrow / p;
1273: s = lrow % p;
1275: if (T || b->isTI) {
1276: PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
1277: PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
1278: PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));
1280: c = ncolsaij + ncolsoaij;
1281: for (i = 0; i < ncolsaij; i++) {
1282: /* check if this row contains a diagonal entry */
1283: if (colsaij[i] == r) {
1284: diag = PETSC_TRUE;
1285: c = i;
1286: }
1287: }
1288: } else c = 0;
1290: /* calculate size of row */
1291: nz = 0;
1292: if (S) nz += q;
1293: if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1295: if (cols || values) {
1296: PetscCall(PetscMalloc2(nz, &idx, nz, &v));
1297: for (i = 0; i < q; i++) {
1298: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1299: v[i] = 0.0;
1300: }
1301: if (b->isTI) {
1302: for (i = 0; i < ncolsaij; i++) {
1303: for (j = 0; j < q; j++) {
1304: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1305: v[i * q + j] = (j == s ? vals[i] : 0.0);
1306: }
1307: }
1308: for (i = 0; i < ncolsoaij; i++) {
1309: for (j = 0; j < q; j++) {
1310: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1311: v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0);
1312: }
1313: }
1314: } else if (T) {
1315: for (i = 0; i < ncolsaij; i++) {
1316: for (j = 0; j < q; j++) {
1317: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1318: v[i * q + j] = vals[i] * T[s + j * p];
1319: }
1320: }
1321: for (i = 0; i < ncolsoaij; i++) {
1322: for (j = 0; j < q; j++) {
1323: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1324: v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p];
1325: }
1326: }
1327: }
1328: if (S) {
1329: for (j = 0; j < q; j++) {
1330: idx[c * q + j] = (r + rstart / p) * q + j;
1331: v[c * q + j] += S[s + j * p];
1332: }
1333: }
1334: }
1336: if (ncols) *ncols = nz;
1337: if (cols) *cols = idx;
1338: if (values) *values = v;
1339: PetscFunctionReturn(PETSC_SUCCESS);
1340: }
1342: static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1343: {
1344: PetscFunctionBegin;
1345: PetscCall(PetscFree2(*idx, *v));
1346: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1347: PetscFunctionReturn(PETSC_SUCCESS);
1348: }
1350: static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1351: {
1352: Mat A;
1354: PetscFunctionBegin;
1355: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1356: PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
1357: PetscCall(MatDestroy(&A));
1358: PetscFunctionReturn(PETSC_SUCCESS);
1359: }
1361: /*@C
1362: MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form
1363: .vb
1364: [I \otimes S + A \otimes T]
1365: .ve
1366: where
1367: .vb
1368: S is a dense (p \times q) matrix
1369: T is a dense (p \times q) matrix
1370: A is a `MATAIJ` (n \times n) matrix
1371: I is the identity matrix
1372: .ve
1373: The resulting matrix is (np \times nq)
1375: `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1377: Collective
1379: Input Parameters:
1380: + A - the `MATAIJ` matrix
1381: . p - number of rows in `S` and `T`
1382: . q - number of columns in `S` and `T`
1383: . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1384: - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
1386: Output Parameter:
1387: . kaij - the new `MATKAIJ` matrix
1389: Level: advanced
1391: Notes:
1392: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1394: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1396: Developer Note:
1397: In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1398: of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
1399: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
1400: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1402: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1403: @*/
1404: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1405: {
1406: PetscFunctionBegin;
1407: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
1408: PetscCall(MatSetType(*kaij, MATKAIJ));
1409: PetscCall(MatKAIJSetAIJ(*kaij, A));
1410: PetscCall(MatKAIJSetS(*kaij, p, q, S));
1411: PetscCall(MatKAIJSetT(*kaij, p, q, T));
1412: PetscCall(MatSetUp(*kaij));
1413: PetscFunctionReturn(PETSC_SUCCESS);
1414: }
1416: /*MC
1417: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1418: [I \otimes S + A \otimes T],
1419: where
1420: .vb
1421: S is a dense (p \times q) matrix,
1422: T is a dense (p \times q) matrix,
1423: A is an AIJ (n \times n) matrix,
1424: and I is the identity matrix.
1425: .ve
1426: The resulting matrix is (np \times nq).
1428: S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1430: Level: advanced
1432: Note:
1433: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1434: where x and b are column vectors containing the row-major representations of X and B.
1436: .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1437: M*/
1439: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1440: {
1441: Mat_MPIKAIJ *b;
1442: PetscMPIInt size;
1444: PetscFunctionBegin;
1445: PetscCall(PetscNew(&b));
1446: A->data = (void *)b;
1448: PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
1450: b->w = NULL;
1451: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1452: if (size == 1) {
1453: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
1454: A->ops->destroy = MatDestroy_SeqKAIJ;
1455: A->ops->mult = MatMult_SeqKAIJ;
1456: A->ops->multadd = MatMultAdd_SeqKAIJ;
1457: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1458: A->ops->getrow = MatGetRow_SeqKAIJ;
1459: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1460: A->ops->sor = MatSOR_SeqKAIJ;
1461: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
1462: } else {
1463: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
1464: A->ops->destroy = MatDestroy_MPIKAIJ;
1465: A->ops->mult = MatMult_MPIKAIJ;
1466: A->ops->multadd = MatMultAdd_MPIKAIJ;
1467: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1468: A->ops->getrow = MatGetRow_MPIKAIJ;
1469: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1470: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
1471: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
1472: }
1473: A->ops->setup = MatSetUp_KAIJ;
1474: A->ops->view = MatView_KAIJ;
1475: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1476: PetscFunctionReturn(PETSC_SUCCESS);
1477: }