Actual source code: blockmat.c
2: /*
3: This provides a matrix that consists of Mats
4: */
6: #include <petsc/private/matimpl.h>
7: #include <../src/mat/impls/baij/seq/baij.h>
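/*
   Data layout: SEQAIJHEADER(Mat) supplies the usual compressed sparse row arrays
   (i, j, ilen, imax, ...), except that the value array a[] holds Mat pointers rather
   than scalars, so the matrix is stored as a CSR structure whose entries are blocks.
   diags[] caches factored diagonal blocks for the SOR routines, and the dummy vectors
   below are placed over slices of the input/output arrays during the block products.
*/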
9: typedef struct {
10: SEQAIJHEADER(Mat);
11: SEQBAIJHEADER;
12: Mat *diags;
14: Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
15: } Mat_BlockMat;
17: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
18: {
19: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
20: PetscScalar *x;
21: const Mat *v;
22: const PetscScalar *b;
23: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
24: const PetscInt *idx;
25: IS row, col;
26: MatFactorInfo info;
27: Vec left = a->left, right = a->right, middle = a->middle;
28: Mat *diag;
30: PetscFunctionBegin;
31: its = its * lits;
32: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
33: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
34: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
35: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
36: PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
38: if (!a->diags) {
39: PetscCall(PetscMalloc1(mbs, &a->diags));
40: PetscCall(MatFactorInfoInitialize(&info));
41: for (i = 0; i < mbs; i++) {
42: PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
43: PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
44: PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
45: PetscCall(ISDestroy(&row));
46: PetscCall(ISDestroy(&col));
47: }
48: PetscCall(VecDuplicate(bb, &a->workb));
49: }
50: diag = a->diags;
52: PetscCall(VecSet(xx, 0.0));
53: PetscCall(VecGetArray(xx, &x));
54: /* copy right hand side because it must be modified during iteration */
55: PetscCall(VecCopy(bb, a->workb));
56: PetscCall(VecGetArrayRead(a->workb, &b));
58: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
59: while (its--) {
60: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
61: for (i = 0; i < mbs; i++) {
62: n = a->i[i + 1] - a->i[i] - 1;
63: idx = a->j + a->i[i] + 1;
64: v = a->a + a->i[i] + 1;
66: PetscCall(VecSet(left, 0.0));
67: for (j = 0; j < n; j++) {
68: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
69: PetscCall(MatMultAdd(v[j], right, left, left));
70: PetscCall(VecResetArray(right));
71: }
72: PetscCall(VecPlaceArray(right, b + i * bs));
73: PetscCall(VecAYPX(left, -1.0, right));
74: PetscCall(VecResetArray(right));
76: PetscCall(VecPlaceArray(right, x + i * bs));
77: PetscCall(MatSolve(diag[i], left, right));
79: /* now adjust right hand side, see MatSOR_SeqSBAIJ */
80: for (j = 0; j < n; j++) {
81: PetscCall(MatMultTranspose(v[j], right, left));
82: PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
83: PetscCall(VecAXPY(middle, -1.0, left));
84: PetscCall(VecResetArray(middle));
85: }
86: PetscCall(VecResetArray(right));
87: }
88: }
89: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
90: for (i = mbs - 1; i >= 0; i--) {
91: n = a->i[i + 1] - a->i[i] - 1;
92: idx = a->j + a->i[i] + 1;
93: v = a->a + a->i[i] + 1;
95: PetscCall(VecSet(left, 0.0));
96: for (j = 0; j < n; j++) {
97: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
98: PetscCall(MatMultAdd(v[j], right, left, left));
99: PetscCall(VecResetArray(right));
100: }
101: PetscCall(VecPlaceArray(right, b + i * bs));
102: PetscCall(VecAYPX(left, -1.0, right));
103: PetscCall(VecResetArray(right));
105: PetscCall(VecPlaceArray(right, x + i * bs));
106: PetscCall(MatSolve(diag[i], left, right));
107: PetscCall(VecResetArray(right));
108: }
109: }
110: }
111: PetscCall(VecRestoreArray(xx, &x));
112: PetscCall(VecRestoreArrayRead(a->workb, &b));
113: PetscFunctionReturn(PETSC_SUCCESS);
114: }
116: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
117: {
118: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
119: PetscScalar *x;
120: const Mat *v;
121: const PetscScalar *b;
122: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
123: const PetscInt *idx;
124: IS row, col;
125: MatFactorInfo info;
126: Vec left = a->left, right = a->right;
127: Mat *diag;
129: PetscFunctionBegin;
130: its = its * lits;
131: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
132: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
133: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
134: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
136: if (!a->diags) {
137: PetscCall(PetscMalloc1(mbs, &a->diags));
138: PetscCall(MatFactorInfoInitialize(&info));
139: for (i = 0; i < mbs; i++) {
140: PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
141: PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
142: PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
143: PetscCall(ISDestroy(&row));
144: PetscCall(ISDestroy(&col));
145: }
146: }
147: diag = a->diags;
149: PetscCall(VecSet(xx, 0.0));
150: PetscCall(VecGetArray(xx, &x));
151: PetscCall(VecGetArrayRead(bb, &b));
153: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
154: while (its--) {
155: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
156: for (i = 0; i < mbs; i++) {
157: n = a->i[i + 1] - a->i[i];
158: idx = a->j + a->i[i];
159: v = a->a + a->i[i];
161: PetscCall(VecSet(left, 0.0));
162: for (j = 0; j < n; j++) {
163: if (idx[j] != i) {
164: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
165: PetscCall(MatMultAdd(v[j], right, left, left));
166: PetscCall(VecResetArray(right));
167: }
168: }
169: PetscCall(VecPlaceArray(right, b + i * bs));
170: PetscCall(VecAYPX(left, -1.0, right));
171: PetscCall(VecResetArray(right));
173: PetscCall(VecPlaceArray(right, x + i * bs));
174: PetscCall(MatSolve(diag[i], left, right));
175: PetscCall(VecResetArray(right));
176: }
177: }
178: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
179: for (i = mbs - 1; i >= 0; i--) {
180: n = a->i[i + 1] - a->i[i];
181: idx = a->j + a->i[i];
182: v = a->a + a->i[i];
184: PetscCall(VecSet(left, 0.0));
185: for (j = 0; j < n; j++) {
186: if (idx[j] != i) {
187: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
188: PetscCall(MatMultAdd(v[j], right, left, left));
189: PetscCall(VecResetArray(right));
190: }
191: }
192: PetscCall(VecPlaceArray(right, b + i * bs));
193: PetscCall(VecAYPX(left, -1.0, right));
194: PetscCall(VecResetArray(right));
196: PetscCall(VecPlaceArray(right, x + i * bs));
197: PetscCall(MatSolve(diag[i], left, right));
198: PetscCall(VecResetArray(right));
199: }
200: }
201: }
202: PetscCall(VecRestoreArray(xx, &x));
203: PetscCall(VecRestoreArrayRead(bb, &b));
204: PetscFunctionReturn(PETSC_SUCCESS);
205: }
207: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
208: {
209: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
210: PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
211: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
212: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
213: PetscInt ridx, cidx;
214: PetscBool roworiented = a->roworiented;
215: MatScalar value;
216: Mat *ap, *aa = a->a;
218: PetscFunctionBegin;
219: for (k = 0; k < m; k++) { /* loop over added rows */
220: row = im[k];
221: brow = row / bs;
222: if (row < 0) continue;
223: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
224: rp = aj + ai[brow];
225: ap = aa + ai[brow];
226: rmax = imax[brow];
227: nrow = ailen[brow];
228: low = 0;
229: high = nrow;
230: for (l = 0; l < n; l++) { /* loop over added columns */
231: if (in[l] < 0) continue;
232: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
233: col = in[l];
234: bcol = col / bs;
235: if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
236: ridx = row % bs;
237: cidx = col % bs;
238: if (roworiented) value = v[l + k * n];
239: else value = v[k + l * m];
241: if (col <= lastcol) low = 0;
242: else high = nrow;
243: lastcol = col;
244: while (high - low > 7) {
245: t = (low + high) / 2;
246: if (rp[t] > bcol) high = t;
247: else low = t;
248: }
249: for (i = low; i < high; i++) {
250: if (rp[i] > bcol) break;
251: if (rp[i] == bcol) goto noinsert1;
252: }
253: if (nonew == 1) goto noinsert1;
254: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
255: MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
256: N = nrow++ - 1;
257: high++;
258: /* shift up all the later entries in this row */
259: for (ii = N; ii >= i; ii--) {
260: rp[ii + 1] = rp[ii];
261: ap[ii + 1] = ap[ii];
262: }
263: if (N >= i) ap[i] = NULL;
264: rp[i] = bcol;
265: a->nz++;
266: A->nonzerostate++;
267: noinsert1:;
268: if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
269: PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
270: low = i;
271: }
272: ailen[brow] = nrow;
273: }
274: PetscFunctionReturn(PETSC_SUCCESS);
275: }
277: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
278: {
279: Mat tmpA;
280: PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
281: const PetscInt *cols;
282: const PetscScalar *values;
283: PetscBool flg = PETSC_FALSE, notdone;
284: Mat_SeqAIJ *a;
285: Mat_BlockMat *amat;
287: PetscFunctionBegin;
288: /* force binary viewer to load .info file if it has not yet done so */
289: PetscCall(PetscViewerSetUp(viewer));
290: PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
291: PetscCall(MatSetType(tmpA, MATSEQAIJ));
292: PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
294: PetscCall(MatGetLocalSize(tmpA, &m, &n));
295: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
296: PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
297: PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
298: PetscOptionsEnd();
300: /* Determine number of nonzero blocks for each block row */
301: a = (Mat_SeqAIJ *)tmpA->data;
302: mbs = m / bs;
303: PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
304: PetscCall(PetscArrayzero(lens, mbs));
306: for (i = 0; i < mbs; i++) {
307: for (j = 0; j < bs; j++) {
308: ii[j] = a->j + a->i[i * bs + j];
309: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
310: }
312: currentcol = -1;
313: while (PETSC_TRUE) {
314: notdone = PETSC_FALSE;
315: nextcol = 1000000000;
316: for (j = 0; j < bs; j++) {
317: while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
318: ii[j]++;
319: ilens[j]--;
320: }
321: if (ilens[j] > 0) {
322: notdone = PETSC_TRUE;
323: nextcol = PetscMin(nextcol, ii[j][0] / bs);
324: }
325: }
326: if (!notdone) break;
327: if (!flg || (nextcol >= i)) lens[i]++;
328: currentcol = nextcol;
329: }
330: }
332: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
333: PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
334: if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
335: amat = (Mat_BlockMat *)(newmat)->data;
337: /* preallocate the submatrices */
338: PetscCall(PetscMalloc1(bs, &llens));
339: for (i = 0; i < mbs; i++) { /* loops for block rows */
340: for (j = 0; j < bs; j++) {
341: ii[j] = a->j + a->i[i * bs + j];
342: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
343: }
345: currentcol = 1000000000;
346: for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
347: if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
348: }
350: while (PETSC_TRUE) { /* loops over blocks in block row */
351: notdone = PETSC_FALSE;
352: nextcol = 1000000000;
353: PetscCall(PetscArrayzero(llens, bs));
354: for (j = 0; j < bs; j++) { /* loop over rows in block */
355: while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
356: ii[j]++;
357: ilens[j]--;
358: llens[j]++;
359: }
360: if (ilens[j] > 0) {
361: notdone = PETSC_TRUE;
362: nextcol = PetscMin(nextcol, ii[j][0] / bs);
363: }
364: }
365: PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
366: if (!flg || currentcol >= i) {
367: amat->j[cnt] = currentcol;
368: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
369: }
371: if (!notdone) break;
372: currentcol = nextcol;
373: }
374: amat->ilen[i] = lens[i];
375: }
377: PetscCall(PetscFree3(lens, ii, ilens));
378: PetscCall(PetscFree(llens));
380: /* copy over the matrix, one row at a time */
381: for (i = 0; i < m; i++) {
382: PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
383: PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
384: PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
385: }
386: PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
387: PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
392: {
393: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
394: const char *name;
395: PetscViewerFormat format;
397: PetscFunctionBegin;
398: PetscCall(PetscObjectGetName((PetscObject)A, &name));
399: PetscCall(PetscViewerGetFormat(viewer, &format));
400: if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
401: PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
402: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
403: }
404: PetscFunctionReturn(PETSC_SUCCESS);
405: }
407: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
408: {
409: Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
410: PetscInt i;
412: PetscFunctionBegin;
413: PetscCall(VecDestroy(&bmat->right));
414: PetscCall(VecDestroy(&bmat->left));
415: PetscCall(VecDestroy(&bmat->middle));
416: PetscCall(VecDestroy(&bmat->workb));
417: if (bmat->diags) {
418: for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
419: }
420: if (bmat->a) {
421: for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
422: }
423: PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
424: PetscCall(PetscFree(mat->data));
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
428: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
429: {
430: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
431: PetscScalar *xx, *yy;
432: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
433: Mat *aa;
435: PetscFunctionBegin;
436: /*
437: Standard CSR multiply except each entry is a Mat
438: */
439: PetscCall(VecGetArray(x, &xx));
441: PetscCall(VecSet(y, 0.0));
442: PetscCall(VecGetArray(y, &yy));
443: aj = bmat->j;
444: aa = bmat->a;
445: ii = bmat->i;
446: for (i = 0; i < m; i++) {
447: jrow = ii[i];
448: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
449: n = ii[i + 1] - jrow;
450: for (j = 0; j < n; j++) {
451: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
452: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
453: PetscCall(VecResetArray(bmat->right));
454: jrow++;
455: }
456: PetscCall(VecResetArray(bmat->left));
457: }
458: PetscCall(VecRestoreArray(x, &xx));
459: PetscCall(VecRestoreArray(y, &yy));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
464: {
465: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
466: PetscScalar *xx, *yy;
467: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
468: Mat *aa;
470: PetscFunctionBegin;
471: /*
472: Standard CSR multiply except each entry is a Mat
473: */
474: PetscCall(VecGetArray(x, &xx));
476: PetscCall(VecSet(y, 0.0));
477: PetscCall(VecGetArray(y, &yy));
478: aj = bmat->j;
479: aa = bmat->a;
480: ii = bmat->i;
481: for (i = 0; i < m; i++) {
482: jrow = ii[i];
483: n = ii[i + 1] - jrow;
484: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
485: PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
486:     /* if we ALWAYS required a diagonal entry then we could remove this if test */
487: if (aj[jrow] == i) {
488: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
489: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
490: PetscCall(VecResetArray(bmat->right));
491: jrow++;
492: n--;
493: }
494: for (j = 0; j < n; j++) {
495: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
496: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
497: PetscCall(VecResetArray(bmat->right));
499: PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
500: PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
501: PetscCall(VecResetArray(bmat->right));
502: jrow++;
503: }
504: PetscCall(VecResetArray(bmat->left));
505: PetscCall(VecResetArray(bmat->middle));
506: }
507: PetscCall(VecRestoreArray(x, &xx));
508: PetscCall(VecRestoreArray(y, &yy));
509: PetscFunctionReturn(PETSC_SUCCESS);
510: }
512: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
513: {
514: PetscFunctionBegin;
515: PetscFunctionReturn(PETSC_SUCCESS);
516: }
518: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
519: {
520: PetscFunctionBegin;
521: PetscFunctionReturn(PETSC_SUCCESS);
522: }
524: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
525: {
526: PetscFunctionBegin;
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: /*
531: Adds diagonal pointers to sparse matrix structure.
532: */
533: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
534: {
535: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
536: PetscInt i, j, mbs = A->rmap->n / A->rmap->bs;
538: PetscFunctionBegin;
539: if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
540: for (i = 0; i < mbs; i++) {
541: a->diag[i] = a->i[i + 1];
542: for (j = a->i[i]; j < a->i[i + 1]; j++) {
543: if (a->j[j] == i) {
544: a->diag[i] = j;
545: break;
546: }
547: }
548: }
549: PetscFunctionReturn(PETSC_SUCCESS);
550: }
552: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
553: {
554: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
555: Mat_SeqAIJ *c;
556: PetscInt i, k, first, step, lensi, nrows, ncols;
557: PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
558: PetscScalar *a_new;
559: Mat C, *aa = a->a;
560: PetscBool stride, equal;
562: PetscFunctionBegin;
563: PetscCall(ISEqual(isrow, iscol, &equal));
564: PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
565: PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
566: PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
567: PetscCall(ISStrideGetInfo(iscol, &first, &step));
568: PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
570: PetscCall(ISGetLocalSize(isrow, &nrows));
571: ncols = nrows;
573: /* create submatrix */
574: if (scall == MAT_REUSE_MATRIX) {
575: PetscInt n_cols, n_rows;
576: C = *B;
577: PetscCall(MatGetSize(C, &n_rows, &n_cols));
578: PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
579: PetscCall(MatZeroEntries(C));
580: } else {
581: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
582: PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
583: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
584: else PetscCall(MatSetType(C, MATSEQAIJ));
585: PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
586: PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
587: }
588: c = (Mat_SeqAIJ *)C->data;
590: /* loop over rows inserting into submatrix */
591: a_new = c->a;
592: j_new = c->j;
593: i_new = c->i;
595: for (i = 0; i < nrows; i++) {
596: lensi = ailen[i];
597: for (k = 0; k < lensi; k++) {
598: *j_new++ = *aj++;
599: PetscCall(MatGetValue(*aa++, first, first, a_new++));
600: }
601: i_new[i + 1] = i_new[i] + lensi;
602: c->ilen[i] = lensi;
603: }
605: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
606: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
607: *B = C;
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
611: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
612: {
613: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
614: PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
615: PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
616: Mat *aa = a->a, *ap;
618: PetscFunctionBegin;
619: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
621: if (m) rmax = ailen[0]; /* determine row with most nonzeros */
622: for (i = 1; i < m; i++) {
623:     /* move each row back by the amount of empty slots (fshift) before it */
624: fshift += imax[i - 1] - ailen[i - 1];
625: rmax = PetscMax(rmax, ailen[i]);
626: if (fshift) {
627: ip = aj + ai[i];
628: ap = aa + ai[i];
629: N = ailen[i];
630: for (j = 0; j < N; j++) {
631: ip[j - fshift] = ip[j];
632: ap[j - fshift] = ap[j];
633: }
634: }
635: ai[i] = ai[i - 1] + ailen[i - 1];
636: }
637: if (m) {
638: fshift += imax[m - 1] - ailen[m - 1];
639: ai[m] = ai[m - 1] + ailen[m - 1];
640: }
641: /* reset ilen and imax for each row */
642: for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
643: a->nz = ai[m];
644: for (i = 0; i < a->nz; i++) {
645: PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
646: PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
647: PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
648: }
649: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
650: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
651: PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
653: A->info.mallocs += a->reallocs;
654: a->reallocs = 0;
655: A->info.nz_unneeded = (double)fshift;
656: a->rmax = rmax;
657: PetscCall(MatMarkDiagonal_BlockMat(A));
658: PetscFunctionReturn(PETSC_SUCCESS);
659: }
661: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
662: {
663: PetscFunctionBegin;
664: if (opt == MAT_SYMMETRIC && flg) {
665: A->ops->sor = MatSOR_BlockMat_Symmetric;
666: A->ops->mult = MatMult_BlockMat_Symmetric;
667: } else {
668: PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
669: }
670: PetscFunctionReturn(PETSC_SUCCESS);
671: }
673: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
674: NULL,
675: NULL,
676: MatMult_BlockMat,
677: /* 4*/ MatMultAdd_BlockMat,
678: MatMultTranspose_BlockMat,
679: MatMultTransposeAdd_BlockMat,
680: NULL,
681: NULL,
682: NULL,
683: /* 10*/ NULL,
684: NULL,
685: NULL,
686: MatSOR_BlockMat,
687: NULL,
688: /* 15*/ NULL,
689: NULL,
690: NULL,
691: NULL,
692: NULL,
693: /* 20*/ NULL,
694: MatAssemblyEnd_BlockMat,
695: MatSetOption_BlockMat,
696: NULL,
697: /* 24*/ NULL,
698: NULL,
699: NULL,
700: NULL,
701: NULL,
702: /* 29*/ NULL,
703: NULL,
704: NULL,
705: NULL,
706: NULL,
707: /* 34*/ NULL,
708: NULL,
709: NULL,
710: NULL,
711: NULL,
712: /* 39*/ NULL,
713: NULL,
714: NULL,
715: NULL,
716: NULL,
717: /* 44*/ NULL,
718: NULL,
719: MatShift_Basic,
720: NULL,
721: NULL,
722: /* 49*/ NULL,
723: NULL,
724: NULL,
725: NULL,
726: NULL,
727: /* 54*/ NULL,
728: NULL,
729: NULL,
730: NULL,
731: NULL,
732: /* 59*/ MatCreateSubMatrix_BlockMat,
733: MatDestroy_BlockMat,
734: MatView_BlockMat,
735: NULL,
736: NULL,
737: /* 64*/ NULL,
738: NULL,
739: NULL,
740: NULL,
741: NULL,
742: /* 69*/ NULL,
743: NULL,
744: NULL,
745: NULL,
746: NULL,
747: /* 74*/ NULL,
748: NULL,
749: NULL,
750: NULL,
751: NULL,
752: /* 79*/ NULL,
753: NULL,
754: NULL,
755: NULL,
756: MatLoad_BlockMat,
757: /* 84*/ NULL,
758: NULL,
759: NULL,
760: NULL,
761: NULL,
762: /* 89*/ NULL,
763: NULL,
764: NULL,
765: NULL,
766: NULL,
767: /* 94*/ NULL,
768: NULL,
769: NULL,
770: NULL,
771: NULL,
772: /* 99*/ NULL,
773: NULL,
774: NULL,
775: NULL,
776: NULL,
777: /*104*/ NULL,
778: NULL,
779: NULL,
780: NULL,
781: NULL,
782: /*109*/ NULL,
783: NULL,
784: NULL,
785: NULL,
786: NULL,
787: /*114*/ NULL,
788: NULL,
789: NULL,
790: NULL,
791: NULL,
792: /*119*/ NULL,
793: NULL,
794: NULL,
795: NULL,
796: NULL,
797: /*124*/ NULL,
798: NULL,
799: NULL,
800: NULL,
801: NULL,
802: /*129*/ NULL,
803: NULL,
804: NULL,
805: NULL,
806: NULL,
807: /*134*/ NULL,
808: NULL,
809: NULL,
810: NULL,
811: NULL,
812: /*139*/ NULL,
813: NULL,
814: NULL,
815: NULL,
816: NULL,
817: /*144*/ NULL,
818: NULL,
819: NULL,
820: NULL,
821: NULL,
822: NULL,
823: /*150*/ NULL,
824: NULL};
826: /*@C
827:   MatBlockMatSetPreallocation - For good matrix assembly performance
828:   the user should preallocate the matrix storage by setting the parameter `nz`
829:   (or the array `nnz`). By setting these parameters accurately, performance
830:   during matrix assembly can be increased by more than a factor of 50.
832: Collective
834: Input Parameters:
835: + B - The matrix
836: . bs - size of each block in matrix
837: . bs  - size of each block in matrix
838: . nz  - number of nonzero blocks per block row (same for all block rows)
839: - nnz - array containing the number of nonzero blocks in the various block rows
840:         (possibly different for each block row) or `NULL`
841: Level: intermediate
843: Notes:
844: If `nnz` is given then `nz` is ignored
846:   The `nnz` array is currently required for this matrix type; setting `nz` = `PETSC_DEFAULT`
847:   with `nnz` = `NULL` (to let PETSc control dynamic memory allocation) is not yet supported
848:   and generates an error.
850: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
851: @*/
852: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
853: {
854: PetscFunctionBegin;
855: PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
856: PetscFunctionReturn(PETSC_SUCCESS);
857: }
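/*
   A minimal usage sketch of the preallocation described above; the block size, local
   sizes, and the per-block-row counts in lens[] are purely illustrative assumptions.
   The sketch is wrapped in #if 0 so it is not compiled as part of this file.
*/
#if 0
static PetscErrorCode ExampleBlockMatPreallocation(Mat *A)
{
  PetscInt lens[4] = {2, 3, 3, 2}; /* expected nonzero blocks in each of the 4 block rows */

  PetscFunctionBegin;
  PetscCall(MatCreate(PETSC_COMM_SELF, A));
  PetscCall(MatSetSizes(*A, 8, 8, PETSC_DETERMINE, PETSC_DETERMINE)); /* 4 block rows of block size 2 */
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  /* nnz (lens) is provided, so the nz argument is ignored, as documented above */
  PetscCall(MatBlockMatSetPreallocation(*A, 2, 0, lens));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif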
859: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
860: {
861: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
862: PetscInt i;
864: PetscFunctionBegin;
865: PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
866: PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
867: PetscCall(PetscLayoutSetUp(A->rmap));
868: PetscCall(PetscLayoutSetUp(A->cmap));
869: PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
871: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
872: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
873: if (nnz) {
874: for (i = 0; i < A->rmap->n / bs; i++) {
875: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
876: PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
877: }
878: }
879: bmat->mbs = A->rmap->n / bs;
881: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
882: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
883: PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
885: if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
886: if (PetscLikely(nnz)) {
887: nz = 0;
888: for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
889: bmat->imax[i] = nnz[i];
890: nz += nnz[i];
891: }
892: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
894: /* bmat->ilen will count nonzeros in each row so far. */
895: PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
897: /* allocate the matrix space */
898: PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
899: PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
900: bmat->i[0] = 0;
901: for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
902: bmat->singlemalloc = PETSC_TRUE;
903: bmat->free_a = PETSC_TRUE;
904: bmat->free_ij = PETSC_TRUE;
906: bmat->nz = 0;
907: bmat->maxnz = nz;
908: A->info.nz_unneeded = (double)bmat->maxnz;
909: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
910: PetscFunctionReturn(PETSC_SUCCESS);
911: }
913: /*MC
914:    MATBLOCKMAT - A matrix defined by a set of `Mat`'s, representing a sparse block matrix
915:    consisting of (usually) sparse blocks.
917: Level: advanced
919: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
920: M*/
922: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
923: {
924: Mat_BlockMat *b;
926: PetscFunctionBegin;
927: PetscCall(PetscNew(&b));
928: A->data = (void *)b;
929: PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
931: A->assembled = PETSC_TRUE;
932: A->preallocated = PETSC_FALSE;
933: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
935: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
936: PetscFunctionReturn(PETSC_SUCCESS);
937: }
939: /*@C
940:   MatCreateBlockMat - Creates a new matrix in which each block is a uniform-size sequential `Mat` object
942: Collective
944: Input Parameters:
945: + comm - MPI communicator
946: . m    - number of local rows (must be a multiple of `bs`)
947: . n    - number of local columns (must be a multiple of `bs`)
948: . bs - size of each submatrix
949: . nz   - expected maximum number of nonzero blocks in a block row (use `PETSC_DEFAULT` if not known)
950: - nnz  - expected number of nonzero blocks per block row if known (use `NULL` otherwise)
952: Output Parameter:
953: . A - the matrix
955: Level: intermediate
957: Notes:
958: Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
959: have the same size and be sequential. The local and global sizes must be compatible with this decomposition.
961: For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
963: Developer Note:
964: I don't like the name, it is not `MATNESTMAT`
966: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
967: @*/
968: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
969: {
970: PetscFunctionBegin;
971: PetscCall(MatCreate(comm, A));
972: PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
973: PetscCall(MatSetType(*A, MATBLOCKMAT));
974: PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
975: PetscFunctionReturn(PETSC_SUCCESS);
976: }
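/*
   A minimal end-to-end sketch of the constructor above; the dimensions, block size,
   inserted values, and the per-block-row counts in lens[] are illustrative assumptions
   only. It creates a MATBLOCKMAT, inserts scalar entries using point (non-block)
   indices, assembles, and applies the matrix. Wrapped in #if 0 so it is not compiled.
*/
#if 0
static PetscErrorCode ExampleBlockMatUsage(void)
{
  Mat      A;
  Vec      x, y;
  PetscInt lens[2] = {2, 2}; /* each of the 2 block rows expects 2 nonzero blocks */
  PetscInt i;

  PetscFunctionBegin;
  /* 4 x 4 matrix stored as 2 x 2 blocks, i.e. 2 block rows and 2 block columns */
  PetscCall(MatCreateBlockMat(PETSC_COMM_SELF, 4, 4, 2, 0, lens, &A));
  for (i = 0; i < 4; i++) {
    PetscCall(MatSetValue(A, i, i, 2.0, INSERT_VALUES));            /* entries in the diagonal blocks */
    PetscCall(MatSetValue(A, i, (i + 2) % 4, -1.0, INSERT_VALUES)); /* entries in one off-diagonal block per row */
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, 4, &x));
  PetscCall(VecDuplicate(x, &y));
  PetscCall(VecSet(x, 1.0));
  PetscCall(MatMult(A, x, y)); /* y = A x, computed block by block by MatMult_BlockMat() */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif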