Actual source code: blockmat.c


/*
   This provides a matrix that consists of Mats
*/

#include <petsc/private/matimpl.h>
#include <../src/mat/impls/baij/seq/baij.h>

typedef struct {
  SEQAIJHEADER(Mat);
  SEQBAIJHEADER;
  Mat *diags;

  Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
} Mat_BlockMat;
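
/*
   Storage: the standard CSR (AIJ) layout is used over the block rows, except that each stored
   "nonzero" in a->a[] is itself a sequential Mat of size bs by bs. The dummy vectors above wrap
   segments of raw arrays with VecPlaceArray() so the per-block products can be applied without
   copying.
*/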

static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right, middle = a->middle;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
  PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      /* the factor matrix must be created with MatGetFactor() before the symbolic factorization can use it */
      PetscCall(MatGetFactor(a->a[a->diag[i]], MATSOLVERPETSC, MAT_FACTOR_CHOLESKY, &a->diags[i]));
      PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
      PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
    PetscCall(VecDuplicate(bb, &a->workb));
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  /* copy right hand side because it must be modified during iteration */
  PetscCall(VecCopy(bb, a->workb));
  PetscCall(VecGetArrayRead(a->workb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));

        /* now adjust right hand side, see MatSOR_SeqSBAIJ */
        for (j = 0; j < n; j++) {
          PetscCall(MatMultTranspose(v[j], right, left));
          PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
          PetscCall(VecAXPY(middle, -1.0, left));
          PetscCall(VecResetArray(middle));
        }
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i] - 1;
        idx = a->j + a->i[i] + 1;
        v   = a->a + a->i[i] + 1;

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          PetscCall(VecPlaceArray(right, x + idx[j] * bs));
          PetscCall(MatMultAdd(v[j], right, left, left));
          PetscCall(VecResetArray(right));
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(a->workb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_BlockMat      *a = (Mat_BlockMat *)A->data;
  PetscScalar       *x;
  const Mat         *v;
  const PetscScalar *b;
  PetscInt           n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
  const PetscInt    *idx;
  IS                 row, col;
  MatFactorInfo      info;
  Vec                left = a->left, right = a->right;
  Mat               *diag;

  PetscFunctionBegin;
  its = its * lits;
  PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
  PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
  PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");

  if (!a->diags) {
    PetscCall(PetscMalloc1(mbs, &a->diags));
    PetscCall(MatFactorInfoInitialize(&info));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
      /* the factor matrix must be created with MatGetFactor() before the symbolic factorization can use it */
      PetscCall(MatGetFactor(a->a[a->diag[i]], MATSOLVERPETSC, MAT_FACTOR_LU, &a->diags[i]));
      PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
      PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
      PetscCall(ISDestroy(&row));
      PetscCall(ISDestroy(&col));
    }
  }
  diag = a->diags;

  PetscCall(VecSet(xx, 0.0));
  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));

  /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
  while (its--) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      for (i = 0; i < mbs; i++) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      for (i = mbs - 1; i >= 0; i--) {
        n   = a->i[i + 1] - a->i[i];
        idx = a->j + a->i[i];
        v   = a->a + a->i[i];

        PetscCall(VecSet(left, 0.0));
        for (j = 0; j < n; j++) {
          if (idx[j] != i) {
            PetscCall(VecPlaceArray(right, x + idx[j] * bs));
            PetscCall(MatMultAdd(v[j], right, left, left));
            PetscCall(VecResetArray(right));
          }
        }
        PetscCall(VecPlaceArray(right, b + i * bs));
        PetscCall(VecAYPX(left, -1.0, right));
        PetscCall(VecResetArray(right));

        PetscCall(VecPlaceArray(right, x + i * bs));
        PetscCall(MatSolve(diag[i], left, right));
        PetscCall(VecResetArray(right));
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt     *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
  PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
  PetscInt      ridx, cidx;
  PetscBool     roworiented = a->roworiented;
  MatScalar     value;
  Mat          *ap, *aa = a->a;

  PetscFunctionBegin;
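  /* Each scalar index pair (row, col) is mapped to the block (row/bs, col/bs) and to the
     entry (row%bs, col%bs) inside that block; the block row is searched (binary search for
     long rows) and the value is then forwarded to the block's own MatSetValues() */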
  for (k = 0; k < m; k++) { /* loop over added rows */
    row  = im[k];
    brow = row / bs;
    if (row < 0) continue;
    PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
    rp   = aj + ai[brow];
    ap   = aa + ai[brow];
    rmax = imax[brow];
    nrow = ailen[brow];
    low  = 0;
    high = nrow;
    for (l = 0; l < n; l++) { /* loop over added columns */
      if (in[l] < 0) continue;
      PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
      col  = in[l];
      bcol = col / bs;
      if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
      ridx = row % bs;
      cidx = col % bs;
      if (roworiented) value = v[l + k * n];
      else value = v[k + l * m];

      if (col <= lastcol) low = 0;
      else high = nrow;
      lastcol = col;
      while (high - low > 7) {
        t = (low + high) / 2;
        if (rp[t] > bcol) high = t;
        else low = t;
      }
      for (i = low; i < high; i++) {
        if (rp[i] > bcol) break;
        if (rp[i] == bcol) goto noinsert1;
      }
      if (nonew == 1) goto noinsert1;
      PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
      MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
      N = nrow++ - 1;
      high++;
      /* shift up all the later entries in this row */
      for (ii = N; ii >= i; ii--) {
        rp[ii + 1] = rp[ii];
        ap[ii + 1] = ap[ii];
      }
      if (N >= i) ap[i] = NULL;
      rp[i] = bcol;
      a->nz++;
      A->nonzerostate++;
    noinsert1:;
      if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
      PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
      low = i;
    }
    ailen[brow] = nrow;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
{
  Mat                tmpA;
  PetscInt           i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
  const PetscInt    *cols;
  const PetscScalar *values;
  PetscBool          flg = PETSC_FALSE, notdone;
  Mat_SeqAIJ        *a;
  Mat_BlockMat      *amat;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
  PetscCall(MatSetType(tmpA, MATSEQAIJ));
  PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
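  /* The file is read into an ordinary MATSEQAIJ matrix first; its entries are then
     regrouped into bs by bs blocks and copied into the BlockMat one row at a time below */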

  PetscCall(MatGetLocalSize(tmpA, &m, &n));
  PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix", "Mat");
  PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
  PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
  PetscOptionsEnd();

  /* Determine number of nonzero blocks for each block row */
  a   = (Mat_SeqAIJ *)tmpA->data;
  mbs = m / bs;
  PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
  PetscCall(PetscArrayzero(lens, mbs));

  for (i = 0; i < mbs; i++) {
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = -1;
    while (PETSC_TRUE) {
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      for (j = 0; j < bs; j++) {
        while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) {
          ii[j]++;
          ilens[j]--;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      if (!notdone) break;
      if (!flg || (nextcol >= i)) lens[i]++;
      currentcol = nextcol;
    }
  }

  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
  if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
  amat = (Mat_BlockMat *)(newmat)->data;

  /* preallocate the submatrices */
  PetscCall(PetscMalloc1(bs, &llens));
  for (i = 0; i < mbs; i++) { /* loops for block rows */
    for (j = 0; j < bs; j++) {
      ii[j]    = a->j + a->i[i * bs + j];
      ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
    }

    currentcol = 1000000000;
    for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
      if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
    }

    while (PETSC_TRUE) { /* loops over blocks in block row */
      notdone = PETSC_FALSE;
      nextcol = 1000000000;
      PetscCall(PetscArrayzero(llens, bs));
      for (j = 0; j < bs; j++) {                                /* loop over rows in block */
        while ((ilens[j] > 0 && ii[j][0] / bs <= currentcol)) { /* loop over columns in row */
          ii[j]++;
          ilens[j]--;
          llens[j]++;
        }
        if (ilens[j] > 0) {
          notdone = PETSC_TRUE;
          nextcol = PetscMin(nextcol, ii[j][0] / bs);
        }
      }
      PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
      if (!flg || currentcol >= i) {
        amat->j[cnt] = currentcol;
        PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
      }

      if (!notdone) break;
      currentcol = nextcol;
    }
    amat->ilen[i] = lens[i];
  }

  PetscCall(PetscFree3(lens, ii, ilens));
  PetscCall(PetscFree(llens));

  /* copy over the matrix, one row at a time */
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
    PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
  }
  PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
  /* the temporary AIJ matrix is no longer needed */
  PetscCall(MatDestroy(&tmpA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
{
  Mat_BlockMat     *a = (Mat_BlockMat *)A->data;
  const char       *name;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetName((PetscObject)A, &name));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_BlockMat(Mat mat)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(VecDestroy(&bmat->right));
  PetscCall(VecDestroy(&bmat->left));
  PetscCall(VecDestroy(&bmat->middle));
  PetscCall(VecDestroy(&bmat->workb));
  if (bmat->diags) {
    for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
    /* the array holding the factored diagonal blocks must be freed as well */
    PetscCall(PetscFree(bmat->diags));
  }
  if (bmat->a) {
    for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
  }
  PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscFree(mat->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    n = ii[i + 1] - jrow;
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscScalar  *xx, *yy;
  PetscInt     *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
  Mat          *aa;

  PetscFunctionBegin;
  /*
     Standard CSR multiply except each entry is a Mat
  */
  PetscCall(VecGetArray(x, &xx));

  PetscCall(VecSet(y, 0.0));
  PetscCall(VecGetArray(y, &yy));
  aj = bmat->j;
  aa = bmat->a;
  ii = bmat->i;
  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
    PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
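    /* middle wraps x_i; MatMultTransposeAdd() below uses it to apply each stored upper
       triangular block as its (unstored) lower triangular transpose */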
    /* if we ALWAYS required a diagonal entry then could remove this if test */
    if (aj[jrow] == i) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
      n--;
    }
    for (j = 0; j < n; j++) {
      PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
      PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
      PetscCall(VecResetArray(bmat->right));

      PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
      PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
      PetscCall(VecResetArray(bmat->right));
      jrow++;
    }
    PetscCall(VecResetArray(bmat->left));
    PetscCall(VecResetArray(bmat->middle));
  }
  PetscCall(VecRestoreArray(x, &xx));
  PetscCall(VecRestoreArray(y, &yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  /* not yet implemented: returns without computing anything */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* not yet implemented: returns without computing anything */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
{
  PetscFunctionBegin;
  /* not yet implemented: returns without computing anything */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     Adds diagonal pointers to sparse matrix structure.
*/
static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  PetscInt      i, j, mbs = A->rmap->n / A->rmap->bs;

  PetscFunctionBegin;
  if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
  for (i = 0; i < mbs; i++) {
    a->diag[i] = a->i[i + 1];
    for (j = a->i[i]; j < a->i[i + 1]; j++) {
      if (a->j[j] == i) {
        a->diag[i] = j;
        break;
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_BlockMat *a = (Mat_BlockMat *)A->data;
  Mat_SeqAIJ   *c;
  PetscInt      i, k, first, step, lensi, nrows, ncols;
  PetscInt     *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
  PetscScalar  *a_new;
  Mat           C, *aa = a->a;
  PetscBool     stride, equal;

  PetscFunctionBegin;
  PetscCall(ISEqual(isrow, iscol, &equal));
  PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
  PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
  PetscCall(ISStrideGetInfo(iscol, &first, &step));
  PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");

  PetscCall(ISGetLocalSize(isrow, &nrows));
  ncols = nrows;

  /* create submatrix */
  if (scall == MAT_REUSE_MATRIX) {
    PetscInt n_cols, n_rows;
    C = *B;
    PetscCall(MatGetSize(C, &n_rows, &n_cols));
    PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
    PetscCall(MatZeroEntries(C));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
    if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
    else PetscCall(MatSetType(C, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
    PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
  }
  c = (Mat_SeqAIJ *)C->data;

  /* loop over rows inserting into submatrix */
  a_new = c->a;
  j_new = c->j;
  i_new = c->i;

  for (i = 0; i < nrows; i++) {
    lensi = ailen[i];
    for (k = 0; k < lensi; k++) {
      *j_new++ = *aj++;
      PetscCall(MatGetValue(*aa++, first, first, a_new++));
    }
    i_new[i + 1] = i_new[i] + lensi;
    c->ilen[i]   = lensi;
  }

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
{
  Mat_BlockMat *a      = (Mat_BlockMat *)A->data;
  PetscInt      fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
  PetscInt      m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
  Mat          *aa = a->a, *ap;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  if (m) rmax = ailen[0]; /* determine row with most nonzeros */
  for (i = 1; i < m; i++) {
    /* move each row back by the amount of empty slots (fshift) before it */
    fshift += imax[i - 1] - ailen[i - 1];
    rmax = PetscMax(rmax, ailen[i]);
    if (fshift) {
      ip = aj + ai[i];
      ap = aa + ai[i];
      N  = ailen[i];
      for (j = 0; j < N; j++) {
        ip[j - fshift] = ip[j];
        ap[j - fshift] = ap[j];
      }
    }
    ai[i] = ai[i - 1] + ailen[i - 1];
  }
  if (m) {
    fshift += imax[m - 1] - ailen[m - 1];
    ai[m] = ai[m - 1] + ailen[m - 1];
  }
  /* reset ilen and imax for each row */
  for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
  a->nz = ai[m];
  for (i = 0; i < a->nz; i++) {
    PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
    PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
  }
  PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
  PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
  PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));

  A->info.mallocs += a->reallocs;
  a->reallocs         = 0;
  A->info.nz_unneeded = (double)fshift;
  a->rmax             = rmax;
  PetscCall(MatMarkDiagonal_BlockMat(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
{
  PetscFunctionBegin;
  if (opt == MAT_SYMMETRIC && flg) {
    A->ops->sor  = MatSOR_BlockMat_Symmetric;
    A->ops->mult = MatMult_BlockMat_Symmetric;
  } else {
    PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
                                       NULL,
                                       NULL,
                                       MatMult_BlockMat,
                                       /*  4*/ MatMultAdd_BlockMat,
                                       MatMultTranspose_BlockMat,
                                       MatMultTransposeAdd_BlockMat,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_BlockMat,
                                       NULL,
                                       /* 15*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_BlockMat,
                                       MatSetOption_BlockMat,
                                       NULL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 44*/ NULL,
                                       NULL,
                                       MatShift_Basic,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_BlockMat,
                                       MatDestroy_BlockMat,
                                       MatView_BlockMat,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_BlockMat,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/*@C
   MatBlockMatSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameter `nz`
   (or the array `nnz`).  By setting these parameters accurately, performance
   during matrix assembly can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - The matrix
.  bs - size of each block in matrix
.  nz - number of nonzero blocks per block row (same for all rows)
-  nnz - array containing the number of nonzero blocks in the various block rows
         (possibly different for each row) or `NULL`

   Level: intermediate

   Notes:
   If `nnz` is given then `nz` is ignored.

   The current implementation requires that `nnz` be provided; passing `NULL` for `nnz`
   generates an error, since dynamic allocation is not yet supported for this type.
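
   Example Usage:
.vb
   /* a minimal sketch (error checking omitted): a 4 x 4 matrix stored as
      2 x 2 blocks of size bs = 2, with at most 2 nonzero blocks per block row */
   Mat      B;
   PetscInt nnz[2] = {2, 2};

   MatCreate(PETSC_COMM_SELF, &B);
   MatSetSizes(B, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(B, MATBLOCKMAT);
   MatBlockMatSetPreallocation(B, 2, 0, nnz);
.ve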

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
@*/
PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
{
  Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
  PetscInt      i;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));

  if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
  if (nnz) {
    for (i = 0; i < A->rmap->n / bs; i++) {
      PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
      PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
    }
  }
  bmat->mbs = A->rmap->n / bs;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));

  if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
  if (PetscLikely(nnz)) {
    nz = 0;
    for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
      bmat->imax[i] = nnz[i];
      nz += nnz[i];
    }
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");

  /* bmat->ilen will count nonzeros in each row so far. */
  PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));

  /* allocate the matrix space */
  PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
  PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
  bmat->i[0] = 0;
  for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
  bmat->singlemalloc = PETSC_TRUE;
  bmat->free_a       = PETSC_TRUE;
  bmat->free_ij      = PETSC_TRUE;

  bmat->nz            = 0;
  bmat->maxnz         = nz;
  A->info.nz_unneeded = (double)bmat->maxnz;
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATBLOCKMAT - A matrix defined by a set of `Mat`s, representing a sparse block matrix
                 consisting of (usually) sparse blocks.

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
{
  Mat_BlockMat *b;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));

  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_FALSE;
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of rows
.  n - number of columns
.  bs - size of each submatrix
.  nz - expected maximum number of nonzero blocks in a block row (use `PETSC_DEFAULT` if not known)
-  nnz - expected number of nonzero blocks per block row if known (use `NULL` otherwise)

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object.  Each `Mat` must
   have the same size and be sequential.  The local and global sizes must be compatible with this decomposition.

   For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
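
   Example Usage:
.vb
   /* a minimal sketch (error checking omitted): a 6 x 6 matrix stored as
      3 x 3 blocks of size bs = 2, with at most 3 nonzero blocks per block row */
   Mat      A;
   PetscInt nnz[3] = {3, 3, 3};

   MatCreateBlockMat(PETSC_COMM_SELF, 6, 6, 2, 0, nnz, &A);
   /* ... insert entries with MatSetValues(), then assemble ... */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve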

   Developer Note:
   I don't like the name, it is not `MATNESTMAT`

.seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
@*/
PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATBLOCKMAT));
  PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}