Actual source code: bjacobi.c


  2: /*
  3:    Defines a block Jacobi preconditioner.
  4: */

  6: #include <../src/ksp/pc/impls/bjacobi/bjacobi.h>

  8: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
  9: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
 10: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);

 12: static PetscErrorCode PCSetUp_BJacobi(PC pc)
 13: {
 14:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
 15:   Mat         mat = pc->mat, pmat = pc->pmat;
 16:   PetscBool   hasop;
 17:   PetscInt    N, M, start, i, sum, end;
 18:   PetscInt    bs, i_start = -1, i_end = -1;
 19:   PetscMPIInt rank, size;

 21:   PetscFunctionBegin;
 22:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
 23:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
 24:   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));
 25:   PetscCall(MatGetBlockSize(pc->pmat, &bs));

 27:   if (jac->n > 0 && jac->n < size) {
 28:     PetscCall(PCSetUp_BJacobi_Multiproc(pc));
 29:     PetscFunctionReturn(PETSC_SUCCESS);
 30:   }

 32:   /* Determine the number of blocks assigned to each processor */
 33:   /* local block count given */
 34:   if (jac->n_local > 0 && jac->n < 0) {
 35:     PetscCall(MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
 36:     if (jac->l_lens) { /* check that user set these correctly */
 37:       sum = 0;
 38:       for (i = 0; i < jac->n_local; i++) {
 39:         PetscCheck(jac->l_lens[i] / bs * bs == jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
 40:         sum += jac->l_lens[i];
 41:       }
 42:       PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local lens set incorrectly");
 43:     } else {
 44:       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
 45:       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
 46:     }
 47:   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
 48:     /* global blocks given: determine which ones are local */
 49:     if (jac->g_lens) {
 50:       /* check that g_lens has valid entries */
 51:       for (i = 0; i < jac->n; i++) {
 52:         PetscCheck(jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Zero block not allowed");
 53:         PetscCheck(jac->g_lens[i] / bs * bs == jac->g_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Mat blocksize doesn't match block Jacobi layout");
 54:       }
 55:       if (size == 1) {
 56:         jac->n_local = jac->n;
 57:         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
 58:         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local));
 59:         /* check that user set these correctly */
 60:         sum = 0;
 61:         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
 62:         PetscCheck(sum == M, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Global lens set incorrectly");
 63:       } else {
 64:         PetscCall(MatGetOwnershipRange(pc->pmat, &start, &end));
 65:         /* loop over blocks determining first one owned by me */
 66:         sum = 0;
 67:         for (i = 0; i < jac->n + 1; i++) {
 68:           if (sum == start) {
 69:             i_start = i;
 70:             goto start_1;
 71:           }
 72:           if (i < jac->n) sum += jac->g_lens[i];
 73:         }
 74:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 75:       start_1:
 76:         for (i = i_start; i < jac->n + 1; i++) {
 77:           if (sum == end) {
 78:             i_end = i;
 79:             goto end_1;
 80:           }
 81:           if (i < jac->n) sum += jac->g_lens[i];
 82:         }
 83:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 84:       end_1:
 85:         jac->n_local = i_end - i_start;
 86:         PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
 87:         PetscCall(PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local));
 88:       }
 89:     } else { /* no global blocks given, determine them using the default layout */
 90:       jac->n_local = jac->n / size + ((jac->n % size) > rank);
 91:       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
 92:       for (i = 0; i < jac->n_local; i++) {
 93:         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
 94:         PetscCheck(jac->l_lens[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Too many blocks given");
 95:       }
 96:     }
 97:   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
 98:     jac->n       = size;
 99:     jac->n_local = 1;
100:     PetscCall(PetscMalloc1(1, &jac->l_lens));
101:     jac->l_lens[0] = M;
102:   } else { /* jac->n > 0 && jac->n_local > 0 */
103:     if (!jac->l_lens) {
104:       PetscCall(PetscMalloc1(jac->n_local, &jac->l_lens));
105:       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
106:     }
107:   }
108:   PetscCheck(jac->n_local >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of blocks is less than number of processors");
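  /*
     Worked example of the default layout computed above: M = 10 local rows with bs = 2 and n_local = 3
     gives l_lens = {4, 4, 2}. Each block receives (M/bs)/n_local = 1 block of size bs, the first
     (M/bs) % n_local = 2 blocks receive one extra, and the counts are converted back to rows by the
     factor bs.
  */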

110:   /* Determine mat and pmat */
111:   PetscCall(MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop));
112:   if (!hasop && size == 1) {
113:     mat  = pc->mat;
114:     pmat = pc->pmat;
115:   } else {
116:     if (pc->useAmat) {
117:       /* use block from Amat matrix, not Pmat for local MatMult() */
118:       PetscCall(MatGetDiagonalBlock(pc->mat, &mat));
119:     }
120:     if (pc->pmat != pc->mat || !pc->useAmat) {
121:       PetscCall(MatGetDiagonalBlock(pc->pmat, &pmat));
122:     } else pmat = mat;
123:   }

125:   /*
126:      Setup code depends on the number of blocks
127:   */
128:   if (jac->n_local == 1) {
129:     PetscCall(PCSetUp_BJacobi_Singleblock(pc, mat, pmat));
130:   } else {
131:     PetscCall(PCSetUp_BJacobi_Multiblock(pc, mat, pmat));
132:   }
133:   PetscFunctionReturn(PETSC_SUCCESS);
134: }

136: /* Default destroy, used if the PC has never been set up */
137: static PetscErrorCode PCDestroy_BJacobi(PC pc)
138: {
139:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

141:   PetscFunctionBegin;
142:   PetscCall(PetscFree(jac->g_lens));
143:   PetscCall(PetscFree(jac->l_lens));
144:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL));
145:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL));
146:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL));
147:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL));
148:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL));
149:   PetscCall(PetscFree(pc->data));
150:   PetscFunctionReturn(PETSC_SUCCESS);
151: }

153: static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
154: {
155:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
156:   PetscInt    blocks, i;
157:   PetscBool   flg;

159:   PetscFunctionBegin;
160:   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
161:   PetscCall(PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg));
162:   if (flg) PetscCall(PCBJacobiSetTotalBlocks(pc, blocks, NULL));
163:   PetscCall(PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg));
164:   if (flg) PetscCall(PCBJacobiSetLocalBlocks(pc, blocks, NULL));
165:   if (jac->ksp) {
166:     /* The sub-KSPs have already been created (e.g., by PCSetUp_BJacobi_Singleblock()), but KSPSetFromOptions() was not
167:      * called on them unless this routine had already been called, so call it here. */
168:     for (i = 0; i < jac->n_local; i++) PetscCall(KSPSetFromOptions(jac->ksp[i]));
169:   }
170:   PetscOptionsHeadEnd();
171:   PetscFunctionReturn(PETSC_SUCCESS);
172: }

174: #include <petscdraw.h>
175: static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
176: {
177:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
178:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
179:   PetscMPIInt           rank;
180:   PetscInt              i;
181:   PetscBool             iascii, isstring, isdraw;
182:   PetscViewer           sviewer;
183:   PetscViewerFormat     format;
184:   const char           *prefix;

186:   PetscFunctionBegin;
187:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
188:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
189:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
190:   if (iascii) {
191:     if (pc->useAmat) PetscCall(PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n));
192:     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n));
193:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
194:     PetscCall(PetscViewerGetFormat(viewer, &format));
195:     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
196:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n"));
197:       PetscCall(PCGetOptionsPrefix(pc, &prefix));
198:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : ""));
199:       if (jac->ksp && !jac->psubcomm) {
200:         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
201:         if (rank == 0) {
202:           PetscCall(PetscViewerASCIIPushTab(viewer));
203:           PetscCall(KSPView(jac->ksp[0], sviewer));
204:           PetscCall(PetscViewerASCIIPopTab(viewer));
205:         }
206:         PetscCall(PetscViewerFlush(sviewer));
207:         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
208:         PetscCall(PetscViewerFlush(viewer));
209:         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
210:         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
211:       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
212:         PetscCall(PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
213:         if (!mpjac->psubcomm->color) {
214:           PetscCall(PetscViewerASCIIPushTab(viewer));
215:           PetscCall(KSPView(*(jac->ksp), sviewer));
216:           PetscCall(PetscViewerASCIIPopTab(viewer));
217:         }
218:         PetscCall(PetscViewerFlush(sviewer));
219:         PetscCall(PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer));
220:         PetscCall(PetscViewerFlush(viewer));
221:         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
222:         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
223:       } else {
224:         PetscCall(PetscViewerFlush(viewer));
225:       }
226:     } else {
227:       PetscInt n_global;
228:       PetscCall(MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
229:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
230:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n"));
231:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local));
232:       PetscCall(PetscViewerASCIIPushTab(viewer));
233:       PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
234:       for (i = 0; i < jac->n_local; i++) {
235:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i));
236:         PetscCall(KSPView(jac->ksp[i], sviewer));
237:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n"));
238:       }
239:       PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
240:       PetscCall(PetscViewerASCIIPopTab(viewer));
241:       PetscCall(PetscViewerFlush(viewer));
242:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
243:     }
244:   } else if (isstring) {
245:     PetscCall(PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n));
246:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
247:     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], sviewer));
248:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
249:   } else if (isdraw) {
250:     PetscDraw draw;
251:     char      str[25];
252:     PetscReal x, y, bottom, h;

254:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
255:     PetscCall(PetscDrawGetCurrentPoint(draw, &x, &y));
256:     PetscCall(PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n));
257:     PetscCall(PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h));
258:     bottom = y - h;
259:     PetscCall(PetscDrawPushCurrentPoint(draw, x, bottom));
260:     /* warning: the communicator on the viewer is different than the one on the ksp in parallel */
261:     if (jac->ksp) PetscCall(KSPView(jac->ksp[0], viewer));
262:     PetscCall(PetscDrawPopCurrentPoint(draw));
263:   }
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }

267: static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
268: {
269:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

271:   PetscFunctionBegin;
272:   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call KSPSetUp() or PCSetUp() first");

274:   if (n_local) *n_local = jac->n_local;
275:   if (first_local) *first_local = jac->first_local;
276:   if (ksp) *ksp = jac->ksp;
277:   PetscFunctionReturn(PETSC_SUCCESS);
278: }

280: static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, PetscInt *lens)
281: {
282:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

284:   PetscFunctionBegin;
285:   PetscCheck(pc->setupcalled <= 0 || jac->n == blocks, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
286:   jac->n = blocks;
287:   if (!lens) jac->g_lens = NULL;
288:   else {
289:     PetscCall(PetscMalloc1(blocks, &jac->g_lens));
290:     PetscCall(PetscArraycpy(jac->g_lens, lens, blocks));
291:   }
292:   PetscFunctionReturn(PETSC_SUCCESS);
293: }

295: static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
296: {
297:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

299:   PetscFunctionBegin;
300:   *blocks = jac->n;
301:   if (lens) *lens = jac->g_lens;
302:   PetscFunctionReturn(PETSC_SUCCESS);
303: }

305: static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
306: {
307:   PC_BJacobi *jac;

309:   PetscFunctionBegin;
310:   jac = (PC_BJacobi *)pc->data;

312:   jac->n_local = blocks;
313:   if (!lens) jac->l_lens = NULL;
314:   else {
315:     PetscCall(PetscMalloc1(blocks, &jac->l_lens));
316:     PetscCall(PetscArraycpy(jac->l_lens, lens, blocks));
317:   }
318:   PetscFunctionReturn(PETSC_SUCCESS);
319: }

321: static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
322: {
323:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

325:   PetscFunctionBegin;
326:   *blocks = jac->n_local;
327:   if (lens) *lens = jac->l_lens;
328:   PetscFunctionReturn(PETSC_SUCCESS);
329: }

331: /*@C
332:    PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
333:    this processor.

335:    Not Collective

337:    Input Parameter:
338: .  pc - the preconditioner context

340:    Output Parameters:
341: +  n_local - the number of blocks on this processor (pass NULL if not needed)
342: .  first_local - the global number of the first block on this processor (pass NULL if not needed)
343: -  ksp - the array of KSP contexts

345:    Notes:
346:    After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.

348:    Currently for some matrix implementations only 1 block per processor
349:    is supported.

351:    You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.

353:    Fortran Usage:
354:    You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.

356:    You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
357:    `KSP` array must be.

359:    Level: advanced

361: .seealso: `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
362: @*/
363: PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
364: {
365:   PetscFunctionBegin;
367:   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
368:   PetscFunctionReturn(PETSC_SUCCESS);
369: }
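/*
   Illustrative usage sketch: a hypothetical helper that switches every local block of a PCBJACOBI
   preconditioner to GMRES with ILU. It assumes the caller has already set PCBJACOBI on the outer
   KSP's preconditioner and has set the operators, so that KSPSetUp() can be called here.
*/
static PetscErrorCode SetBlockSolvers(KSP ksp)
{
  PC        pc, subpc;
  KSP      *subksp;
  PetscInt  i, nlocal;

  PetscFunctionBegin;
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(KSPSetUp(ksp)); /* PCBJacobiGetSubKSP() requires KSPSetUp() or PCSetUp() first */
  PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, NULL, &subksp));
  for (i = 0; i < nlocal; i++) {
    PetscCall(KSPSetType(subksp[i], KSPGMRES));
    PetscCall(KSPGetPC(subksp[i], &subpc));
    PetscCall(PCSetType(subpc, PCILU));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}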

371: /*@
372:    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
373:    Jacobi preconditioner.

375:    Collective

377:    Input Parameters:
378: +  pc - the preconditioner context
379: .  blocks - the number of blocks
380: -  lens - [optional] integer array containing the size of each block

382:    Options Database Key:
383: .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks

385:    Note:
386:    Currently only a limited number of blocking configurations are supported.
387:    All processors sharing the `PC` must call this routine with the same data.

389:    Level: intermediate

391: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
392: @*/
393: PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
394: {
395:   PetscFunctionBegin;
397:   PetscCheck(blocks > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Must have positive blocks");
398:   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
399:   PetscFunctionReturn(PETSC_SUCCESS);
400: }
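/*
   Illustrative sketch of a call to PCBJacobiSetTotalBlocks(): four global blocks of unequal size
   for a matrix assumed to have 100 global rows and block size 1. Each length must be a nonzero
   multiple of the matrix block size, the lengths must sum to the global number of rows, and they
   must be compatible with the parallel row distribution (see PCSetUp_BJacobi()).

     PetscInt lens[4] = {40, 30, 20, 10};
     PetscCall(PCSetType(pc, PCBJACOBI));
     PetscCall(PCBJacobiSetTotalBlocks(pc, 4, lens));
*/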

402: /*@C
403:    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
404:    Jacobi, `PCBJACOBI`, preconditioner.

406:    Not Collective

408:    Input Parameter:
409: .  pc - the preconditioner context

411:    Output Parameters:
412: +  blocks - the number of blocks
413: -  lens - integer array containing the size of each block

415:    Level: intermediate

417: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
418: @*/
419: PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
420: {
421:   PetscFunctionBegin;
424:   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }
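/*
   Illustrative sketch of a query; lens is returned as NULL when the user never supplied explicit
   block sizes.

     PetscInt        nblocks;
     const PetscInt *lens;
     PetscCall(PCBJacobiGetTotalBlocks(pc, &nblocks, &lens));
*/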

428: /*@
429:    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
430:    Jacobi, `PCBJACOBI`,  preconditioner.

432:    Not Collective

434:    Input Parameters:
435: +  pc - the preconditioner context
436: .  blocks - the number of blocks
437: -  lens - [optional] integer array containing size of each block

439:    Options Database Key:
440: .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks

442:    Note:
443:    Currently only a limited number of blocking configurations are supported.

445:    Level: intermediate

447: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
448: @*/
449: PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
450: {
451:   PetscFunctionBegin;
453:   PetscCheck(blocks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must have nonnegative blocks");
454:   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
455:   PetscFunctionReturn(PETSC_SUCCESS);
456: }
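/*
   Illustrative sketch of a call to PCBJacobiSetLocalBlocks(): split this process's rows into two
   local blocks. Here mlocal is assumed to be the number of locally owned rows and the matrix block
   size is assumed to be 1, since each length must be a multiple of the block size.

     PetscInt llens[2];
     llens[0] = mlocal / 2;
     llens[1] = mlocal - llens[0];
     PetscCall(PCBJacobiSetLocalBlocks(pc, 2, llens));
*/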

458: /*@C
459:    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
460:    Jacobi, `PCBJACOBI`, preconditioner.

462:    Not Collective

464:    Input Parameter:
465: .  pc - the preconditioner context

466:    Output Parameters:
467: +  blocks - the number of blocks
468: -  lens - integer array containing the size of each block

469:    Note:
470:    Currently only a limited number of blocking configurations are supported.

472:    Level: intermediate

474: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
475: @*/
476: PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
477: {
478:   PetscFunctionBegin;
481:   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
482:   PetscFunctionReturn(PETSC_SUCCESS);
483: }

485: /*MC
486:    PCBJACOBI - Use block Jacobi preconditioning, in which each block is (approximately) solved with
487:            its own `KSP` object.

489:    Options Database Keys:
490: +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
491: -  -pc_bjacobi_blocks <n> - use n total blocks

493:    Notes:
494:     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block

496:     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.

498:      To set options on the solvers for all blocks, prepend -sub_ to the `KSP` and `PC`
499:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

501:      To set the options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
502:          and set the options directly on the resulting `KSP` objects (you can access each one's `PC`
503:          with `KSPGetPC()`)

505:      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
506:          performance. A different block partitioning may lead to additional data transfers
507:          between the host and the GPU that degrade performance.

509:      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.

511:    Level: beginner

513: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
514:           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
515:           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
516: M*/
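/*
   For illustration (the executable name and process count are arbitrary), a typical command line
   that selects block Jacobi with one ILU-preconditioned block per process is

     mpiexec -n 4 ./app -ksp_type gmres -pc_type bjacobi -sub_ksp_type preonly -sub_pc_type ilu -ksp_view
*/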

518: PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
519: {
520:   PetscMPIInt rank;
521:   PC_BJacobi *jac;

523:   PetscFunctionBegin;
524:   PetscCall(PetscNew(&jac));
525:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));

527:   pc->ops->apply           = NULL;
528:   pc->ops->matapply        = NULL;
529:   pc->ops->applytranspose  = NULL;
530:   pc->ops->setup           = PCSetUp_BJacobi;
531:   pc->ops->destroy         = PCDestroy_BJacobi;
532:   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
533:   pc->ops->view            = PCView_BJacobi;
534:   pc->ops->applyrichardson = NULL;

536:   pc->data         = (void *)jac;
537:   jac->n           = -1;
538:   jac->n_local     = -1;
539:   jac->first_local = rank;
540:   jac->ksp         = NULL;
541:   jac->g_lens      = NULL;
542:   jac->l_lens      = NULL;
543:   jac->psubcomm    = NULL;

545:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi));
546:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi));
547:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi));
548:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi));
549:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi));
550:   PetscFunctionReturn(PETSC_SUCCESS);
551: }

553: /*
554:         These are for a single block per processor; they work for AIJ and BAIJ, both Seq and MPI
555: */
556: static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
557: {
558:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
559:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

561:   PetscFunctionBegin;
562:   PetscCall(KSPReset(jac->ksp[0]));
563:   PetscCall(VecDestroy(&bjac->x));
564:   PetscCall(VecDestroy(&bjac->y));
565:   PetscFunctionReturn(PETSC_SUCCESS);
566: }

568: static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
569: {
570:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
571:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

573:   PetscFunctionBegin;
574:   PetscCall(PCReset_BJacobi_Singleblock(pc));
575:   PetscCall(KSPDestroy(&jac->ksp[0]));
576:   PetscCall(PetscFree(jac->ksp));
577:   PetscCall(PetscFree(bjac));
578:   PetscCall(PCDestroy_BJacobi(pc));
579:   PetscFunctionReturn(PETSC_SUCCESS);
580: }

582: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
583: {
584:   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
585:   KSP                subksp = jac->ksp[0];
586:   KSPConvergedReason reason;

588:   PetscFunctionBegin;
589:   PetscCall(KSPSetUp(subksp));
590:   PetscCall(KSPGetConvergedReason(subksp, &reason));
591:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
592:   PetscFunctionReturn(PETSC_SUCCESS);
593: }

595: static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
596: {
597:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
598:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

600:   PetscFunctionBegin;
601:   PetscCall(VecGetLocalVectorRead(x, bjac->x));
602:   PetscCall(VecGetLocalVector(y, bjac->y));
603:   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
604:      matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
605:      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
606:   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
607:   PetscCall(KSPSolve(jac->ksp[0], bjac->x, bjac->y));
608:   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
609:   PetscCall(VecRestoreLocalVectorRead(x, bjac->x));
610:   PetscCall(VecRestoreLocalVector(y, bjac->y));
611:   PetscFunctionReturn(PETSC_SUCCESS);
612: }

614: static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
615: {
616:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
617:   Mat         sX, sY;

619:   PetscFunctionBegin;
620:   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
621:      matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
622:      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
623:   PetscCall(KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner));
624:   PetscCall(MatDenseGetLocalMatrix(X, &sX));
625:   PetscCall(MatDenseGetLocalMatrix(Y, &sY));
626:   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
627:   PetscFunctionReturn(PETSC_SUCCESS);
628: }

630: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
631: {
632:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
633:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
634:   PetscScalar            *y_array;
635:   const PetscScalar      *x_array;
636:   PC                      subpc;

638:   PetscFunctionBegin;
639:   /*
640:       The VecPlaceArray() calls avoid copying the x and y
641:     vectors into the bjac->x and bjac->y work vectors. The
642:     work vectors exist because we need sequential vectors
643:     for the sequential solve.
644:   */
645:   PetscCall(VecGetArrayRead(x, &x_array));
646:   PetscCall(VecGetArray(y, &y_array));
647:   PetscCall(VecPlaceArray(bjac->x, x_array));
648:   PetscCall(VecPlaceArray(bjac->y, y_array));
649:   /* apply the symmetric left portion of the inner PC operator */
650:   /* note this by-passes the inner KSP and its options completely */
651:   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
652:   PetscCall(PCApplySymmetricLeft(subpc, bjac->x, bjac->y));
653:   PetscCall(VecResetArray(bjac->x));
654:   PetscCall(VecResetArray(bjac->y));
655:   PetscCall(VecRestoreArrayRead(x, &x_array));
656:   PetscCall(VecRestoreArray(y, &y_array));
657:   PetscFunctionReturn(PETSC_SUCCESS);
658: }

660: static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
661: {
662:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
663:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
664:   PetscScalar            *y_array;
665:   const PetscScalar      *x_array;
666:   PC                      subpc;

668:   PetscFunctionBegin;
669:   /*
670:       The VecPlaceArray() calls avoid copying the x and y
671:     vectors into the bjac->x and bjac->y work vectors. The
672:     work vectors exist because we need sequential vectors
673:     for the sequential solve.
674:   */
675:   PetscCall(VecGetArrayRead(x, &x_array));
676:   PetscCall(VecGetArray(y, &y_array));
677:   PetscCall(VecPlaceArray(bjac->x, x_array));
678:   PetscCall(VecPlaceArray(bjac->y, y_array));

680:   /* apply the symmetric right portion of the inner PC operator */
681:   /* note this by-passes the inner KSP and its options completely */

683:   PetscCall(KSPGetPC(jac->ksp[0], &subpc));
684:   PetscCall(PCApplySymmetricRight(subpc, bjac->x, bjac->y));

686:   PetscCall(VecResetArray(bjac->x));
687:   PetscCall(VecResetArray(bjac->y));
688:   PetscCall(VecRestoreArrayRead(x, &x_array));
689:   PetscCall(VecRestoreArray(y, &y_array));
690:   PetscFunctionReturn(PETSC_SUCCESS);
691: }

693: static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
694: {
695:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
696:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
697:   PetscScalar            *y_array;
698:   const PetscScalar      *x_array;

700:   PetscFunctionBegin;
701:   /*
702:       The VecPlaceArray() calls avoid copying the x and y
703:     vectors into the bjac->x and bjac->y work vectors. The
704:     work vectors exist because we need sequential vectors
705:     for the sequential solve.
706:   */
707:   PetscCall(VecGetArrayRead(x, &x_array));
708:   PetscCall(VecGetArray(y, &y_array));
709:   PetscCall(VecPlaceArray(bjac->x, x_array));
710:   PetscCall(VecPlaceArray(bjac->y, y_array));
711:   PetscCall(KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y));
712:   PetscCall(KSPCheckSolve(jac->ksp[0], pc, bjac->y));
713:   PetscCall(VecResetArray(bjac->x));
714:   PetscCall(VecResetArray(bjac->y));
715:   PetscCall(VecRestoreArrayRead(x, &x_array));
716:   PetscCall(VecRestoreArray(y, &y_array));
717:   PetscFunctionReturn(PETSC_SUCCESS);
718: }

720: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
721: {
722:   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
723:   PetscInt                m;
724:   KSP                     ksp;
725:   PC_BJacobi_Singleblock *bjac;
726:   PetscBool               wasSetup = PETSC_TRUE;
727:   VecType                 vectype;
728:   const char             *prefix;

730:   PetscFunctionBegin;
731:   if (!pc->setupcalled) {
732:     if (!jac->ksp) {
733:       wasSetup = PETSC_FALSE;

735:       PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
736:       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
737:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
738:       PetscCall(KSPSetType(ksp, KSPPREONLY));
739:       PetscCall(PCGetOptionsPrefix(pc, &prefix));
740:       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
741:       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

743:       pc->ops->reset               = PCReset_BJacobi_Singleblock;
744:       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
745:       pc->ops->apply               = PCApply_BJacobi_Singleblock;
746:       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
747:       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
748:       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
749:       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
750:       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

752:       PetscCall(PetscMalloc1(1, &jac->ksp));
753:       jac->ksp[0] = ksp;

755:       PetscCall(PetscNew(&bjac));
756:       jac->data = (void *)bjac;
757:     } else {
758:       ksp  = jac->ksp[0];
759:       bjac = (PC_BJacobi_Singleblock *)jac->data;
760:     }

762:     /*
763:       The reason we need to generate these vectors is to serve
764:       as the right-hand side and solution vector for the solve on the
765:       block. We do not need to allocate space for the vectors since
766:       that is provided via VecGetLocalVector()/VecPlaceArray() just before the
767:       solve on the block.
768:     */
769:     PetscCall(MatGetSize(pmat, &m, &m));
770:     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x));
771:     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y));
772:     PetscCall(MatGetVecType(pmat, &vectype));
773:     PetscCall(VecSetType(bjac->x, vectype));
774:     PetscCall(VecSetType(bjac->y, vectype));
775:   } else {
776:     ksp  = jac->ksp[0];
777:     bjac = (PC_BJacobi_Singleblock *)jac->data;
778:   }
779:   PetscCall(KSPGetOptionsPrefix(ksp, &prefix));
780:   if (pc->useAmat) {
781:     PetscCall(KSPSetOperators(ksp, mat, pmat));
782:     PetscCall(MatSetOptionsPrefix(mat, prefix));
783:   } else {
784:     PetscCall(KSPSetOperators(ksp, pmat, pmat));
785:   }
786:   PetscCall(MatSetOptionsPrefix(pmat, prefix));
787:   if (!wasSetup && pc->setfromoptionscalled) {
788:     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
789:     PetscCall(KSPSetFromOptions(ksp));
790:   }
791:   PetscFunctionReturn(PETSC_SUCCESS);
792: }

794: static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
795: {
796:   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
797:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
798:   PetscInt               i;

800:   PetscFunctionBegin;
801:   if (bjac && bjac->pmat) {
802:     PetscCall(MatDestroyMatrices(jac->n_local, &bjac->pmat));
803:     if (pc->useAmat) PetscCall(MatDestroyMatrices(jac->n_local, &bjac->mat));
804:   }

806:   for (i = 0; i < jac->n_local; i++) {
807:     PetscCall(KSPReset(jac->ksp[i]));
808:     if (bjac && bjac->x) {
809:       PetscCall(VecDestroy(&bjac->x[i]));
810:       PetscCall(VecDestroy(&bjac->y[i]));
811:       PetscCall(ISDestroy(&bjac->is[i]));
812:     }
813:   }
814:   PetscCall(PetscFree(jac->l_lens));
815:   PetscCall(PetscFree(jac->g_lens));
816:   PetscFunctionReturn(PETSC_SUCCESS);
817: }

819: static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
820: {
821:   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
822:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
823:   PetscInt               i;

825:   PetscFunctionBegin;
826:   PetscCall(PCReset_BJacobi_Multiblock(pc));
827:   if (bjac) {
828:     PetscCall(PetscFree2(bjac->x, bjac->y));
829:     PetscCall(PetscFree(bjac->starts));
830:     PetscCall(PetscFree(bjac->is));
831:   }
832:   PetscCall(PetscFree(jac->data));
833:   for (i = 0; i < jac->n_local; i++) PetscCall(KSPDestroy(&jac->ksp[i]));
834:   PetscCall(PetscFree(jac->ksp));
835:   PetscCall(PCDestroy_BJacobi(pc));
836:   PetscFunctionReturn(PETSC_SUCCESS);
837: }

839: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
840: {
841:   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
842:   PetscInt           i, n_local = jac->n_local;
843:   KSPConvergedReason reason;

845:   PetscFunctionBegin;
846:   for (i = 0; i < n_local; i++) {
847:     PetscCall(KSPSetUp(jac->ksp[i]));
848:     PetscCall(KSPGetConvergedReason(jac->ksp[i], &reason));
849:     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
850:   }
851:   PetscFunctionReturn(PETSC_SUCCESS);
852: }

854: static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
855: {
856:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
857:   PetscInt               i, n_local = jac->n_local;
858:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
859:   PetscScalar           *yin;
860:   const PetscScalar     *xin;

862:   PetscFunctionBegin;
863:   PetscCall(VecGetArrayRead(x, &xin));
864:   PetscCall(VecGetArray(y, &yin));
865:   for (i = 0; i < n_local; i++) {
866:     /*
867:        To avoid copying the subvector from x into a workspace we instead
868:        make the workspace vector array point to the subpart of the array of
869:        the global vector.
870:     */
871:     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
872:     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

874:     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
875:     PetscCall(KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]));
876:     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
877:     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

879:     PetscCall(VecResetArray(bjac->x[i]));
880:     PetscCall(VecResetArray(bjac->y[i]));
881:   }
882:   PetscCall(VecRestoreArrayRead(x, &xin));
883:   PetscCall(VecRestoreArray(y, &yin));
884:   PetscFunctionReturn(PETSC_SUCCESS);
885: }

887: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
888: {
889:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
890:   PetscInt               i, n_local = jac->n_local;
891:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
892:   PetscScalar           *yin;
893:   const PetscScalar     *xin;
894:   PC                     subpc;

896:   PetscFunctionBegin;
897:   PetscCall(VecGetArrayRead(x, &xin));
898:   PetscCall(VecGetArray(y, &yin));
899:   for (i = 0; i < n_local; i++) {
900:     /*
901:        To avoid copying the subvector from x into a workspace we instead
902:        make the workspace vector array point to the subpart of the array of
903:        the global vector.
904:     */
905:     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
906:     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

908:     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
909:     /* apply the symmetric left portion of the inner PC operator */
910:     /* note this by-passes the inner KSP and its options completely */
911:     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
912:     PetscCall(PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]));
913:     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

915:     PetscCall(VecResetArray(bjac->x[i]));
916:     PetscCall(VecResetArray(bjac->y[i]));
917:   }
918:   PetscCall(VecRestoreArrayRead(x, &xin));
919:   PetscCall(VecRestoreArray(y, &yin));
920:   PetscFunctionReturn(PETSC_SUCCESS);
921: }

923: static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
924: {
925:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
926:   PetscInt               i, n_local = jac->n_local;
927:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
928:   PetscScalar           *yin;
929:   const PetscScalar     *xin;
930:   PC                     subpc;

932:   PetscFunctionBegin;
933:   PetscCall(VecGetArrayRead(x, &xin));
934:   PetscCall(VecGetArray(y, &yin));
935:   for (i = 0; i < n_local; i++) {
936:     /*
937:        To avoid copying the subvector from x into a workspace we instead
938:        make the workspace vector array point to the subpart of the array of
939:        the global vector.
940:     */
941:     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
942:     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

944:     PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
945:     /* apply the symmetric right portion of the inner PC operator */
946:     /* note this by-passes the inner KSP and its options completely */
947:     PetscCall(KSPGetPC(jac->ksp[i], &subpc));
948:     PetscCall(PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]));
949:     PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

951:     PetscCall(VecResetArray(bjac->x[i]));
952:     PetscCall(VecResetArray(bjac->y[i]));
953:   }
954:   PetscCall(VecRestoreArrayRead(x, &xin));
955:   PetscCall(VecRestoreArray(y, &yin));
956:   PetscFunctionReturn(PETSC_SUCCESS);
957: }

959: static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
960: {
961:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
962:   PetscInt               i, n_local = jac->n_local;
963:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
964:   PetscScalar           *yin;
965:   const PetscScalar     *xin;

967:   PetscFunctionBegin;
968:   PetscCall(VecGetArrayRead(x, &xin));
969:   PetscCall(VecGetArray(y, &yin));
970:   for (i = 0; i < n_local; i++) {
971:     /*
972:        To avoid copying the subvector from x into a workspace we instead
973:        make the workspace vector array point to the subpart of the array of
974:        the global vector.
975:     */
976:     PetscCall(VecPlaceArray(bjac->x[i], xin + bjac->starts[i]));
977:     PetscCall(VecPlaceArray(bjac->y[i], yin + bjac->starts[i]));

979:     PetscCall(PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));
980:     PetscCall(KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]));
981:     PetscCall(KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]));
982:     PetscCall(PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0));

984:     PetscCall(VecResetArray(bjac->x[i]));
985:     PetscCall(VecResetArray(bjac->y[i]));
986:   }
987:   PetscCall(VecRestoreArrayRead(x, &xin));
988:   PetscCall(VecRestoreArray(y, &yin));
989:   PetscFunctionReturn(PETSC_SUCCESS);
990: }

992: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
993: {
994:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
995:   PetscInt               m, n_local, N, M, start, i;
996:   const char            *prefix;
997:   KSP                    ksp;
998:   Vec                    x, y;
999:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
1000:   PC                     subpc;
1001:   IS                     is;
1002:   MatReuse               scall;
1003:   VecType                vectype;

1005:   PetscFunctionBegin;
1006:   PetscCall(MatGetLocalSize(pc->pmat, &M, &N));

1008:   n_local = jac->n_local;

1010:   if (pc->useAmat) {
1011:     PetscBool same;
1012:     PetscCall(PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same));
1013:     PetscCheck(same, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "Matrices not of same type");
1014:   }

1016:   if (!pc->setupcalled) {
1017:     scall = MAT_INITIAL_MATRIX;

1019:     if (!jac->ksp) {
1020:       pc->ops->reset               = PCReset_BJacobi_Multiblock;
1021:       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
1022:       pc->ops->apply               = PCApply_BJacobi_Multiblock;
1023:       pc->ops->matapply            = NULL;
1024:       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
1025:       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
1026:       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
1027:       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;

1029:       PetscCall(PetscNew(&bjac));
1030:       PetscCall(PetscMalloc1(n_local, &jac->ksp));
1031:       PetscCall(PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y));
1032:       PetscCall(PetscMalloc1(n_local, &bjac->starts));

1034:       jac->data = (void *)bjac;
1035:       PetscCall(PetscMalloc1(n_local, &bjac->is));

1037:       for (i = 0; i < n_local; i++) {
1038:         PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp));
1039:         PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
1040:         PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
1041:         PetscCall(KSPSetType(ksp, KSPPREONLY));
1042:         PetscCall(KSPGetPC(ksp, &subpc));
1043:         PetscCall(PCGetOptionsPrefix(pc, &prefix));
1044:         PetscCall(KSPSetOptionsPrefix(ksp, prefix));
1045:         PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));

1047:         jac->ksp[i] = ksp;
1048:       }
1049:     } else {
1050:       bjac = (PC_BJacobi_Multiblock *)jac->data;
1051:     }

1053:     start = 0;
1054:     PetscCall(MatGetVecType(pmat, &vectype));
1055:     for (i = 0; i < n_local; i++) {
1056:       m = jac->l_lens[i];
1057:       /*
1058:       The reason we need to generate these vectors is to serve
1059:       as the right-hand side and solution vector for the solve on the
1060:       block. We do not need to allocate space for the vectors since
1061:       that is provided via VecPlaceArray() just before the call to
1062:       KSPSolve() on the block.

1064:       */
1065:       PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &x));
1066:       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y));
1067:       PetscCall(VecSetType(x, vectype));
1068:       PetscCall(VecSetType(y, vectype));

1070:       bjac->x[i]      = x;
1071:       bjac->y[i]      = y;
1072:       bjac->starts[i] = start;

1074:       PetscCall(ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is));
1075:       bjac->is[i] = is;

1077:       start += m;
1078:     }
1079:   } else {
1080:     bjac = (PC_BJacobi_Multiblock *)jac->data;
1081:     /*
1082:        Destroy the blocks from the previous iteration
1083:     */
1084:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1085:       PetscCall(MatDestroyMatrices(n_local, &bjac->pmat));
1086:       if (pc->useAmat) PetscCall(MatDestroyMatrices(n_local, &bjac->mat));
1087:       scall = MAT_INITIAL_MATRIX;
1088:     } else scall = MAT_REUSE_MATRIX;
1089:   }

1091:   PetscCall(MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat));
1092:   if (pc->useAmat) PetscCall(MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat));
1093:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1094:      different boundary conditions for the submatrices than for the global problem) */
1095:   PetscCall(PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP));

1097:   for (i = 0; i < n_local; i++) {
1098:     PetscCall(KSPGetOptionsPrefix(jac->ksp[i], &prefix));
1099:     if (pc->useAmat) {
1100:       PetscCall(KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]));
1101:       PetscCall(MatSetOptionsPrefix(bjac->mat[i], prefix));
1102:     } else {
1103:       PetscCall(KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]));
1104:     }
1105:     PetscCall(MatSetOptionsPrefix(bjac->pmat[i], prefix));
1106:     if (pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[i]));
1107:   }
1108:   PetscFunctionReturn(PETSC_SUCCESS);
1109: }

1111: /*
1112:       These are for a single block with multiple processes
1113: */
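/*
   This path is selected by PCSetUp_BJacobi() when 0 < jac->n < size, i.e. when fewer blocks than MPI
   processes are requested. For example, -pc_bjacobi_blocks 2 on 8 processes yields 2 blocks, each
   solved by a KSP living on a contiguous 4-process subcommunicator created in PCSetUp_BJacobi_Multiproc().
*/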
1114: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1115: {
1116:   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1117:   KSP                subksp = jac->ksp[0];
1118:   KSPConvergedReason reason;

1120:   PetscFunctionBegin;
1121:   PetscCall(KSPSetUp(subksp));
1122:   PetscCall(KSPGetConvergedReason(subksp, &reason));
1123:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1124:   PetscFunctionReturn(PETSC_SUCCESS);
1125: }

1127: static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1128: {
1129:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1130:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

1132:   PetscFunctionBegin;
1133:   PetscCall(VecDestroy(&mpjac->ysub));
1134:   PetscCall(VecDestroy(&mpjac->xsub));
1135:   PetscCall(MatDestroy(&mpjac->submats));
1136:   if (jac->ksp) PetscCall(KSPReset(jac->ksp[0]));
1137:   PetscFunctionReturn(PETSC_SUCCESS);
1138: }

1140: static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1141: {
1142:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1143:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

1145:   PetscFunctionBegin;
1146:   PetscCall(PCReset_BJacobi_Multiproc(pc));
1147:   PetscCall(KSPDestroy(&jac->ksp[0]));
1148:   PetscCall(PetscFree(jac->ksp));
1149:   PetscCall(PetscSubcommDestroy(&mpjac->psubcomm));

1151:   PetscCall(PetscFree(mpjac));
1152:   PetscCall(PCDestroy_BJacobi(pc));
1153:   PetscFunctionReturn(PETSC_SUCCESS);
1154: }

1156: static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1157: {
1158:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1159:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1160:   PetscScalar          *yarray;
1161:   const PetscScalar    *xarray;
1162:   KSPConvergedReason    reason;

1164:   PetscFunctionBegin;
1165:   /* place x's and y's local arrays into xsub and ysub */
1166:   PetscCall(VecGetArrayRead(x, &xarray));
1167:   PetscCall(VecGetArray(y, &yarray));
1168:   PetscCall(VecPlaceArray(mpjac->xsub, xarray));
1169:   PetscCall(VecPlaceArray(mpjac->ysub, yarray));

1171:   /* apply preconditioner on each matrix block */
1172:   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1173:   PetscCall(KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub));
1174:   PetscCall(KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub));
1175:   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0));
1176:   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1177:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;

1179:   PetscCall(VecResetArray(mpjac->xsub));
1180:   PetscCall(VecResetArray(mpjac->ysub));
1181:   PetscCall(VecRestoreArrayRead(x, &xarray));
1182:   PetscCall(VecRestoreArray(y, &yarray));
1183:   PetscFunctionReturn(PETSC_SUCCESS);
1184: }

1186: static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1187: {
1188:   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1189:   KSPConvergedReason reason;
1190:   Mat                sX, sY;
1191:   const PetscScalar *x;
1192:   PetscScalar       *y;
1193:   PetscInt           m, N, lda, ldb;

1195:   PetscFunctionBegin;
1196:   /* apply preconditioner on each matrix block */
1197:   PetscCall(MatGetLocalSize(X, &m, NULL));
1198:   PetscCall(MatGetSize(X, NULL, &N));
1199:   PetscCall(MatDenseGetLDA(X, &lda));
1200:   PetscCall(MatDenseGetLDA(Y, &ldb));
1201:   PetscCall(MatDenseGetArrayRead(X, &x));
1202:   PetscCall(MatDenseGetArrayWrite(Y, &y));
1203:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX));
1204:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY));
1205:   PetscCall(MatDenseSetLDA(sX, lda));
1206:   PetscCall(MatDenseSetLDA(sY, ldb));
1207:   PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1208:   PetscCall(KSPMatSolve(jac->ksp[0], sX, sY));
1209:   PetscCall(KSPCheckSolve(jac->ksp[0], pc, NULL));
1210:   PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0));
1211:   PetscCall(MatDestroy(&sY));
1212:   PetscCall(MatDestroy(&sX));
1213:   PetscCall(MatDenseRestoreArrayWrite(Y, &y));
1214:   PetscCall(MatDenseRestoreArrayRead(X, &x));
1215:   PetscCall(KSPGetConvergedReason(jac->ksp[0], &reason));
1216:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1217:   PetscFunctionReturn(PETSC_SUCCESS);
1218: }

1220: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1221: {
1222:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1223:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1224:   PetscInt              m, n;
1225:   MPI_Comm              comm, subcomm = 0;
1226:   const char           *prefix;
1227:   PetscBool             wasSetup = PETSC_TRUE;
1228:   VecType               vectype;

1230:   PetscFunctionBegin;
1231:   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1232:   PetscCheck(jac->n_local <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only a single block in a subcommunicator is supported");
1233:   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1234:   if (!pc->setupcalled) {
1235:     wasSetup = PETSC_FALSE;
1236:     PetscCall(PetscNew(&mpjac));
1237:     jac->data = (void *)mpjac;

1239:     /* initialize the data structure mpjac */
1240:     if (!jac->psubcomm) {
1241:       /* Create default contiguous subcommunicators if the user does not provide them */
1242:       PetscCall(PetscSubcommCreate(comm, &jac->psubcomm));
1243:       PetscCall(PetscSubcommSetNumber(jac->psubcomm, jac->n));
1244:       PetscCall(PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS));
1245:     }
1246:     mpjac->psubcomm = jac->psubcomm;
1247:     subcomm         = PetscSubcommChild(mpjac->psubcomm);

1249:     /* Get matrix blocks of pmat */
1250:     PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));

1252:     /* create a new KSP that the processes in each subcomm share a copy of */
1253:     PetscCall(PetscMalloc1(1, &jac->ksp));
1254:     PetscCall(KSPCreate(subcomm, &jac->ksp[0]));
1255:     PetscCall(KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure));
1256:     PetscCall(PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1));
1257:     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1258:     PetscCall(KSPGetPC(jac->ksp[0], &mpjac->pc));

1260:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
1261:     PetscCall(KSPSetOptionsPrefix(jac->ksp[0], prefix));
1262:     PetscCall(KSPAppendOptionsPrefix(jac->ksp[0], "sub_"));
1263:     PetscCall(KSPGetOptionsPrefix(jac->ksp[0], &prefix));
1264:     PetscCall(MatSetOptionsPrefix(mpjac->submats, prefix));

1266:     /* create dummy vectors xsub and ysub */
1267:     PetscCall(MatGetLocalSize(mpjac->submats, &m, &n));
1268:     PetscCall(VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub));
1269:     PetscCall(VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub));
1270:     PetscCall(MatGetVecType(mpjac->submats, &vectype));
1271:     PetscCall(VecSetType(mpjac->xsub, vectype));
1272:     PetscCall(VecSetType(mpjac->ysub, vectype));

1274:     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1275:     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1276:     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1277:     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1278:     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1279:   } else { /* pc->setupcalled */
1280:     subcomm = PetscSubcommChild(mpjac->psubcomm);
1281:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1282:       /* destroy old matrix blocks, then get new matrix blocks */
1283:       if (mpjac->submats) PetscCall(MatDestroy(&mpjac->submats));
1284:       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats));
1285:     } else {
1286:       PetscCall(MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats));
1287:     }
1288:     PetscCall(KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats));
1289:   }

1291:   if (!wasSetup && pc->setfromoptionscalled) PetscCall(KSPSetFromOptions(jac->ksp[0]));
1292:   PetscFunctionReturn(PETSC_SUCCESS);
1293: }