Actual source code: pcmpi.c

  1: /*
  2:     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
  3:     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

  5:     The non-MPI program may use OpenMP to compute the right hand side and matrix for the linear system.

  7:     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

  9:     The resulting KSP and PC can only be controlled via the options database, though some common commands
 10:     could be passed through the server.

 12: */
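/*
   Usage sketch (not part of this file; the -pc_type value is an assumption based on the PCMPI manual
   page near the end of this file): the application is written as an ordinary sequential PETSc program
   and the server is engaged entirely from the command line, for example

     mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi \
             -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view

   Only MPI rank 0 executes the application's main(); the remaining ranks wait inside PCMPIServerBegin()
   for commands broadcast from rank 0.
*/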
 13: #include <petsc/private/pcimpl.h>
 14: #include <petsc/private/kspimpl.h>
 15: #include <petsc.h>

 17: #define PC_MPI_MAX_RANKS  256
 18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD

 20: typedef struct {
 21:   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
 22:   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
 23:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
 24:   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
 25:   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
 26: } PC_MPI;

 28: typedef enum {
 29:   PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
 30:   PCMPI_CREATE,
 31:   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
 32:   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
 33:   PCMPI_SOLVE,
 34:   PCMPI_VIEW,
 35:   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
 36: } PCMPICommand;
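/* Protocol: the controlling rank 0 broadcasts one of these commands on PC_MPI_COMM_WORLD and then all ranks
   call the matching PCMPIXxx() routine, with rank 0 passing its PC and the server ranks passing NULL */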

 38: static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
 39: static PetscBool PCMPICommSet = PETSC_FALSE;
 40: static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
 41: static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

 43: static PetscErrorCode PCMPICommsCreate(void)
 44: {
 45:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 46:   PetscMPIInt size, rank, i;

 48:   PetscFunctionBegin;
 49:   PetscCallMPI(MPI_Comm_size(comm, &size));
 50:   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
 51:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 52:   /* comm for size 1 is useful only for debugging */
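  /* Build nested subcommunicators: PCMPIComms[i] contains MPI ranks 0 through i (i + 1 ranks);
     the remaining ranks receive MPI_COMM_NULL from MPI_Comm_split() */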
 53:   for (i = 0; i < size; i++) {
 54:     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
 55:     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
 56:     PCMPISolveCounts[i] = 0;
 57:     PCMPIKSPCounts[i]   = 0;
 58:     PCMPIIterations[i]  = 0;
 59:     PCMPISizes[i]       = 0;
 60:   }
 61:   PCMPICommSet = PETSC_TRUE;
 62:   PetscFunctionReturn(PETSC_SUCCESS);
 63: }

 65: PetscErrorCode PCMPICommsDestroy(void)
 66: {
 67:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 68:   PetscMPIInt size, rank, i;

 70:   PetscFunctionBegin;
 71:   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
 72:   PetscCallMPI(MPI_Comm_size(comm, &size));
 73:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 74:   for (i = 0; i < size; i++) {
 75:     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
 76:   }
 77:   PCMPICommSet = PETSC_FALSE;
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: static PetscErrorCode PCMPICreate(PC pc)
 82: {
 83:   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
 84:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 85:   KSP         ksp;
 86:   PetscInt    N[2], mincntperrank = 0;
 87:   PetscMPIInt size;
 88:   Mat         sA;
 89:   char       *prefix;
 90:   PetscMPIInt len = 0;

 92:   PetscFunctionBegin;
 93:   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
 94:   PetscCallMPI(MPI_Comm_size(comm, &size));
 95:   if (pc) {
 96:     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
 97:     PetscCall(PCGetOperators(pc, &sA, &sA));
 98:     PetscCall(MatGetSize(sA, &N[0], &N[1]));
 99:   }
100:   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

102:   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103:   if (km) mincntperrank = km->mincntperrank;
104:   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105:   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106:   if (comm == MPI_COMM_NULL) {
107:     ksp = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110:   PetscCall(PetscLogStagePush(PCMPIStage));
111:   PetscCall(KSPCreate(comm, &ksp));
112:   PetscCall(PetscLogStagePop());
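  /* rank 0 gathers the address of the KSP created on each rank of comm into km->ksps[]; later commands
     scatter these addresses back so every rank can locate its own KSP */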
113:   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114:   if (pc) {
115:     size_t slen;

117:     PetscCallMPI(MPI_Comm_size(comm, &size));
118:     PCMPIKSPCounts[size - 1]++;
119:     PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120:     PetscCall(PetscStrlen(prefix, &slen));
121:     len = (PetscMPIInt)slen;
122:   }
123:   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124:   if (len) {
125:     if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126:     PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127:     PetscCall(KSPSetOptionsPrefix(ksp, prefix));
128:   }
129:   PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130:   PetscFunctionReturn(PETSC_SUCCESS);
131: }

133: static PetscErrorCode PCMPISetMat(PC pc)
134: {
135:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
136:   Mat                A;
137:   PetscInt           m, n, *ia, *ja, j, bs;
138:   Mat                sA;
139:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
140:   KSP                ksp;
141:   PetscLayout        layout;
142:   const PetscInt    *IA = NULL, *JA = NULL;
143:   const PetscInt    *range;
144:   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145:   PetscScalar       *a;
146:   const PetscScalar *sa               = NULL;
147:   PetscInt           matproperties[7] = {0, 0, 0, 0, 0, 0, 0};

149:   PetscFunctionBegin;
150:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
151:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
152:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
153:   if (pc) {
154:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

156:     PetscCallMPI(MPI_Comm_size(comm, &size));
157:     PCMPIMatCounts[size - 1]++;
158:     PetscCall(PCGetOperators(pc, &sA, &sA));
159:     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
160:     PetscCall(MatGetBlockSize(sA, &bs));
161:     matproperties[2] = bs;
162:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
163:     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
164:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
165:     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
166:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
167:     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
168:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
169:     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
170:   }
171:   PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm));

173:   /* determine ownership ranges of matrix columns */
174:   PetscCall(PetscLayoutCreate(comm, &layout));
175:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
176:   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
177:   PetscCall(PetscLayoutSetUp(layout));
178:   PetscCall(PetscLayoutGetLocalSize(layout, &n));
179:   PetscCall(PetscLayoutDestroy(&layout));

181:   /* determine ownership ranges of matrix rows */
182:   PetscCall(PetscLayoutCreate(comm, &layout));
183:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
184:   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
185:   PetscCall(PetscLayoutSetUp(layout));
186:   PetscCall(PetscLayoutGetLocalSize(layout, &m));

188:   /* copy over the matrix nonzero structure and values */
189:   if (pc) {
190:     NZ      = km->NZ;
191:     NZdispl = km->NZdispl;
192:     PetscCall(PetscLayoutGetRanges(layout, &range));
193:     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
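    /* rank i is sent the row-pointer entries IA[range[i]] .. IA[range[i+1]] (its local rows plus one trailing
       entry, so adjacent slices overlap by one) together with NZ[i] = IA[range[i+1]] - IA[range[i]] nonzeros;
       the displacements below account for that one-entry overlap of the row-pointer slices */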
194:     for (i = 0; i < size; i++) {
195:       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
196:       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
197:     }
198:     displi[0]  = 0;
199:     NZdispl[0] = 0;
200:     for (j = 1; j < size; j++) {
201:       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
202:       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
203:     }
204:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
205:   }
206:   PetscCall(PetscLayoutDestroy(&layout));
207:   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

209:   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
210:   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
211:   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
212:   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

214:   if (pc) {
215:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
216:     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
217:   }

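  /* the received row pointers are offsets into the original global nonzero arrays; shift them by ia[0]
     so they index the locally received ja/a arrays starting from zero */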
219:   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
220:   ia[0] = 0;
221:   PetscCall(PetscLogStagePush(PCMPIStage));
222:   PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A));
223:   PetscCall(MatSetBlockSize(A, matproperties[2]));
224:   PetscCall(MatSetOptionsPrefix(A, "mpi_"));
225:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
226:   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
227:   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
228:   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

230:   PetscCall(PetscFree3(ia, ja, a));
231:   PetscCall(KSPSetOperators(ksp, A, A));
232:   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
233:   PetscCall(PetscLogStagePop());
234:   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
235:     const PetscInt *range;

237:     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
238:     for (i = 0; i < size; i++) {
239:       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
240:       km->displ[i]     = (PetscMPIInt)range[i];
241:     }
242:   }
243:   PetscCall(MatDestroy(&A));
244:   PetscCall(KSPSetFromOptions(ksp));
245:   PetscFunctionReturn(PETSC_SUCCESS);
246: }

248: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
249: {
250:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
251:   KSP                ksp;
252:   Mat                sA, A;
253:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
254:   PetscScalar       *a;
255:   PetscCount         nz;
256:   const PetscScalar *sa = NULL;
257:   PetscMPIInt        size;
258:   PetscInt           matproperties[4] = {0, 0, 0, 0};

260:   PetscFunctionBegin;
261:   if (pc) {
262:     PetscCall(PCGetOperators(pc, &sA, &sA));
263:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
264:   }
265:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
266:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
267:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
268:   PetscCallMPI(MPI_Comm_size(comm, &size));
269:   PCMPIMatCounts[size - 1]++;
270:   PetscCall(KSPGetOperators(ksp, NULL, &A));
271:   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
272:   PetscCall(PetscMalloc1(nz, &a));
273:   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
274:   if (pc) {
275:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

277:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

279:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
280:     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
281:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
282:     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
283:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
284:     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
285:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
286:     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
287:   }
288:   PetscCall(MatUpdateMPIAIJWithArray(A, a));
289:   PetscCall(PetscFree(a));
290:   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
291:   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
292:   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
293:   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
294:   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
295:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
296:   PetscFunctionReturn(PETSC_SUCCESS);
297: }

299: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
300: {
301:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
302:   KSP                ksp;
303:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
304:   const PetscScalar *sb   = NULL, *x;
305:   PetscScalar       *b, *sx = NULL;
306:   PetscInt           its, n;
307:   PetscMPIInt        size;

309:   PetscFunctionBegin;
310:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
311:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
312:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

314:   /* TODO: optimize code to not require building counts/displ every time */

316:   /* scatterv rhs */
317:   PetscCallMPI(MPI_Comm_size(comm, &size));
318:   if (pc) {
319:     PetscInt N;

321:     PCMPISolveCounts[size - 1]++;
322:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
324:     PCMPISizes[size - 1] += N;
325:     PetscCall(VecGetArrayRead(B, &sb));
326:   }
327:   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
328:   PetscCall(VecGetArray(ksp->vec_rhs, &b));
329:   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
330:   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
331:   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

333:   PetscCall(PetscLogStagePush(PCMPIStage));
334:   PetscCall(KSPSolve(ksp, NULL, NULL));
335:   PetscCall(PetscLogStagePop());
336:   PetscCall(KSPGetIterationNumber(ksp, &its));
337:   PCMPIIterations[size - 1] += its;

339:   /* gather solution */
340:   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
341:   if (pc) PetscCall(VecGetArray(X, &sx));
342:   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
343:   if (pc) PetscCall(VecRestoreArray(X, &sx));
344:   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
345:   PetscFunctionReturn(PETSC_SUCCESS);
346: }

348: static PetscErrorCode PCMPIDestroy(PC pc)
349: {
350:   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
351:   KSP      ksp;
352:   MPI_Comm comm = PC_MPI_COMM_WORLD;

354:   PetscFunctionBegin;
355:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
356:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
357:   PetscCall(KSPDestroy(&ksp));
358:   PetscFunctionReturn(PETSC_SUCCESS);
359: }

361: /*@C
362:      PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
363:      parallel `KSP` solves and management of parallel `KSP` objects.

365:      Logically Collective on all MPI ranks except 0

367:    Options Database Keys:
368: +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
369: -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

371:      Level: developer

373:      Note:
374:       This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

376:      Developer Notes:
377:        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
378:        written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
379:        (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

381:        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

383: .seealso: `PCMPIServerEnd()`, `PCMPI`
384: @*/
385: PetscErrorCode PCMPIServerBegin(void)
386: {
387:   PetscMPIInt rank;

389:   PetscFunctionBegin;
390:   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
391:   if (PetscDefined(USE_SINGLE_LIBRARY)) {
392:     PetscCall(VecInitializePackage());
393:     PetscCall(MatInitializePackage());
394:     PetscCall(DMInitializePackage());
395:     PetscCall(PCInitializePackage());
396:     PetscCall(KSPInitializePackage());
397:     PetscCall(SNESInitializePackage());
398:     PetscCall(TSInitializePackage());
399:     PetscCall(TaoInitializePackage());
400:   }

402:   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
403:   if (rank == 0) {
404:     PETSC_COMM_WORLD = PETSC_COMM_SELF;
405:     PetscFunctionReturn(PETSC_SUCCESS);
406:   }

408:   while (PETSC_TRUE) {
409:     PCMPICommand request = PCMPI_CREATE;
410:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
411:     switch (request) {
412:     case PCMPI_CREATE:
413:       PetscCall(PCMPICreate(NULL));
414:       break;
415:     case PCMPI_SET_MAT:
416:       PetscCall(PCMPISetMat(NULL));
417:       break;
418:     case PCMPI_UPDATE_MAT_VALUES:
419:       PetscCall(PCMPIUpdateMatValues(NULL));
420:       break;
421:     case PCMPI_VIEW:
422:       // PetscCall(PCMPIView(NULL));
423:       break;
424:     case PCMPI_SOLVE:
425:       PetscCall(PCMPISolve(NULL, NULL, NULL));
426:       break;
427:     case PCMPI_DESTROY:
428:       PetscCall(PCMPIDestroy(NULL));
429:       break;
430:     case PCMPI_EXIT:
431:       PetscCall(PetscFinalize());
432:       exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */
433:       break;
434:     default:
435:       break;
436:     }
437:   }
438:   PetscFunctionReturn(PETSC_SUCCESS);
439: }

441: /*@C
442:      PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
443:      parallel `KSP` solves and management of parallel `KSP` objects.

445:      Logically Collective on all MPI ranks except 0

447:      Level: developer

449:      Note:
450:       This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided

452: .seealso: `PCMPIServerBegin()`, `PCMPI`
453: @*/
454: PetscErrorCode PCMPIServerEnd(void)
455: {
456:   PCMPICommand request = PCMPI_EXIT;

458:   PetscFunctionBegin;
459:   if (PetscGlobalRank == 0) {
460:     PetscViewer       viewer = NULL;
461:     PetscViewerFormat format;

463:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
464:     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
465:     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
466:     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
467:     PetscOptionsEnd();
468:     if (viewer) {
469:       PetscBool isascii;

471:       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
472:       if (isascii) {
473:         PetscMPIInt size;
474:         PetscMPIInt i;

476:         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
477:         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
478:         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
479:         if (PCMPIKSPCountsSeq) {
480:           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
481:         }
482:         for (i = 0; i < size; i++) {
483:           if (PCMPIKSPCounts[i]) {
484:             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
485:           }
486:         }
487:       }
488:       PetscCall(PetscViewerDestroy(&viewer));
489:     }
490:   }
491:   PetscCall(PCMPICommsDestroy());
492:   PetscFunctionReturn(PETSC_SUCCESS);
493: }

495: /*
496:     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
497:     because, for example, the problem is small. This version is more efficient because it does not require copying any data
498: */
499: static PetscErrorCode PCSetUp_Seq(PC pc)
500: {
501:   PC_MPI     *km = (PC_MPI *)pc->data;
502:   Mat         sA;
503:   const char *prefix;

505:   PetscFunctionBegin;
506:   PetscCall(PCGetOperators(pc, NULL, &sA));
507:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
508:   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
509:   PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
510:   PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
511:   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
512:   PetscCall(KSPSetFromOptions(km->ksps[0]));
513:   PetscCall(KSPSetUp(km->ksps[0]));
514:   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
515:   PCMPIKSPCountsSeq++;
516:   PetscFunctionReturn(PETSC_SUCCESS);
517: }

519: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
520: {
521:   PC_MPI  *km = (PC_MPI *)pc->data;
522:   PetscInt its, n;
523:   Mat      A;

525:   PetscFunctionBegin;
526:   PetscCall(KSPSolve(km->ksps[0], b, x));
527:   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
528:   PCMPISolveCountsSeq++;
529:   PCMPIIterationsSeq += its;
530:   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
531:   PetscCall(MatGetSize(A, &n, NULL));
532:   PCMPISizesSeq += n;
533:   PetscFunctionReturn(PETSC_SUCCESS);
534: }

536: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
537: {
538:   PC_MPI     *km = (PC_MPI *)pc->data;
539:   const char *prefix;

541:   PetscFunctionBegin;
542:   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
543:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
544:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
545:   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
546:   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
547:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
548:   PetscFunctionReturn(PETSC_SUCCESS);
549: }

551: static PetscErrorCode PCDestroy_Seq(PC pc)
552: {
553:   PC_MPI *km = (PC_MPI *)pc->data;

555:   PetscFunctionBegin;
556:   PetscCall(KSPDestroy(&km->ksps[0]));
557:   PetscCall(PetscFree(pc->data));
558:   PetscFunctionReturn(PETSC_SUCCESS);
559: }

561: /*
562:      PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
563:      right hand side to the parallel PC
564: */
565: static PetscErrorCode PCSetUp_MPI(PC pc)
566: {
567:   PC_MPI      *km = (PC_MPI *)pc->data;
568:   PetscMPIInt  rank, size;
569:   PCMPICommand request;
570:   PetscBool    newmatrix = PETSC_FALSE;

572:   PetscFunctionBegin;
573:   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
574:   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
575:   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

577:   if (!pc->setupcalled) {
578:     if (!km->alwaysuseserver) {
579:       PetscInt n;
580:       Mat      sA;
581:       /* short circuit for small systems */
582:       PetscCall(PCGetOperators(pc, &sA, &sA));
583:       PetscCall(MatGetSize(sA, &n, NULL));
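      /* fall back to the purely sequential path when the matrix is too small to give (roughly)
         mincntperrank rows each to at least two ranks, or when only one MPI rank is available */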
584:       if (n < 2 * km->mincntperrank - 1 || size == 1) {
585:         pc->ops->setup   = NULL;
586:         pc->ops->apply   = PCApply_Seq;
587:         pc->ops->destroy = PCDestroy_Seq;
588:         pc->ops->view    = PCView_Seq;
589:         PetscCall(PCSetUp_Seq(pc));
590:         PetscFunctionReturn(PETSC_SUCCESS);
591:       }
592:     }

594:     request = PCMPI_CREATE;
595:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
596:     PetscCall(PCMPICreate(pc));
597:     newmatrix = PETSC_TRUE;
598:   }
599:   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

601:   if (newmatrix) {
602:     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
603:     request = PCMPI_SET_MAT;
604:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
605:     PetscCall(PCMPISetMat(pc));
606:   } else {
607:     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
608:     request = PCMPI_UPDATE_MAT_VALUES;
609:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
610:     PetscCall(PCMPIUpdateMatValues(pc));
611:   }
612:   PetscFunctionReturn(PETSC_SUCCESS);
613: }

615: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
616: {
617:   PCMPICommand request = PCMPI_SOLVE;

619:   PetscFunctionBegin;
620:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
621:   PetscCall(PCMPISolve(pc, b, x));
622:   PetscFunctionReturn(PETSC_SUCCESS);
623: }

625: PetscErrorCode PCDestroy_MPI(PC pc)
626: {
627:   PCMPICommand request = PCMPI_DESTROY;

629:   PetscFunctionBegin;
630:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
631:   PetscCall(PCMPIDestroy(pc));
632:   PetscCall(PetscFree(pc->data));
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }

636: /*
637:      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
638: */
639: PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
640: {
641:   PC_MPI     *km = (PC_MPI *)pc->data;
642:   MPI_Comm    comm;
643:   PetscMPIInt size;
644:   const char *prefix;

646:   PetscFunctionBegin;
647:   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
648:   PetscCallMPI(MPI_Comm_size(comm, &size));
649:   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
650:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
651:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
652:   if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
653:   else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
654:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
655:   PetscFunctionReturn(PETSC_SUCCESS);
656: }

658: PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
659: {
660:   PC_MPI *km = (PC_MPI *)pc->data;

662:   PetscFunctionBegin;
663:   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
664:   PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
665:   PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
666:   PetscOptionsHeadEnd();
667:   PetscFunctionReturn(PETSC_SUCCESS);
668: }

670: /*MC
671:      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

673:    Options Database Keys:
674: +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
675: .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
676: .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
677: -  -pc_mpi_always_use_server - use the server solver code even if the particular system is solved on only one process; this option is only for debugging and testing purposes

679:    Level: beginner

681:    Notes:
682:    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

684:    The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give reasonable speedup for
685:    potentially 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.

687:    It can be particularly useful for user OpenMP code or potentially user GPU code.

689:    When the program is run with a single MPI rank, this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
690:    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

692:    The solver options for the `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
693:    because they are not the actual solver objects.

695:    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
696:    stages will be confusing since the event times are recorded only on the 0th MPI rank, so the percentages of time reported for those events will be misleading.

698: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
699: M*/
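/*
   Calling-sequence sketch (an illustration, not from the PETSc test suite; the -pc_type name is an
   assumption): the user-level code is plain sequential KSP usage, and the server is selected only
   through the command line options documented above.

     KSP ksp;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); // PETSC_COMM_WORLD has been set to PETSC_COMM_SELF in server mode
     PetscCall(KSPSetOperators(ksp, A, A));        // A is a sequential AIJ matrix assembled by the user
     PetscCall(KSPSetFromOptions(ksp));            // -pc_type mpi selects this PC; -mpi_xxx options control the inner solver
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));
*/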
700: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
701: {
702:   PC_MPI *km;

704:   PetscFunctionBegin;
705:   PetscCall(PetscNew(&km));
706:   pc->data = (void *)km;

708:   km->mincntperrank = 10000;

710:   pc->ops->setup          = PCSetUp_MPI;
711:   pc->ops->apply          = PCApply_MPI;
712:   pc->ops->destroy        = PCDestroy_MPI;
713:   pc->ops->view           = PCView_MPI;
714:   pc->ops->setfromoptions = PCSetFromOptions_MPI;
715:   PetscFunctionReturn(PETSC_SUCCESS);
716: }