Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5: The sequential program may use OpenMP to compute the right hand side and matrix for the linear system.
7: The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
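/*
   Usage sketch (illustrative; not part of this file). The application is written as an ordinary
   sequential PETSc code; the executable name and the inner solver choices below are assumptions,
   only -mpi_linear_solver_server, -pc_type mpi, and the -mpi_ prefixed options come from this file
   and its manual pages:

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));            // also starts the server on ranks > 0
       PetscCall(MatCreateSeqAIJ(PETSC_COMM_WORLD, n, n, 5, NULL, &A)); // PETSC_COMM_WORLD is just rank 0 here
       // ... fill A and the right hand side b ...
       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp));                               // -pc_type mpi selects the PCMPI defined below
       PetscCall(KSPSolve(ksp, b, x));

   launched with extra MPI ranks that act as the solver server, for example

       mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg
*/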
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petsc.h>
17: #define PC_MPI_MAX_RANKS 256
18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
20: typedef struct {
21: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23: PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of matrix nonzero values (and, initially, nonzero column indices) */
24: PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25: PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26: } PC_MPI;
28: typedef enum {
29: PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
30: PCMPI_CREATE,
31: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
32: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33: PCMPI_SOLVE,
34: PCMPI_VIEW,
35: PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36: } PCMPICommand;
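/*
   Request protocol (a summary of the code below): for each operation, rank 0 broadcasts one PCMPICommand
   on PC_MPI_COMM_WORLD and then calls the matching routine with its PC, while the remaining ranks,
   blocked in the loop in PCMPIServerBegin(), receive the command and call the same routine with a NULL PC.
   Hence PCMPICreate(), PCMPISetMat(), PCMPIUpdateMatValues(), PCMPISolve(), and PCMPIDestroy() below are
   each collective on PC_MPI_COMM_WORLD.
*/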
38: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
39: static PetscBool PCMPICommSet = PETSC_FALSE;
40: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41: static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
43: static PetscErrorCode PCMPICommsCreate(void)
44: {
45: MPI_Comm comm = PC_MPI_COMM_WORLD;
46: PetscMPIInt size, rank, i;
48: PetscFunctionBegin;
49: PetscCallMPI(MPI_Comm_size(comm, &size));
50: PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51: PetscCallMPI(MPI_Comm_rank(comm, &rank));
52: /* comm for size 1 is useful only for debugging */
53: for (i = 0; i < size; i++) {
54: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55: PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56: PCMPISolveCounts[i] = 0;
57: PCMPIKSPCounts[i] = 0;
58: PCMPIIterations[i] = 0;
59: PCMPISizes[i] = 0;
60: }
61: PCMPICommSet = PETSC_TRUE;
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
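/*
   For example, on a 4-rank run PCMPICommsCreate() produces PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1},
   PCMPIComms[2] = {0,1,2}, and PCMPIComms[3] = {0,1,2,3}; on a rank that is not a member of
   PCMPIComms[i] that entry is MPI_COMM_NULL.
*/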
65: PetscErrorCode PCMPICommsDestroy(void)
66: {
67: MPI_Comm comm = PC_MPI_COMM_WORLD;
68: PetscMPIInt size, rank, i;
70: PetscFunctionBegin;
71: if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72: PetscCallMPI(MPI_Comm_size(comm, &size));
73: PetscCallMPI(MPI_Comm_rank(comm, &rank));
74: for (i = 0; i < size; i++) {
75: if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76: }
77: PCMPICommSet = PETSC_FALSE;
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: static PetscErrorCode PCMPICreate(PC pc)
82: {
83: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
84: MPI_Comm comm = PC_MPI_COMM_WORLD;
85: KSP ksp;
86: PetscInt N[2], mincntperrank = 0;
87: PetscMPIInt size;
88: Mat sA;
89: char *prefix;
90: PetscMPIInt len = 0;
92: PetscFunctionBegin;
93: if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94: PetscCallMPI(MPI_Comm_size(comm, &size));
95: if (pc) {
96: if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one-rank MPI run, this will be less efficient than not using this type\n"));
97: PetscCall(PCGetOperators(pc, &sA, &sA));
98: PetscCall(MatGetSize(sA, &N[0], &N[1]));
99: }
100: PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
102: /* choose a suitable sized MPI_Comm for the problem to be solved on */
103: if (km) mincntperrank = km->mincntperrank;
104: PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
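/* For example, with N[0] = 50000 rows, the default mincntperrank of 10000, and size = 8 this selects
     PCMPIComms[PetscMin(8, PetscMax(1, 5)) - 1] = PCMPIComms[4], i.e. ranks 0 through 4; ranks 5 through 7
     get MPI_COMM_NULL and return immediately below */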
106: if (comm == MPI_COMM_NULL) {
107: ksp = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: PetscCall(PetscLogStagePush(PCMPIStage));
111: PetscCall(KSPCreate(comm, &ksp));
112: PetscCall(PetscLogStagePop());
113: PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
114: if (pc) {
115: size_t slen;
117: PetscCallMPI(MPI_Comm_size(comm, &size));
118: PCMPIKSPCounts[size - 1]++;
119: PetscCall(PCGetOptionsPrefix(pc, (const char **)&prefix));
120: PetscCall(PetscStrlen(prefix, &slen));
121: len = (PetscMPIInt)slen;
122: }
123: PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
124: if (len) {
125: if (!pc) PetscCall(PetscMalloc1(len + 1, &prefix));
126: PetscCallMPI(MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm));
127: PetscCall(KSPSetOptionsPrefix(ksp, prefix));
if (!pc) PetscCall(PetscFree(prefix)); /* prefix was allocated just above on the server ranks; KSPSetOptionsPrefix() keeps its own copy */
128: }
129: PetscCall(KSPAppendOptionsPrefix(ksp, "mpi_"));
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
133: static PetscErrorCode PCMPISetMat(PC pc)
134: {
135: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
136: Mat A;
137: PetscInt m, n, *ia, *ja, j, bs;
138: Mat sA;
139: MPI_Comm comm = PC_MPI_COMM_WORLD;
140: KSP ksp;
141: PetscLayout layout;
142: const PetscInt *IA = NULL, *JA = NULL;
143: const PetscInt *range;
144: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
145: PetscScalar *a;
146: const PetscScalar *sa = NULL;
147: PetscInt matproperties[7] = {0, 0, 0, 0, 0, 0, 0};
149: PetscFunctionBegin;
150: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
151: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
152: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
153: if (pc) {
154: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
156: PetscCallMPI(MPI_Comm_size(comm, &size));
157: PCMPIMatCounts[size - 1]++;
158: PetscCall(PCGetOperators(pc, &sA, &sA));
159: PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
160: PetscCall(MatGetBlockSize(sA, &bs));
161: matproperties[2] = bs;
162: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
163: matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
164: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
165: matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
166: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
167: matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
168: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
169: matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
170: }
171: PetscCallMPI(MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm));
173: /* determine ownership ranges of matrix columns */
174: PetscCall(PetscLayoutCreate(comm, &layout));
175: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
176: PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
177: PetscCall(PetscLayoutSetUp(layout));
178: PetscCall(PetscLayoutGetLocalSize(layout, &n));
179: PetscCall(PetscLayoutDestroy(&layout));
181: /* determine ownership ranges of matrix rows */
182: PetscCall(PetscLayoutCreate(comm, &layout));
183: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
184: PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
185: PetscCall(PetscLayoutSetUp(layout));
186: PetscCall(PetscLayoutGetLocalSize(layout, &m));
188: /* copy over the matrix nonzero structure and values */
189: if (pc) {
190: NZ = km->NZ;
191: NZdispl = km->NZdispl;
192: PetscCall(PetscLayoutGetRanges(layout, &range));
193: PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
194: for (i = 0; i < size; i++) {
195: sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
196: NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
197: }
198: displi[0] = 0;
199: NZdispl[0] = 0;
200: for (j = 1; j < size; j++) {
201: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
202: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
203: }
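/* For example, with two server ranks owning rows [0,2) and [2,4) of a matrix whose CSR row offsets are
       IA = {0,3,5,9,12}: sendcounti = {3,3} and displi = {0,2} (the scattered pieces of IA overlap by one
       entry), while NZ = {5,7} and NZdispl = {0,5} */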
204: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
205: }
206: PetscCall(PetscLayoutDestroy(&layout));
207: PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
209: PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
210: PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
211: PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
212: PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
214: if (pc) {
215: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
216: PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
217: }
219: for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
220: ia[0] = 0;
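/* For example, if this rank received the global row offsets {5, 7, 10, 12} they become the local
     offsets {0, 2, 5, 7} expected by MatCreateMPIAIJWithArrays() below */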
221: PetscCall(PetscLogStagePush(PCMPIStage));
222: PetscCall(MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A));
223: PetscCall(MatSetBlockSize(A, matproperties[2]));
224: PetscCall(MatSetOptionsPrefix(A, "mpi_"));
225: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
226: if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
227: if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
228: if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
230: PetscCall(PetscFree3(ia, ja, a));
231: PetscCall(KSPSetOperators(ksp, A, A));
232: if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
233: PetscCall(PetscLogStagePop());
234: if (pc) { /* needed for scatterv/gatherv of rhs and solution */
235: const PetscInt *range;
237: PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
238: for (i = 0; i < size; i++) {
239: km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
240: km->displ[i] = (PetscMPIInt)range[i];
241: }
242: }
243: PetscCall(MatDestroy(&A));
244: PetscCall(KSPSetFromOptions(ksp));
245: PetscFunctionReturn(PETSC_SUCCESS);
246: }
248: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
249: {
250: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
251: KSP ksp;
252: Mat sA, A;
253: MPI_Comm comm = PC_MPI_COMM_WORLD;
254: PetscScalar *a;
255: PetscCount nz;
256: const PetscScalar *sa = NULL;
257: PetscMPIInt size;
258: PetscInt matproperties[4] = {0, 0, 0, 0};
260: PetscFunctionBegin;
261: if (pc) {
262: PetscCall(PCGetOperators(pc, &sA, &sA));
263: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
264: }
265: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
266: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
267: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
268: PetscCallMPI(MPI_Comm_size(comm, &size));
269: PCMPIMatCounts[size - 1]++;
270: PetscCall(KSPGetOperators(ksp, NULL, &A));
271: PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
272: PetscCall(PetscMalloc1(nz, &a));
273: PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
274: if (pc) {
275: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
277: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
279: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
280: matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
281: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
282: matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
283: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
284: matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
285: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
286: matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
287: }
288: PetscCall(MatUpdateMPIAIJWithArray(A, a));
289: PetscCall(PetscFree(a));
290: PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
291: /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
292: if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
293: if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
294: if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
295: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
296: PetscFunctionReturn(PETSC_SUCCESS);
297: }
299: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
300: {
301: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
302: KSP ksp;
303: MPI_Comm comm = PC_MPI_COMM_WORLD;
304: const PetscScalar *sb = NULL, *x;
305: PetscScalar *b, *sx = NULL;
306: PetscInt its, n;
307: PetscMPIInt size;
309: PetscFunctionBegin;
310: PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
311: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
312: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
314: /* TODO: optimize code to not require building counts/displ every time */
316: /* scatterv rhs */
317: PetscCallMPI(MPI_Comm_size(comm, &size));
318: if (pc) {
319: PetscInt N;
321: PCMPISolveCounts[size - 1]++;
322: PetscCall(MatGetSize(pc->pmat, &N, NULL));
324: PCMPISizes[size - 1] += N;
325: PetscCall(VecGetArrayRead(B, &sb));
326: }
327: PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
328: PetscCall(VecGetArray(ksp->vec_rhs, &b));
329: PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
330: PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
331: if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
333: PetscCall(PetscLogStagePush(PCMPIStage));
334: PetscCall(KSPSolve(ksp, NULL, NULL));
335: PetscCall(PetscLogStagePop());
336: PetscCall(KSPGetIterationNumber(ksp, &its));
337: PCMPIIterations[size - 1] += its;
339: /* gather solution */
340: PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
341: if (pc) PetscCall(VecGetArray(X, &sx));
342: PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
343: if (pc) PetscCall(VecRestoreArray(X, &sx));
344: PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
345: PetscFunctionReturn(PETSC_SUCCESS);
346: }
348: static PetscErrorCode PCMPIDestroy(PC pc)
349: {
350: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
351: KSP ksp;
352: MPI_Comm comm = PC_MPI_COMM_WORLD;
354: PetscFunctionBegin;
355: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
356: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
357: PetscCall(KSPDestroy(&ksp));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: /*@C
362: PCMPIServerBegin - starts a server on the MPI processes with rank != 0 that waits to process requests for
363: parallel `KSP` solves and for the management of parallel `KSP` objects.
365: Logically Collective on all MPI ranks except 0
367: Options Database Keys:
368: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
369: - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
371: Level: developer
373: Note:
374: This is normally started automatically in `PetscInitialize()` when the option is provided
376: Developer Notes:
377: When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
378: written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
379: (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
381: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
383: .seealso: `PCMPIServerEnd()`, `PCMPI`
384: @*/
385: PetscErrorCode PCMPIServerBegin(void)
386: {
387: PetscMPIInt rank;
389: PetscFunctionBegin;
390: PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server"));
391: if (PetscDefined(USE_SINGLE_LIBRARY)) {
392: PetscCall(VecInitializePackage());
393: PetscCall(MatInitializePackage());
394: PetscCall(DMInitializePackage());
395: PetscCall(PCInitializePackage());
396: PetscCall(KSPInitializePackage());
397: PetscCall(SNESInitializePackage());
398: PetscCall(TSInitializePackage());
399: PetscCall(TaoInitializePackage());
400: }
402: PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
403: if (rank == 0) {
404: PETSC_COMM_WORLD = PETSC_COMM_SELF;
405: PetscFunctionReturn(PETSC_SUCCESS);
406: }
408: while (PETSC_TRUE) {
409: PCMPICommand request = PCMPI_CREATE;
410: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
411: switch (request) {
412: case PCMPI_CREATE:
413: PetscCall(PCMPICreate(NULL));
414: break;
415: case PCMPI_SET_MAT:
416: PetscCall(PCMPISetMat(NULL));
417: break;
418: case PCMPI_UPDATE_MAT_VALUES:
419: PetscCall(PCMPIUpdateMatValues(NULL));
420: break;
421: case PCMPI_VIEW:
422: // PetscCall(PCMPIView(NULL));
423: break;
424: case PCMPI_SOLVE:
425: PetscCall(PCMPISolve(NULL, NULL, NULL));
426: break;
427: case PCMPI_DESTROY:
428: PetscCall(PCMPIDestroy(NULL));
429: break;
430: case PCMPI_EXIT:
431: PetscCall(PetscFinalize());
432: exit(0); /* not sure if this is a good idea, but cannot return because that would run the user's main program */
433: break;
434: default:
435: break;
436: }
437: }
438: PetscFunctionReturn(PETSC_SUCCESS);
439: }
441: /*@C
442: PCMPIServerEnd - ends the server running on the MPI processes with rank != 0 that waits to process requests for
443: parallel `KSP` solves and for the management of parallel `KSP` objects.
445: Logically Collective on all MPI ranks except 0
447: Level: developer
449: Note:
450: This is normally ended automatically in `PetscFinalize()` when the option is provided
452: .seealso: `PCMPIServerBegin()`, `PCMPI`
453: @*/
454: PetscErrorCode PCMPIServerEnd(void)
455: {
456: PCMPICommand request = PCMPI_EXIT;
458: PetscFunctionBegin;
459: if (PetscGlobalRank == 0) {
460: PetscViewer viewer = NULL;
461: PetscViewerFormat format;
463: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
464: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
465: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
466: PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
467: PetscOptionsEnd();
468: if (viewer) {
469: PetscBool isascii;
471: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
472: if (isascii) {
473: PetscMPIInt size;
474: PetscMPIInt i;
476: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
477: PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
478: PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
479: if (PCMPIKSPCountsSeq) {
480: PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
481: }
482: for (i = 0; i < size; i++) {
483: if (PCMPIKSPCounts[i]) {
484: PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
485: }
486: }
487: }
488: PetscCall(PetscViewerDestroy(&viewer));
489: }
490: }
491: PetscCall(PCMPICommsDestroy());
492: PetscFunctionReturn(PETSC_SUCCESS);
493: }
495: /*
496: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
497: because, for example, the problem is small. This version is more efficient because it does not require copying any data
498: */
499: static PetscErrorCode PCSetUp_Seq(PC pc)
500: {
501: PC_MPI *km = (PC_MPI *)pc->data;
502: Mat sA;
503: const char *prefix;
505: PetscFunctionBegin;
506: PetscCall(PCGetOperators(pc, NULL, &sA));
507: PetscCall(PCGetOptionsPrefix(pc, &prefix));
508: PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
509: PetscCall(KSPSetOptionsPrefix(km->ksps[0], prefix));
510: PetscCall(KSPAppendOptionsPrefix(km->ksps[0], "mpi_"));
511: PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
512: PetscCall(KSPSetFromOptions(km->ksps[0]));
513: PetscCall(KSPSetUp(km->ksps[0]));
514: PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
515: PCMPIKSPCountsSeq++;
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
519: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
520: {
521: PC_MPI *km = (PC_MPI *)pc->data;
522: PetscInt its, n;
523: Mat A;
525: PetscFunctionBegin;
526: PetscCall(KSPSolve(km->ksps[0], b, x));
527: PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
528: PCMPISolveCountsSeq++;
529: PCMPIIterationsSeq += its;
530: PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
531: PetscCall(MatGetSize(A, &n, NULL));
532: PCMPISizesSeq += n;
533: PetscFunctionReturn(PETSC_SUCCESS);
534: }
536: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
537: {
538: PC_MPI *km = (PC_MPI *)pc->data;
539: const char *prefix;
541: PetscFunctionBegin;
542: PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
543: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
544: PetscCall(PCGetOptionsPrefix(pc, &prefix));
545: if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
546: else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
547: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
548: PetscFunctionReturn(PETSC_SUCCESS);
549: }
551: static PetscErrorCode PCDestroy_Seq(PC pc)
552: {
553: PC_MPI *km = (PC_MPI *)pc->data;
555: PetscFunctionBegin;
556: PetscCall(KSPDestroy(&km->ksps[0]));
557: PetscCall(PetscFree(pc->data));
558: PetscFunctionReturn(PETSC_SUCCESS);
559: }
561: /*
562: PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
563: right hand side to the parallel PC
564: */
565: static PetscErrorCode PCSetUp_MPI(PC pc)
566: {
567: PC_MPI *km = (PC_MPI *)pc->data;
568: PetscMPIInt rank, size;
569: PCMPICommand request;
570: PetscBool newmatrix = PETSC_FALSE;
572: PetscFunctionBegin;
573: PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
574: PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
575: PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
577: if (!pc->setupcalled) {
578: if (!km->alwaysuseserver) {
579: PetscInt n;
580: Mat sA;
581: /* short circuit for small systems */
582: PetscCall(PCGetOperators(pc, &sA, &sA));
583: PetscCall(MatGetSize(sA, &n, NULL));
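/* with the default mincntperrank of 10000 this check solves any system with fewer than 19999 rows
       (or any single-rank run) directly on rank 0 via PCSetUp_Seq() */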
584: if (n < 2 * km->mincntperrank - 1 || size == 1) {
585: pc->ops->setup = NULL;
586: pc->ops->apply = PCApply_Seq;
587: pc->ops->destroy = PCDestroy_Seq;
588: pc->ops->view = PCView_Seq;
589: PetscCall(PCSetUp_Seq(pc));
590: PetscFunctionReturn(PETSC_SUCCESS);
591: }
592: }
594: request = PCMPI_CREATE;
595: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
596: PetscCall(PCMPICreate(pc));
597: newmatrix = PETSC_TRUE;
598: }
599: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
601: if (newmatrix) {
602: PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
603: request = PCMPI_SET_MAT;
604: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
605: PetscCall(PCMPISetMat(pc));
606: } else {
607: PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
608: request = PCMPI_UPDATE_MAT_VALUES;
609: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
610: PetscCall(PCMPIUpdateMatValues(pc));
611: }
612: PetscFunctionReturn(PETSC_SUCCESS);
613: }
615: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
616: {
617: PCMPICommand request = PCMPI_SOLVE;
619: PetscFunctionBegin;
620: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
621: PetscCall(PCMPISolve(pc, b, x));
622: PetscFunctionReturn(PETSC_SUCCESS);
623: }
625: PetscErrorCode PCDestroy_MPI(PC pc)
626: {
627: PCMPICommand request = PCMPI_DESTROY;
629: PetscFunctionBegin;
630: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
631: PetscCall(PCMPIDestroy(pc));
632: PetscCall(PetscFree(pc->data));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: /*
637: PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
638: */
639: PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
640: {
641: PC_MPI *km = (PC_MPI *)pc->data;
642: MPI_Comm comm;
643: PetscMPIInt size;
644: const char *prefix;
646: PetscFunctionBegin;
647: PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
648: PetscCallMPI(MPI_Comm_size(comm, &size));
649: PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
650: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
651: PetscCall(PCGetOptionsPrefix(pc, &prefix));
652: if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix));
653: else PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n"));
654: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n"));
655: PetscFunctionReturn(PETSC_SUCCESS);
656: }
658: PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
659: {
660: PC_MPI *km = (PC_MPI *)pc->data;
662: PetscFunctionBegin;
663: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
664: PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
665: PetscCall(PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
666: PetscOptionsHeadEnd();
667: PetscFunctionReturn(PETSC_SUCCESS);
668: }
670: /*MC
671: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
673: Options Database Keys:
674: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
675: . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
676: . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
677: - -pc_mpi_always_use_server - use the server solver code even if the particular system would be solved on a single process; this option is intended only for debugging and testing purposes
679: Level: beginner
681: Notes:
682: The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
684: The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give a reasonable speedup for
685: roughly 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.
687: It can be particularly useful for user OpenMP code or potentially user GPU code.
689: When the program is run with a single MPI rank, this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
690: and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
692: The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
693: because they are not the actual solver objects.
695: When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
696: stages will be confusing since the event times are recorded only on MPI rank 0, so the percentage of time reported for those events will be misleading.
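   Example Usage:
   A sketch of a typical launch (the executable name and the inner solver choices are illustrative assumptions, not taken from this file)
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -ksp_type preonly -pc_type mpi -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view
.ve
   where myapp builds its matrix, right hand side, and `KSP` on a single rank exactly as a sequential PETSc code would.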
698: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
699: M*/
700: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
701: {
702: PC_MPI *km;
704: PetscFunctionBegin;
705: PetscCall(PetscNew(&km));
706: pc->data = (void *)km;
708: km->mincntperrank = 10000;
710: pc->ops->setup = PCSetUp_MPI;
711: pc->ops->apply = PCApply_MPI;
712: pc->ops->destroy = PCDestroy_MPI;
713: pc->ops->view = PCView_MPI;
714: pc->ops->setfromoptions = PCSetFromOptions_MPI;
715: PetscFunctionReturn(PETSC_SUCCESS);
716: }