Actual source code: redistribute.c
/*
  This file defines a preconditioner that redistributes the matrix rows across the MPI processes for
  load balancing, removes the rows (and corresponding columns) that contain only a diagonal entry,
  and solves the remaining smaller problem with an inner KSP.
*/
#include <petsc/private/pcimpl.h>
#include <petscksp.h>

typedef struct {
  KSP ksp;                /* inner solver applied to the redistributed (reduced) matrix */
  Vec x, b;               /* solution and right-hand side of the reduced system */
  VecScatter scatter;     /* maps the retained rows of a full-length vector to the reduced vectors */
  IS is;                  /* rows (original global numbering) owned by this process after redistribution */
  PetscInt dcnt, *drows;  /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;      /* inverses of the diagonals of the eliminated rows (0 where the diagonal is zero) */
  Vec work;               /* full-length work vector used during PCApply */
  PetscBool zerodiag;     /* at least one eliminated row has a zero diagonal entry */
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool iascii, isstring;
  PetscInt ncnt, N;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
    PetscCall(MatGetSize(pc->pmat, &N, NULL));
    PetscCall(PetscViewerASCIIPrintf(viewer, " Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
    PetscCall(PetscViewerASCIIPrintf(viewer, " Redistribute preconditioner: \n"));
    PetscCall(KSPView(red->ksp, viewer));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
    PetscCall(KSPView(red->ksp, viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
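
/*
  Overview of the setup phase below: scan the locally owned rows and separate those that contain only
  a diagonal entry from the rest; build a load-balanced PetscLayout for the remaining rows; ship the
  indices of those rows to their new owners with point-to-point messages; then create the index set,
  scatter, reduced vectors, and reduced operator used by the inner KSP.
*/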

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  MPI_Comm comm;
  PetscInt rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout map, nmap;
  PetscMPIInt size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt *source = NULL;
  PetscMPIInt *sizes = NULL, nrecvs;
  PetscInt j, nsends;
  PetscInt *owner = NULL, *starts = NULL, count, slen;
  PetscInt *rvalues, *svalues, recvtotal;
  PetscMPIInt *onodes1, *olengths1;
  MPI_Request *send_waits = NULL, *recv_waits = NULL;
  MPI_Status recv_status, *send_status;
  Vec tvec, diag;
  Mat tmat;
  const PetscScalar *d, *values;
  const PetscInt *cols;

  PetscFunctionBegin;
  if (pc->setupcalled) {
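    /* Subsequent setups reuse the index set, scatter, and reduced vectors built on the first call;
       only the values of the reduced matrix are refreshed in place (MAT_REUSE_MATRIX), and the stored
       diagonal inverses are recomputed further below. */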
    PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
  } else {
    PetscInt NN;

    PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

    /* count non-diagonal rows on process */
    PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }
    PetscCall(PetscMalloc1(cnt, &rows));
    PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

    /* list non-diagonal rows on process */
    cnt = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &map));
    PetscCall(PetscLayoutSetLocalSize(map, cnt));
    PetscCall(PetscLayoutSetBlockSize(map, 1));
    PetscCall(PetscLayoutSetUp(map));
    rstart = map->rstart;
    rend = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscCall(PetscLayoutCreate(comm, &nmap));
    PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
    PetscCall(PetscLayoutSetSize(nmap, ncnt));
    PetscCall(PetscLayoutSetBlockSize(nmap, 1));
    PetscCall(PetscLayoutSetUp(nmap));

    PetscCall(MatGetSize(pc->pmat, &NN, NULL));
    PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(((PetscReal)(NN - ncnt)) / ((PetscReal)(NN)))));

    if (size > 1) {
      /* The following block of code assumes MPI can send messages to self, which MPI-uni does not
         support, so the size 1 case is handled separately below. */
      /*
         This code is taken from VecScatterCreate_PtoS(). It determines which rows need to be moved
         where in order to load balance the non-diagonal rows.
      */
      /* count number of contributors to each processor */
      PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
      PetscCall(PetscArrayzero(sizes, size));
      j = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
      PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
      PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues will hold the global rows this process will own; count is the running
         offset into rvalues at which each incoming message is placed */
      PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for stuff going to
            the ith processor
      */
      PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt = dcnt;
      PetscCall(PetscFree(rows));

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
      }

      /* wait on receives */
      count = nrecvs;
      slen = 0;
      while (count) {
        PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
        /* the received rows are already in rvalues; just tally how many values arrived */
        PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
        slen += n;
        count--;
      }
      PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
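      /* rvalues now holds, in the original global numbering, the rows this process will own after load
         balancing; they define the index set used to extract the reduced matrix and build the scatter */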
      PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

      /* free all work space */
      PetscCall(PetscFree(olengths1));
      PetscCall(PetscFree(onodes1));
      PetscCall(PetscFree3(rvalues, source, recv_waits));
      PetscCall(PetscFree2(sizes, owner));
      if (nsends) { /* wait on sends */
        PetscCall(PetscMalloc1(nsends, &send_status));
        PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
        PetscCall(PetscFree(send_status));
      }
      PetscCall(PetscFree3(svalues, send_waits, starts));
    } else {
      PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
      red->drows = drows;
      red->dcnt = dcnt;
      slen = cnt;
    }
    PetscCall(PetscLayoutDestroy(&map));
    PetscCall(PetscLayoutDestroy(&nmap));
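
    /* create the reduced right-hand side and solution vectors, the scatter that maps the retained rows
       of a full-length vector into them, and the reduced operator handed to the inner KSP */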
    PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
    PetscCall(VecDuplicate(red->b, &red->x));
    PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
    PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
    PetscCall(VecDestroy(&tvec));
    PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
    PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
    PetscCall(MatDestroy(&tmat));
  }

  /* get diagonal portion of matrix */
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscMalloc1(red->dcnt, &red->diag));
  PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
  PetscCall(MatGetDiagonal(pc->pmat, diag));
  PetscCall(VecGetArrayRead(diag, &d));
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i] = 0.0;
    }
  }
  PetscCall(VecRestoreArrayRead(diag, &d));
  PetscCall(VecDestroy(&diag));
  PetscCall(KSPSetUp(red->ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscInt dcnt = red->dcnt, i;
  const PetscInt *drows = red->drows;
  PetscScalar *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) PetscCall(VecDuplicate(b, &red->work));
  /* compute the rows of solution that have diagonal entries only */
  PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */
  PetscCall(VecGetArray(x, &xwork));
  PetscCall(VecGetArrayRead(b, &bwork));
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
        PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side"));
        PetscCall(VecSetInf(x));
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscCall(PetscLogFlops(dcnt));
  PetscCall(VecRestoreArray(x, &xwork)); /* xwork was obtained from x above, so it must be restored to x */
  PetscCall(VecRestoreArrayRead(b, &bwork));
  /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  PetscCall(MatMult(pc->pmat, x, red->work));
  PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */
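
  /* Restricted to the retained (non-diagonal-only) rows, red->work now equals b_R - A_RD x_D: the
     right-hand side of the reduced system once the eliminated variables have been moved to the right
     side. It is scattered into red->b and the reduced system is solved with the inner KSP below. */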
  PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(KSPSolve(red->ksp, red->b, red->x));
  PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
  PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(VecScatterDestroy(&red->scatter));
  PetscCall(ISDestroy(&red->is));
  PetscCall(VecDestroy(&red->b));
  PetscCall(VecDestroy(&red->x));
  PetscCall(KSPDestroy(&red->ksp));
  PetscCall(VecDestroy(&red->work));
  PetscCall(PetscFree(red->drows));
  PetscCall(PetscFree(red->diag));
  PetscCall(PetscFree(pc->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscCall(KSPSetFromOptions(red->ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE` preconditioner

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscAssertPointer(innerksp, 2);
  *innerksp = red->ksp;
  PetscFunctionReturn(PETSC_SUCCESS);
}
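
/*
  A minimal usage sketch for PCRedistributeGetKSP() (illustrative only; the outer KSP named "ksp" and
  the choice of inner solver are assumptions, not part of this file):

    PC  pc;
    KSP inner;

    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCREDISTRIBUTE));
    PetscCall(PCRedistributeGetKSP(pc, &inner));
    PetscCall(KSPSetType(inner, KSPCG));  // customize the inner solve programmatically instead of via -redistribute_ options
*/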

/*MC
  PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that have only a diagonal entry, and then
  applies a `KSP` to the smaller resulting matrix

  Level: intermediate

  Notes:
  Options for the inner redistribute `KSP` and `PC` use the options database prefix -redistribute_

  Usually this is run with `-ksp_type preonly`

  If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, `-ksp_type preonly
  -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

  This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same. Rows are moved
  between MPI processes inside the preconditioner to balance the number of rows on each process.

  Developer Note:
  Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/
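
/*
  Sketch of typical use from application code (a hypothetical example; the assembled matrix A and the
  vectors b, x are assumptions, not part of this file). This is equivalent to selecting the
  preconditioner with -pc_type redistribute on the command line:

    KSP ksp;
    PC  pc;

    PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
    PetscCall(KSPSetOperators(ksp, A, A));
    PetscCall(KSPSetType(ksp, KSPPREONLY));  // outer solve is just one application of the preconditioner
    PetscCall(KSPGetPC(ksp, &pc));
    PetscCall(PCSetType(pc, PCREDISTRIBUTE));
    PetscCall(KSPSetFromOptions(ksp));       // picks up -redistribute_ksp_type, -redistribute_pc_type, ...
    PetscCall(KSPSolve(ksp, b, x));
*/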

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char *prefix;

  PetscFunctionBegin;
  PetscCall(PetscNew(&red));
  pc->data = (void *)red;

  pc->ops->apply = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup = PCSetUp_Redistribute;
  pc->ops->destroy = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view = PCView_Redistribute;

  PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
  PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
  PetscCall(PCGetOptionsPrefix(pc, &prefix));
  PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
  PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
  PetscFunctionReturn(PETSC_SUCCESS);
}