Actual source code: redistribute.c


  2: /*
  3:   This file defines a preconditioner that redistributes the matrix rows for load balance, removes the rows (and corresponding columns) that have only a diagonal entry, and solves the reduced problem with an inner KSP.
  4: */
  5: #include <petsc/private/pcimpl.h>
  6: #include <petscksp.h>

  8: typedef struct {
  9:   KSP          ksp;          /* inner solver applied to the reduced (redistributed) system */
 10:   Vec          x, b;         /* solution and right-hand side of the reduced system */
 11:   VecScatter   scatter;      /* moves entries between the full vectors and the reduced ones */
 12:   IS           is;           /* global indices of the rows kept in the reduced system */
 13:   PetscInt     dcnt, *drows; /* these are the local rows that have only diagonal entry */
 14:   PetscScalar *diag;         /* inverses of the diagonal entries of the eliminated rows (0 where the diagonal is zero) */
 15:   Vec          work;         /* work vector with the layout of the original (full) system */
 16:   PetscBool    zerodiag;     /* set if at least one eliminated row has a zero diagonal entry */
 17: } PC_Redistribute;

 19: static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
 20: {
 21:   PC_Redistribute *red = (PC_Redistribute *)pc->data;
 22:   PetscBool        iascii, isstring;
 23:   PetscInt         ncnt, N;

 25:   PetscFunctionBegin;
 26:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
 27:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
 28:   if (iascii) {
 29:     PetscCall(MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
 30:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
 31:     PetscCall(PetscViewerASCIIPrintf(viewer, "    Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N))));
 32:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n"));
 33:     PetscCall(KSPView(red->ksp, viewer));
 34:   } else if (isstring) {
 35:     PetscCall(PetscViewerStringSPrintf(viewer, " Redistribute preconditioner"));
 36:     PetscCall(KSPView(red->ksp, viewer));
 37:   }
 38:   PetscFunctionReturn(PETSC_SUCCESS);
 39: }

 41: static PetscErrorCode PCSetUp_Redistribute(PC pc)
 42: {
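       /*
          Overview of the steps below: (1) scan the local rows and separate rows that couple to
          other rows from rows that contain only a (possibly zero) diagonal entry; (2) build a
          load-balanced layout of the coupled rows and, when more than one MPI process is used,
          exchange row indices so each process owns a balanced share; (3) create the index set
          red->is of kept rows, the reduced vectors red->b and red->x, the VecScatter between the
          full and reduced vectors, and the reduced submatrix used as the operator of the inner
          KSP; (4) store the inverses of the diagonal entries of the eliminated rows for PCApply.
       */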
 43:   PC_Redistribute         *red = (PC_Redistribute *)pc->data;
 44:   MPI_Comm                 comm;
 45:   PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
 46:   PetscLayout              map, nmap;
 47:   PetscMPIInt              size, tag, n;
 48:   PETSC_UNUSED PetscMPIInt imdex;
 49:   PetscInt                *source = NULL;
 50:   PetscMPIInt             *sizes  = NULL, nrecvs;
 51:   PetscInt                 j, nsends;
 52:   PetscInt                *owner = NULL, *starts = NULL, count, slen;
 53:   PetscInt                *rvalues, *svalues, recvtotal;
 54:   PetscMPIInt             *onodes1, *olengths1;
 55:   MPI_Request             *send_waits = NULL, *recv_waits = NULL;
 56:   MPI_Status               recv_status, *send_status;
 57:   Vec                      tvec, diag;
 58:   Mat                      tmat;
 59:   const PetscScalar       *d, *values;
 60:   const PetscInt          *cols;

 62:   PetscFunctionBegin;
 63:   if (pc->setupcalled) {
 64:     PetscCall(KSPGetOperators(red->ksp, NULL, &tmat));
 65:     PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat));
 66:     PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
 67:   } else {
 68:     PetscInt NN;

 70:     PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
 71:     PetscCallMPI(MPI_Comm_size(comm, &size));
 72:     PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));

 74:     /* count non-diagonal rows on process */
 75:     PetscCall(MatGetOwnershipRange(pc->mat, &rstart, &rend));
 76:     cnt = 0;
 77:     for (i = rstart; i < rend; i++) {
 78:       PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
 79:       for (PetscInt j = 0; j < nz; j++) {
 80:         if (values[j] != 0 && cols[j] != i) {
 81:           cnt++;
 82:           break;
 83:         }
 84:       }
 85:       PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
 86:     }
 87:     PetscCall(PetscMalloc1(cnt, &rows));
 88:     PetscCall(PetscMalloc1(rend - rstart - cnt, &drows));

 90:     /* list non-diagonal rows on process */
 91:     cnt  = 0;
 92:     dcnt = 0;
 93:     for (i = rstart; i < rend; i++) {
 94:       PetscBool diagonly = PETSC_TRUE;
 95:       PetscCall(MatGetRow(pc->mat, i, &nz, &cols, &values));
 96:       for (PetscInt j = 0; j < nz; j++) {
 97:         if (values[j] != 0 && cols[j] != i) {
 98:           diagonly = PETSC_FALSE;
 99:           break;
100:         }
101:       }
102:       if (!diagonly) rows[cnt++] = i;
103:       else drows[dcnt++] = i - rstart;
104:       PetscCall(MatRestoreRow(pc->mat, i, &nz, &cols, &values));
105:     }

107:     /* create PetscLayout for non-diagonal rows on each process */
108:     PetscCall(PetscLayoutCreate(comm, &map));
109:     PetscCall(PetscLayoutSetLocalSize(map, cnt));
110:     PetscCall(PetscLayoutSetBlockSize(map, 1));
111:     PetscCall(PetscLayoutSetUp(map));
112:     rstart = map->rstart;
113:     rend   = map->rend;

115:     /* create PetscLayout for load-balanced non-diagonal rows on each process */
116:     PetscCall(PetscLayoutCreate(comm, &nmap));
117:     PetscCall(MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm));
118:     PetscCall(PetscLayoutSetSize(nmap, ncnt));
119:     PetscCall(PetscLayoutSetBlockSize(nmap, 1));
120:     PetscCall(PetscLayoutSetUp(nmap));

122:     PetscCall(MatGetSize(pc->pmat, &NN, NULL));
 123:     PetscCall(PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN))));

125:     if (size > 1) {
 126:       /* the following block of code assumes MPI can send messages to itself, which is not supported by MPI-uni, hence the size 1 case is handled as a special case below */
127:       /*
128:        this code is taken from VecScatterCreate_PtoS()
129:        Determines what rows need to be moved where to
130:        load balance the non-diagonal rows
131:        */
132:       /*  count number of contributors to each processor */
133:       PetscCall(PetscMalloc2(size, &sizes, cnt, &owner));
134:       PetscCall(PetscArrayzero(sizes, size));
135:       j      = 0;
136:       nsends = 0;
137:       for (i = rstart; i < rend; i++) {
138:         if (i < nmap->range[j]) j = 0;
139:         for (; j < size; j++) {
140:           if (i < nmap->range[j + 1]) {
141:             if (!sizes[j]++) nsends++;
142:             owner[i - rstart] = j;
143:             break;
144:           }
145:         }
146:       }
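           /* At this point owner[k] holds the destination rank (under the balanced layout nmap) of
              the k-th locally kept row, sizes[p] is the number of those rows destined for rank p,
              and nsends is the number of distinct destination ranks. For example, with two ranks
              and ten kept rows in total, nmap->range is {0,5,10}, so a kept row whose balanced
              index is 7 is assigned owner 1. */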
 147:       /* inform other processors of number of messages and max length */
148:       PetscCall(PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs));
149:       PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1));
150:       PetscCall(PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1));
151:       recvtotal = 0;
152:       for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

 154:       /* post receives: rvalues - rows I will own; count - running offset into rvalues as each receive is posted */
155:       PetscCall(PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits));
156:       count = 0;
157:       for (i = 0; i < nrecvs; i++) {
158:         PetscCallMPI(MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i));
159:         count += olengths1[i];
160:       }

 162:       /* do sends:
 163:        starts[i] gives the starting index in svalues for the values
 164:        going to the ith processor
 165:        */
166:       PetscCall(PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts));
167:       starts[0] = 0;
168:       for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
169:       for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
170:       for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
171:       red->drows = drows;
172:       red->dcnt  = dcnt;
173:       PetscCall(PetscFree(rows));

175:       starts[0] = 0;
176:       for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
177:       count = 0;
178:       for (i = 0; i < size; i++) {
179:         if (sizes[i]) PetscCallMPI(MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++));
180:       }

182:       /*  wait on receives */
183:       count = nrecvs;
184:       slen  = 0;
185:       while (count) {
186:         PetscCallMPI(MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status));
187:         /* unpack receives into our local space */
188:         PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, &n));
189:         slen += n;
190:         count--;
191:       }
192:       PetscCheck(slen == recvtotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Total message lengths %" PetscInt_FMT " not expected %" PetscInt_FMT, slen, recvtotal);
193:       PetscCall(ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is));

195:       /* free all work space */
196:       PetscCall(PetscFree(olengths1));
197:       PetscCall(PetscFree(onodes1));
198:       PetscCall(PetscFree3(rvalues, source, recv_waits));
199:       PetscCall(PetscFree2(sizes, owner));
200:       if (nsends) { /* wait on sends */
201:         PetscCall(PetscMalloc1(nsends, &send_status));
202:         PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
203:         PetscCall(PetscFree(send_status));
204:       }
205:       PetscCall(PetscFree3(svalues, send_waits, starts));
206:     } else {
207:       PetscCall(ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is));
208:       red->drows = drows;
209:       red->dcnt  = dcnt;
210:       slen       = cnt;
211:     }
212:     PetscCall(PetscLayoutDestroy(&map));
213:     PetscCall(PetscLayoutDestroy(&nmap));

215:     PetscCall(VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b));
216:     PetscCall(VecDuplicate(red->b, &red->x));
217:     PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
218:     PetscCall(VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter));
219:     PetscCall(VecDestroy(&tvec));
220:     PetscCall(MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat));
221:     PetscCall(KSPSetOperators(red->ksp, tmat, tmat));
222:     PetscCall(MatDestroy(&tmat));
223:   }

225:   /* get diagonal portion of matrix */
226:   PetscCall(PetscFree(red->diag));
227:   PetscCall(PetscMalloc1(red->dcnt, &red->diag));
228:   PetscCall(MatCreateVecs(pc->pmat, &diag, NULL));
229:   PetscCall(MatGetDiagonal(pc->pmat, diag));
230:   PetscCall(VecGetArrayRead(diag, &d));
231:   for (i = 0; i < red->dcnt; i++) {
232:     if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
233:     else {
234:       red->zerodiag = PETSC_TRUE;
235:       red->diag[i]  = 0.0;
236:     }
237:   }
238:   PetscCall(VecRestoreArrayRead(diag, &d));
239:   PetscCall(VecDestroy(&diag));
240:   PetscCall(KSPSetUp(red->ksp));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }

244: static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
245: {
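       /*
          With the rows (and matching columns) permuted so the diagonal-only rows come first, the
          matrix has the block form

             [ D  0 ] [x_1]   [b_1]
             [ C  B ] [x_2] = [b_2]

          with D diagonal. Below, x_1 = D^{-1} b_1 is computed directly (with a consistency check
          where D has zero entries), the residual b - A x then equals b_2 - C x_1 on the kept rows,
          and that reduced right-hand side is scattered into red->b and solved with the inner KSP,
          B x_2 = b_2 - C x_1, before x_2 is scattered back into x.
       */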
246:   PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
247:   PetscInt           dcnt  = red->dcnt, i;
248:   const PetscInt    *drows = red->drows;
249:   PetscScalar       *xwork;
250:   const PetscScalar *bwork, *diag = red->diag;

252:   PetscFunctionBegin;
253:   if (!red->work) PetscCall(VecDuplicate(b, &red->work));
254:   /* compute the rows of solution that have diagonal entries only */
255:   PetscCall(VecSet(x, 0.0)); /* x = diag(A)^{-1} b */
256:   PetscCall(VecGetArray(x, &xwork));
257:   PetscCall(VecGetArrayRead(b, &bwork));
258:   if (red->zerodiag) {
259:     for (i = 0; i < dcnt; i++) {
260:       if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
261:         PetscCheck(!pc->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Linear system is inconsistent, zero matrix row but nonzero right hand side");
 262:         PetscCall(PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n"));
263:         PetscCall(VecSetInf(x));
264:         pc->failedreasonrank = PC_INCONSISTENT_RHS;
265:       }
266:     }
267:   }
268:   for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
269:   PetscCall(PetscLogFlops(dcnt));
 270:   PetscCall(VecRestoreArray(x, &xwork)); /* xwork was obtained from x above */
271:   PetscCall(VecRestoreArrayRead(b, &bwork));
272:   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
273:   PetscCall(MatMult(pc->pmat, x, red->work));
274:   PetscCall(VecAYPX(red->work, -1.0, b)); /* red->work = b - A x */

276:   PetscCall(VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
277:   PetscCall(VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD));
278:   PetscCall(KSPSolve(red->ksp, red->b, red->x));
279:   PetscCall(KSPCheckSolve(red->ksp, pc, red->x));
280:   PetscCall(VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
281:   PetscCall(VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE));
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: static PetscErrorCode PCDestroy_Redistribute(PC pc)
286: {
287:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

289:   PetscFunctionBegin;
290:   PetscCall(VecScatterDestroy(&red->scatter));
291:   PetscCall(ISDestroy(&red->is));
292:   PetscCall(VecDestroy(&red->b));
293:   PetscCall(VecDestroy(&red->x));
294:   PetscCall(KSPDestroy(&red->ksp));
295:   PetscCall(VecDestroy(&red->work));
296:   PetscCall(PetscFree(red->drows));
297:   PetscCall(PetscFree(red->diag));
298:   PetscCall(PetscFree(pc->data));
299:   PetscFunctionReturn(PETSC_SUCCESS);
300: }

302: static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
303: {
304:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

306:   PetscFunctionBegin;
307:   PetscCall(KSPSetFromOptions(red->ksp));
308:   PetscFunctionReturn(PETSC_SUCCESS);
309: }

311: /*@
312:    PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE`

314:    Not Collective

316:    Input Parameter:
317: .  pc - the preconditioner context

319:    Output Parameter:
320: .  innerksp - the inner `KSP`

322:    Level: advanced
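
   Example Usage:
   A minimal sketch (illustrative; assumes pc has already been set to `PCREDISTRIBUTE`):
.vb
   KSP inner;

   PetscCall(PCRedistributeGetKSP(pc, &inner));
   PetscCall(KSPSetTolerances(inner, 1.e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
.ve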

324: .seealso: `KSP`, `PCREDISTRIBUTE`
325: @*/
326: PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
327: {
328:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

330:   PetscFunctionBegin;
333:   *innerksp = red->ksp;
334:   PetscFunctionReturn(PETSC_SUCCESS);
335: }

337: /*MC
338:      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry and then
339:      applies a `KSP` to that new smaller matrix

341:      Level: intermediate

343:      Notes:
 344:      Options for the inner `KSP` and `PC` are set with the options database prefix -redistribute_

 346:      Usually this is run with the outer `KSP` set to `-ksp_type preonly`, so that the solve is performed by the inner redistribute `KSP`

348:      If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, `-ksp_type preonly
349:      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

351:      This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same. Rows are moved
352:      between MPI processes inside the preconditioner to balance the number of rows on each process.
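
     Example Usage:
     A minimal sketch of programmatic use (illustrative; assumes an already assembled `Mat` A and `Vec` b, x):
.vb
     KSP ksp, inner;
     PC  pc;

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCREDISTRIBUTE));
     PetscCall(KSPSetFromOptions(ksp));           /* picks up options with the -redistribute_ prefix for the inner solver */
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(PCRedistributeGetKSP(pc, &inner)); /* optionally inspect or configure the inner KSP */
.ve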

354:      Developer Note:
355:      Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

357: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
358: M*/

360: PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
361: {
362:   PC_Redistribute *red;
363:   const char      *prefix;

365:   PetscFunctionBegin;
366:   PetscCall(PetscNew(&red));
367:   pc->data = (void *)red;

369:   pc->ops->apply          = PCApply_Redistribute;
370:   pc->ops->applytranspose = NULL;
371:   pc->ops->setup          = PCSetUp_Redistribute;
372:   pc->ops->destroy        = PCDestroy_Redistribute;
373:   pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
374:   pc->ops->view           = PCView_Redistribute;

376:   PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp));
377:   PetscCall(KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure));
378:   PetscCall(PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1));
379:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
380:   PetscCall(KSPSetOptionsPrefix(red->ksp, prefix));
381:   PetscCall(KSPAppendOptionsPrefix(red->ksp, "redistribute_"));
382:   PetscFunctionReturn(PETSC_SUCCESS);
383: }