Actual source code: pch2opus.c

  1: #include <petsc/private/pcimpl.h>
  2: #include <petsc/private/matimpl.h>
  3: #include <h2opusconf.h>

  5: /* Use GPU only if H2OPUS is configured for GPU */
  6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
  7:   #define PETSC_H2OPUS_USE_GPU
  8: #endif

 10: typedef struct {
 11:   Mat         A;
 12:   Mat         M;
 13:   PetscScalar s0;

 15:   /* sampler for Newton-Schultz */
 16:   Mat      S;
 17:   PetscInt hyperorder;
 18:   Vec      wns[4];
 19:   Mat      wnsmat[4];

 21:   /* convergence testing */
 22:   Mat T;
 23:   Vec w;

 25:   /* Support for PCSetCoordinates */
 26:   PetscInt   sdim;
 27:   PetscInt   nlocc;
 28:   PetscReal *coords;

 30:   /* Newton-Schultz customization */
 31:   PetscInt  maxits;
 32:   PetscReal rtol, atol;
 33:   PetscBool monitor;
 34:   PetscBool useapproximatenorms;
 35:   NormType  normtype;

 37:   /* Used when pmat != MATH2OPUS */
 38:   PetscReal eta;
 39:   PetscInt  leafsize;
 40:   PetscInt  max_rank;
 41:   PetscInt  bs;
 42:   PetscReal mrtol;

 44:   /* CPU/GPU */
 45:   PetscBool forcecpu;
 46:   PetscBool boundtocpu;
 47: } PC_H2OPUS;

 49: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);

 51: static PetscErrorCode PCReset_H2OPUS(PC pc)
 52: {
 53:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

 55:   PetscFunctionBegin;
 56:   pch2opus->sdim  = 0;
 57:   pch2opus->nlocc = 0;
 58:   PetscCall(PetscFree(pch2opus->coords));
 59:   PetscCall(MatDestroy(&pch2opus->A));
 60:   PetscCall(MatDestroy(&pch2opus->M));
 61:   PetscCall(MatDestroy(&pch2opus->T));
 62:   PetscCall(VecDestroy(&pch2opus->w));
 63:   PetscCall(MatDestroy(&pch2opus->S));
 64:   PetscCall(VecDestroy(&pch2opus->wns[0]));
 65:   PetscCall(VecDestroy(&pch2opus->wns[1]));
 66:   PetscCall(VecDestroy(&pch2opus->wns[2]));
 67:   PetscCall(VecDestroy(&pch2opus->wns[3]));
 68:   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
 69:   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
 70:   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
 71:   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
 72:   PetscFunctionReturn(PETSC_SUCCESS);
 73: }

 75: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
 76: {
 77:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
 78:   PetscBool  reset    = PETSC_TRUE;

 80:   PetscFunctionBegin;
 81:   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
 82:     PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
 83:     reset = (PetscBool)!reset;
 84:   }
 85:   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
 86:   if (reset) {
 87:     PetscCall(PCReset_H2OPUS(pc));
 88:     PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
 89:     PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
 90:     pch2opus->sdim  = sdim;
 91:     pch2opus->nlocc = nlocc;
 92:   }
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }

 96: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
 97: {
 98:   PetscFunctionBegin;
 99:   PetscCall(PCReset_H2OPUS(pc));
100:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
101:   PetscCall(PetscFree(pc->data));
102:   PetscFunctionReturn(PETSC_SUCCESS);
103: }

105: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
106: {
107:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

109:   PetscFunctionBegin;
110:   PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
111:   PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
112:   PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
113:   PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
114:   PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
115:   PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
116:   PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
117:   PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
118:   PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
119:   PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
120:   PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
121:   PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
122:   PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
123:   PetscOptionsHeadEnd();
124:   PetscFunctionReturn(PETSC_SUCCESS);
125: }

127: typedef struct {
128:   Mat A;
129:   Mat M;
130:   Vec w;
131: } AAtCtx;

133: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
134: {
135:   AAtCtx *aat;

137:   PetscFunctionBegin;
138:   PetscCall(MatShellGetContext(A, (void *)&aat));
139:   /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
140:   PetscCall(MatMultTranspose(aat->A, x, aat->w));
141:   PetscCall(MatMult(aat->A, aat->w, y));
142:   PetscFunctionReturn(PETSC_SUCCESS);
143: }

145: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
146: {
147:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
148:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
149:   PetscInt   M, m;
150:   VecType    vtype;
151:   PetscReal  n;
152:   AAtCtx     aat;

154:   PetscFunctionBegin;
155:   aat.A = A;
156:   aat.M = pch2opus->M; /* unused so far */
157:   PetscCall(MatCreateVecs(A, NULL, &aat.w));
158:   PetscCall(MatGetSize(A, &M, NULL));
159:   PetscCall(MatGetLocalSize(A, &m, NULL));
160:   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
161:   PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
162:   PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt));
163:   PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt));
164:   PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
165:   PetscCall(MatGetVecType(A, &vtype));
166:   PetscCall(MatShellSetVecType(AAt, vtype));
167:   PetscCall(MatNorm(AAt, NORM_1, &n));
168:   pch2opus->s0 = 1. / n;
169:   PetscCall(VecDestroy(&aat.w));
170:   PetscCall(MatDestroy(&AAt));
171:   PetscFunctionReturn(PETSC_SUCCESS);
172: }

174: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
175: {
176:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

178:   PetscFunctionBegin;
179:   if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
180:   else PetscCall(MatMult(pch2opus->M, x, y));
181:   PetscFunctionReturn(PETSC_SUCCESS);
182: }

184: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
185: {
186:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

188:   PetscFunctionBegin;
189:   if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
190:   else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
191:   PetscFunctionReturn(PETSC_SUCCESS);
192: }

194: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
195: {
196:   PetscFunctionBegin;
197:   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
198:   PetscFunctionReturn(PETSC_SUCCESS);
199: }

201: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
202: {
203:   PetscFunctionBegin;
204:   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
209: {
210:   PetscFunctionBegin;
211:   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
212:   PetscFunctionReturn(PETSC_SUCCESS);
213: }

215: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
216: {
217:   PetscFunctionBegin;
218:   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
219:   PetscFunctionReturn(PETSC_SUCCESS);
220: }

222: /* used to test the norm of (M^-1 A - I) */
223: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
224: {
225:   PC         pc;
226:   Mat        A;
227:   PC_H2OPUS *pch2opus;
228:   PetscBool  sideleft = PETSC_TRUE;

230:   PetscFunctionBegin;
231:   PetscCall(MatShellGetContext(M, (void *)&pc));
232:   pch2opus = (PC_H2OPUS *)pc->data;
233:   if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
234:   A = pch2opus->A;
235:   PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
236:   if (t) {
237:     if (sideleft) {
238:       PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
239:       PetscCall(MatMultTranspose(A, pch2opus->w, y));
240:     } else {
241:       PetscCall(MatMultTranspose(A, x, pch2opus->w));
242:       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
243:     }
244:   } else {
245:     if (sideleft) {
246:       PetscCall(MatMult(A, x, pch2opus->w));
247:       PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
248:     } else {
249:       PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
250:       PetscCall(MatMult(A, pch2opus->w, y));
251:     }
252:   }
253:   PetscCall(VecAXPY(y, -1.0, x));
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
258: {
259:   PetscFunctionBegin;
260:   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
261:   PetscFunctionReturn(PETSC_SUCCESS);
262: }

264: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
265: {
266:   PetscFunctionBegin;
267:   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
268:   PetscFunctionReturn(PETSC_SUCCESS);
269: }

271: /* HyperPower kernel:
272: Y = R = x
273: for i = 1 . . . l - 1 do
274:   R = (I - A * Xk) * R
275:   Y = Y + R
276: Y = Xk * Y
277: */
278: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
279: {
280:   PC         pc;
281:   Mat        A;
282:   PC_H2OPUS *pch2opus;

284:   PetscFunctionBegin;
285:   PetscCall(MatShellGetContext(M, (void *)&pc));
286:   pch2opus = (PC_H2OPUS *)pc->data;
287:   A        = pch2opus->A;
288:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
289:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
290:   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
291:   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
292:   PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
293:   PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
294:   PetscCall(VecCopy(x, pch2opus->wns[0]));
295:   PetscCall(VecCopy(x, pch2opus->wns[3]));
296:   if (t) {
297:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
298:       PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
299:       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
300:       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
301:       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
302:     }
303:     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
304:   } else {
305:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
306:       PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
307:       PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
308:       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
309:       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
310:     }
311:     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
312:   }
313:   PetscFunctionReturn(PETSC_SUCCESS);
314: }

316: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
317: {
318:   PetscFunctionBegin;
319:   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
320:   PetscFunctionReturn(PETSC_SUCCESS);
321: }

323: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
324: {
325:   PetscFunctionBegin;
326:   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
327:   PetscFunctionReturn(PETSC_SUCCESS);
328: }

330: /* Hyper power kernel, MatMat version */
331: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
332: {
333:   PC         pc;
334:   Mat        A;
335:   PC_H2OPUS *pch2opus;
336:   PetscInt   i;

338:   PetscFunctionBegin;
339:   PetscCall(MatShellGetContext(M, (void *)&pc));
340:   pch2opus = (PC_H2OPUS *)pc->data;
341:   A        = pch2opus->A;
342:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
343:     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
344:     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
345:   }
346:   if (!pch2opus->wnsmat[0]) {
347:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
348:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
349:   }
350:   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
351:     PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
352:     PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
353:   }
354:   if (!pch2opus->wnsmat[2]) {
355:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
356:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
357:   }
358:   PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
359:   PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
360:   if (t) {
361:     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
362:       PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
363:       PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
364:       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
365:       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
366:     }
367:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
368:   } else {
369:     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
370:       PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
371:       PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2]));
372:       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
373:       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
374:     }
375:     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
376:   }
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
381: {
382:   PetscFunctionBegin;
383:   PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
384:   PetscFunctionReturn(PETSC_SUCCESS);
385: }

387: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
388: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
389: {
390:   PC         pc;
391:   Mat        A;
392:   PC_H2OPUS *pch2opus;

394:   PetscFunctionBegin;
395:   PetscCall(MatShellGetContext(M, (void *)&pc));
396:   pch2opus = (PC_H2OPUS *)pc->data;
397:   A        = pch2opus->A;
398:   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
399:   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
400:   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
401:   if (t) {
402:     PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
403:     PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
404:     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
405:     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
406:   } else {
407:     PetscCall(PCApply_H2OPUS(pc, x, y));
408:     PetscCall(MatMult(A, y, pch2opus->wns[0]));
409:     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
410:     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
411:   }
412:   PetscFunctionReturn(PETSC_SUCCESS);
413: }

415: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
416: {
417:   PetscFunctionBegin;
418:   PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
419:   PetscFunctionReturn(PETSC_SUCCESS);
420: }

422: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
423: {
424:   PetscFunctionBegin;
425:   PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }

429: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
430: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
431: {
432:   PC         pc;
433:   Mat        A;
434:   PC_H2OPUS *pch2opus;

436:   PetscFunctionBegin;
437:   PetscCall(MatShellGetContext(M, (void *)&pc));
438:   pch2opus = (PC_H2OPUS *)pc->data;
439:   A        = pch2opus->A;
440:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
441:     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
442:     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
443:   }
444:   if (!pch2opus->wnsmat[0]) {
445:     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
446:     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
447:   }
448:   if (t) {
449:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
450:     PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
451:     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
452:     PetscCall(MatScale(Y, 2.));
453:     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
454:   } else {
455:     PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
456:     PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0]));
457:     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
458:     PetscCall(MatScale(Y, 2.));
459:     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
460:   }
461:   PetscFunctionReturn(PETSC_SUCCESS);
462: }

464: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
465: {
466:   PetscFunctionBegin;
467:   PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
468:   PetscFunctionReturn(PETSC_SUCCESS);
469: }

471: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
472: {
473:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
474:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;

476:   PetscFunctionBegin;
477:   if (!pch2opus->S) {
478:     PetscInt M, N, m, n;

480:     PetscCall(MatGetSize(A, &M, &N));
481:     PetscCall(MatGetLocalSize(A, &m, &n));
482:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
483:     PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
484: #if defined(PETSC_H2OPUS_USE_GPU)
485:     PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
486: #endif
487:   }
488:   if (pch2opus->hyperorder >= 2) {
489:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper));
490:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper));
491:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
492:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
493:   } else {
494:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS));
495:     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS));
496:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
497:     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
498:   }
499:   PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
500:   PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
501:   /* XXX */
502:   PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
503:   PetscFunctionReturn(PETSC_SUCCESS);
504: }

506: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
507: {
508:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
509:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
510:   NormType   norm     = pch2opus->normtype;
511:   PetscReal  initerr  = 0.0, err;
512:   PetscBool  ish2opus;

514:   PetscFunctionBegin;
515:   if (!pch2opus->T) {
516:     PetscInt    M, N, m, n;
517:     const char *prefix;

519:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
520:     PetscCall(MatGetSize(A, &M, &N));
521:     PetscCall(MatGetLocalSize(A, &m, &n));
522:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
523:     PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
524:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI));
525:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI));
526:     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
527: #if defined(PETSC_H2OPUS_USE_GPU)
528:     PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
529: #endif
530:     PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
531:     PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
532:   }
533:   PetscCall(MatDestroy(&pch2opus->A));
534:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
535:   if (ish2opus) {
536:     PetscCall(PetscObjectReference((PetscObject)A));
537:     pch2opus->A = A;
538:   } else {
539:     const char *prefix;
540:     PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
541:     /* XXX */
542:     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
543:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
544:     PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
545:     PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
546:     PetscCall(MatSetFromOptions(pch2opus->A));
547:     PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
548:     PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
549:     /* XXX */
550:     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));

552:     /* always perform construction on the GPU unless forcecpu is true */
553:     PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
554:   }
555: #if defined(PETSC_H2OPUS_USE_GPU)
556:   pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
557: #endif
558:   PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
559:   if (pch2opus->M) { /* see if we can reuse M as initial guess */
560:     PetscReal reuse;

562:     PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
563:     PetscCall(MatNorm(pch2opus->T, norm, &reuse));
564:     if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
565:   }
566:   if (!pch2opus->M) {
567:     const char *prefix;
568:     PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
569:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
570:     PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
571:     PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
572:     PetscCall(MatSetFromOptions(pch2opus->M));
573:     PetscCall(PCH2OpusSetUpInit(pc));
574:     PetscCall(MatScale(pch2opus->M, pch2opus->s0));
575:   }
576:   /* A and M have the same h2 matrix structure, save on reordering routines */
577:   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
578:   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
579:   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
580:   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
581:   err = initerr;
582:   if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
583:   if (initerr > pch2opus->atol && !pc->failedreason) {
584:     PetscInt i;

586:     PetscCall(PCH2OpusSetUpSampler_Private(pc));
587:     for (i = 0; i < pch2opus->maxits; i++) {
588:       Mat         M;
589:       const char *prefix;

591:       PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
592:       PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
593:       PetscCall(MatSetOptionsPrefix(M, prefix));
594:       PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
595:       PetscCall(MatSetFromOptions(M));
596:       PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
597:       PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
598:       PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
599:       PetscCall(MatDestroy(&pch2opus->M));
600:       pch2opus->M = M;
601:       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
602:       if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
603:       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
604:       if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
605:     }
606:   }
607:   /* cleanup setup workspace */
608:   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
609:   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
610:   PetscCall(VecDestroy(&pch2opus->wns[0]));
611:   PetscCall(VecDestroy(&pch2opus->wns[1]));
612:   PetscCall(VecDestroy(&pch2opus->wns[2]));
613:   PetscCall(VecDestroy(&pch2opus->wns[3]));
614:   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
615:   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
616:   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
617:   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
618:   PetscFunctionReturn(PETSC_SUCCESS);
619: }

621: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
622: {
623:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
624:   PetscBool  isascii;

626:   PetscFunctionBegin;
627:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
628:   if (isascii) {
629:     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
630:       PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
631:       PetscCall(PetscViewerASCIIPushTab(viewer));
632:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
633:       PetscCall(MatView(pch2opus->A, viewer));
634:       PetscCall(PetscViewerPopFormat(viewer));
635:       PetscCall(PetscViewerASCIIPopTab(viewer));
636:     }
637:     if (pch2opus->M) {
638:       PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
639:       PetscCall(PetscViewerASCIIPushTab(viewer));
640:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
641:       PetscCall(MatView(pch2opus->M, viewer));
642:       PetscCall(PetscViewerPopFormat(viewer));
643:       PetscCall(PetscViewerASCIIPopTab(viewer));
644:     }
645:     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
646:   }
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }

650: /*MC
651:      PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.

653:    Options Database Keys:
654: +   -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
655: .   -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
656: .   -pc_h2opus_monitor - monitor Newton-Schultz convergence
657: .   -pc_h2opus_atol - absolute tolerance
658: .   -pc_h2opus_rtol - relative tolerance
659: .   -pc_h2opus_norm_type - normtype
660: .   -pc_h2opus_hyperorder - Hyper power order of sampling
661: .   -pc_h2opus_leafsize - leaf size when constructed from kernel
662: .   -pc_h2opus_eta - admissibility condition tolerance
663: .   -pc_h2opus_maxrank - maximum rank when constructed from matvecs
664: .   -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
665: .   -pc_h2opus_mrtol - relative tolerance for construction from sampling
666: -   -pc_h2opus_forcecpu - force construction of preconditioner on CPU

668:    Level: intermediate

670: .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
671: M*/

673: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
674: {
675:   PC_H2OPUS *pch2opus;

677:   PetscFunctionBegin;
678:   PetscCall(PetscNew(&pch2opus));
679:   pc->data = (void *)pch2opus;

681:   pch2opus->atol       = 1.e-2;
682:   pch2opus->rtol       = 1.e-6;
683:   pch2opus->maxits     = 50;
684:   pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
685:   pch2opus->normtype   = NORM_2;

687:   /* these are needed when we are sampling the pmat */
688:   pch2opus->eta      = PETSC_DECIDE;
689:   pch2opus->leafsize = PETSC_DECIDE;
690:   pch2opus->max_rank = PETSC_DECIDE;
691:   pch2opus->bs       = PETSC_DECIDE;
692:   pch2opus->mrtol    = PETSC_DECIDE;
693: #if defined(PETSC_H2OPUS_USE_GPU)
694:   pch2opus->boundtocpu = PETSC_FALSE;
695: #else
696:   pch2opus->boundtocpu = PETSC_TRUE;
697: #endif
698:   pc->ops->destroy        = PCDestroy_H2OPUS;
699:   pc->ops->setup          = PCSetUp_H2OPUS;
700:   pc->ops->apply          = PCApply_H2OPUS;
701:   pc->ops->matapply       = PCApplyMat_H2OPUS;
702:   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
703:   pc->ops->reset          = PCReset_H2OPUS;
704:   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
705:   pc->ops->view           = PCView_H2OPUS;

707:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
708:   PetscFunctionReturn(PETSC_SUCCESS);
709: }