Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <h2opusconf.h>
5: /* Use GPU only if H2OPUS is configured for GPU */
6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7: #define PETSC_H2OPUS_USE_GPU
8: #endif
10: typedef struct {
11: Mat A;
12: Mat M;
13: PetscScalar s0;
15: /* sampler for Newton-Schultz */
16: Mat S;
17: PetscInt hyperorder;
18: Vec wns[4];
19: Mat wnsmat[4];
21: /* convergence testing */
22: Mat T;
23: Vec w;
25: /* Support for PCSetCoordinates */
26: PetscInt sdim;
27: PetscInt nlocc;
28: PetscReal *coords;
30: /* Newton-Schultz customization */
31: PetscInt maxits;
32: PetscReal rtol, atol;
33: PetscBool monitor;
34: PetscBool useapproximatenorms;
35: NormType normtype;
37: /* Used when pmat != MATH2OPUS */
38: PetscReal eta;
39: PetscInt leafsize;
40: PetscInt max_rank;
41: PetscInt bs;
42: PetscReal mrtol;
44: /* CPU/GPU */
45: PetscBool forcecpu;
46: PetscBool boundtocpu;
47: } PC_H2OPUS;
49: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
51: static PetscErrorCode PCReset_H2OPUS(PC pc)
52: {
53: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55: PetscFunctionBegin;
56: pch2opus->sdim = 0;
57: pch2opus->nlocc = 0;
58: PetscCall(PetscFree(pch2opus->coords));
59: PetscCall(MatDestroy(&pch2opus->A));
60: PetscCall(MatDestroy(&pch2opus->M));
61: PetscCall(MatDestroy(&pch2opus->T));
62: PetscCall(VecDestroy(&pch2opus->w));
63: PetscCall(MatDestroy(&pch2opus->S));
64: PetscCall(VecDestroy(&pch2opus->wns[0]));
65: PetscCall(VecDestroy(&pch2opus->wns[1]));
66: PetscCall(VecDestroy(&pch2opus->wns[2]));
67: PetscCall(VecDestroy(&pch2opus->wns[3]));
68: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
69: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
70: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
71: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
72: PetscFunctionReturn(PETSC_SUCCESS);
73: }
75: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
76: {
77: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
78: PetscBool reset = PETSC_TRUE;
80: PetscFunctionBegin;
81: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
82: PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
83: reset = (PetscBool)!reset;
84: }
85: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
86: if (reset) {
87: PetscCall(PCReset_H2OPUS(pc));
88: PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
89: PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
90: pch2opus->sdim = sdim;
91: pch2opus->nlocc = nlocc;
92: }
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
97: {
98: PetscFunctionBegin;
99: PetscCall(PCReset_H2OPUS(pc));
100: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
101: PetscCall(PetscFree(pc->data));
102: PetscFunctionReturn(PETSC_SUCCESS);
103: }
105: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
106: {
107: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
109: PetscFunctionBegin;
110: PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
111: PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
112: PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
113: PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
114: PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
115: PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
116: PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
117: PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
118: PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
119: PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
120: PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
121: PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
122: PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
123: PetscOptionsHeadEnd();
124: PetscFunctionReturn(PETSC_SUCCESS);
125: }
127: typedef struct {
128: Mat A;
129: Mat M;
130: Vec w;
131: } AAtCtx;
133: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
134: {
135: AAtCtx *aat;
137: PetscFunctionBegin;
138: PetscCall(MatShellGetContext(A, (void *)&aat));
139: /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
140: PetscCall(MatMultTranspose(aat->A, x, aat->w));
141: PetscCall(MatMult(aat->A, aat->w, y));
142: PetscFunctionReturn(PETSC_SUCCESS);
143: }
145: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
146: {
147: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
148: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
149: PetscInt M, m;
150: VecType vtype;
151: PetscReal n;
152: AAtCtx aat;
154: PetscFunctionBegin;
155: aat.A = A;
156: aat.M = pch2opus->M; /* unused so far */
157: PetscCall(MatCreateVecs(A, NULL, &aat.w));
158: PetscCall(MatGetSize(A, &M, NULL));
159: PetscCall(MatGetLocalSize(A, &m, NULL));
160: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
161: PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
162: PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt));
163: PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt));
164: PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
165: PetscCall(MatGetVecType(A, &vtype));
166: PetscCall(MatShellSetVecType(AAt, vtype));
167: PetscCall(MatNorm(AAt, NORM_1, &n));
168: pch2opus->s0 = 1. / n;
169: PetscCall(VecDestroy(&aat.w));
170: PetscCall(MatDestroy(&AAt));
171: PetscFunctionReturn(PETSC_SUCCESS);
172: }
174: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
175: {
176: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
178: PetscFunctionBegin;
179: if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
180: else PetscCall(MatMult(pch2opus->M, x, y));
181: PetscFunctionReturn(PETSC_SUCCESS);
182: }
184: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
185: {
186: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
188: PetscFunctionBegin;
189: if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
190: else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
191: PetscFunctionReturn(PETSC_SUCCESS);
192: }
194: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
195: {
196: PetscFunctionBegin;
197: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
198: PetscFunctionReturn(PETSC_SUCCESS);
199: }
201: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
202: {
203: PetscFunctionBegin;
204: PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
205: PetscFunctionReturn(PETSC_SUCCESS);
206: }
208: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
209: {
210: PetscFunctionBegin;
211: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
216: {
217: PetscFunctionBegin;
218: PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
222: /* used to test the norm of (M^-1 A - I) */
223: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
224: {
225: PC pc;
226: Mat A;
227: PC_H2OPUS *pch2opus;
228: PetscBool sideleft = PETSC_TRUE;
230: PetscFunctionBegin;
231: PetscCall(MatShellGetContext(M, (void *)&pc));
232: pch2opus = (PC_H2OPUS *)pc->data;
233: if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
234: A = pch2opus->A;
235: PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
236: if (t) {
237: if (sideleft) {
238: PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
239: PetscCall(MatMultTranspose(A, pch2opus->w, y));
240: } else {
241: PetscCall(MatMultTranspose(A, x, pch2opus->w));
242: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
243: }
244: } else {
245: if (sideleft) {
246: PetscCall(MatMult(A, x, pch2opus->w));
247: PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
248: } else {
249: PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
250: PetscCall(MatMult(A, pch2opus->w, y));
251: }
252: }
253: PetscCall(VecAXPY(y, -1.0, x));
254: PetscFunctionReturn(PETSC_SUCCESS);
255: }
257: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
258: {
259: PetscFunctionBegin;
260: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
261: PetscFunctionReturn(PETSC_SUCCESS);
262: }
264: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
265: {
266: PetscFunctionBegin;
267: PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
268: PetscFunctionReturn(PETSC_SUCCESS);
269: }
271: /* HyperPower kernel:
272: Y = R = x
273: for i = 1 . . . l - 1 do
274: R = (I - A * Xk) * R
275: Y = Y + R
276: Y = Xk * Y
277: */
278: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
279: {
280: PC pc;
281: Mat A;
282: PC_H2OPUS *pch2opus;
284: PetscFunctionBegin;
285: PetscCall(MatShellGetContext(M, (void *)&pc));
286: pch2opus = (PC_H2OPUS *)pc->data;
287: A = pch2opus->A;
288: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
289: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
290: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
291: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
292: PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
293: PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
294: PetscCall(VecCopy(x, pch2opus->wns[0]));
295: PetscCall(VecCopy(x, pch2opus->wns[3]));
296: if (t) {
297: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
298: PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
299: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
300: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
301: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
302: }
303: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
304: } else {
305: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
306: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
307: PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
308: PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
309: PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
310: }
311: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
312: }
313: PetscFunctionReturn(PETSC_SUCCESS);
314: }
316: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
317: {
318: PetscFunctionBegin;
319: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
320: PetscFunctionReturn(PETSC_SUCCESS);
321: }
323: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
324: {
325: PetscFunctionBegin;
326: PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
327: PetscFunctionReturn(PETSC_SUCCESS);
328: }
330: /* Hyper power kernel, MatMat version */
331: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
332: {
333: PC pc;
334: Mat A;
335: PC_H2OPUS *pch2opus;
336: PetscInt i;
338: PetscFunctionBegin;
339: PetscCall(MatShellGetContext(M, (void *)&pc));
340: pch2opus = (PC_H2OPUS *)pc->data;
341: A = pch2opus->A;
342: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
343: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
344: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
345: }
346: if (!pch2opus->wnsmat[0]) {
347: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
348: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
349: }
350: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
351: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
352: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
353: }
354: if (!pch2opus->wnsmat[2]) {
355: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
356: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
357: }
358: PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
359: PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
360: if (t) {
361: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
362: PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
363: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
364: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
365: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
366: }
367: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
368: } else {
369: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
370: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
371: PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2]));
372: PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
373: PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
374: }
375: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
376: }
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
381: {
382: PetscFunctionBegin;
383: PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
384: PetscFunctionReturn(PETSC_SUCCESS);
385: }
387: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
388: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
389: {
390: PC pc;
391: Mat A;
392: PC_H2OPUS *pch2opus;
394: PetscFunctionBegin;
395: PetscCall(MatShellGetContext(M, (void *)&pc));
396: pch2opus = (PC_H2OPUS *)pc->data;
397: A = pch2opus->A;
398: PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
399: PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
400: PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
401: if (t) {
402: PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
403: PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
404: PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
405: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
406: } else {
407: PetscCall(PCApply_H2OPUS(pc, x, y));
408: PetscCall(MatMult(A, y, pch2opus->wns[0]));
409: PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
410: PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
411: }
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
415: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
416: {
417: PetscFunctionBegin;
418: PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
419: PetscFunctionReturn(PETSC_SUCCESS);
420: }
422: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
423: {
424: PetscFunctionBegin;
425: PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
426: PetscFunctionReturn(PETSC_SUCCESS);
427: }
429: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
430: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
431: {
432: PC pc;
433: Mat A;
434: PC_H2OPUS *pch2opus;
436: PetscFunctionBegin;
437: PetscCall(MatShellGetContext(M, (void *)&pc));
438: pch2opus = (PC_H2OPUS *)pc->data;
439: A = pch2opus->A;
440: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
441: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
442: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
443: }
444: if (!pch2opus->wnsmat[0]) {
445: PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
446: PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
447: }
448: if (t) {
449: PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
450: PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]));
451: PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
452: PetscCall(MatScale(Y, 2.));
453: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
454: } else {
455: PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
456: PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0]));
457: PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
458: PetscCall(MatScale(Y, 2.));
459: PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
460: }
461: PetscFunctionReturn(PETSC_SUCCESS);
462: }
464: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
465: {
466: PetscFunctionBegin;
467: PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
468: PetscFunctionReturn(PETSC_SUCCESS);
469: }
471: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
472: {
473: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
474: Mat A = pc->useAmat ? pc->mat : pc->pmat;
476: PetscFunctionBegin;
477: if (!pch2opus->S) {
478: PetscInt M, N, m, n;
480: PetscCall(MatGetSize(A, &M, &N));
481: PetscCall(MatGetLocalSize(A, &m, &n));
482: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
483: PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
484: #if defined(PETSC_H2OPUS_USE_GPU)
485: PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
486: #endif
487: }
488: if (pch2opus->hyperorder >= 2) {
489: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper));
490: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper));
491: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
492: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
493: } else {
494: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS));
495: PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS));
496: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
497: PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
498: }
499: PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
500: PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
501: /* XXX */
502: PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
503: PetscFunctionReturn(PETSC_SUCCESS);
504: }
506: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
507: {
508: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
509: Mat A = pc->useAmat ? pc->mat : pc->pmat;
510: NormType norm = pch2opus->normtype;
511: PetscReal initerr = 0.0, err;
512: PetscBool ish2opus;
514: PetscFunctionBegin;
515: if (!pch2opus->T) {
516: PetscInt M, N, m, n;
517: const char *prefix;
519: PetscCall(PCGetOptionsPrefix(pc, &prefix));
520: PetscCall(MatGetSize(A, &M, &N));
521: PetscCall(MatGetLocalSize(A, &m, &n));
522: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
523: PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
524: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI));
525: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI));
526: PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS));
527: #if defined(PETSC_H2OPUS_USE_GPU)
528: PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
529: #endif
530: PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
531: PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
532: }
533: PetscCall(MatDestroy(&pch2opus->A));
534: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
535: if (ish2opus) {
536: PetscCall(PetscObjectReference((PetscObject)A));
537: pch2opus->A = A;
538: } else {
539: const char *prefix;
540: PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
541: /* XXX */
542: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
543: PetscCall(PCGetOptionsPrefix(pc, &prefix));
544: PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
545: PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
546: PetscCall(MatSetFromOptions(pch2opus->A));
547: PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
548: PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
549: /* XXX */
550: PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
552: /* always perform construction on the GPU unless forcecpu is true */
553: PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
554: }
555: #if defined(PETSC_H2OPUS_USE_GPU)
556: pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
557: #endif
558: PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
559: if (pch2opus->M) { /* see if we can reuse M as initial guess */
560: PetscReal reuse;
562: PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
563: PetscCall(MatNorm(pch2opus->T, norm, &reuse));
564: if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
565: }
566: if (!pch2opus->M) {
567: const char *prefix;
568: PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
569: PetscCall(PCGetOptionsPrefix(pc, &prefix));
570: PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
571: PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
572: PetscCall(MatSetFromOptions(pch2opus->M));
573: PetscCall(PCH2OpusSetUpInit(pc));
574: PetscCall(MatScale(pch2opus->M, pch2opus->s0));
575: }
576: /* A and M have the same h2 matrix structure, save on reordering routines */
577: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
578: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
579: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
580: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
581: err = initerr;
582: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
583: if (initerr > pch2opus->atol && !pc->failedreason) {
584: PetscInt i;
586: PetscCall(PCH2OpusSetUpSampler_Private(pc));
587: for (i = 0; i < pch2opus->maxits; i++) {
588: Mat M;
589: const char *prefix;
591: PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
592: PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
593: PetscCall(MatSetOptionsPrefix(M, prefix));
594: PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
595: PetscCall(MatSetFromOptions(M));
596: PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
597: PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
598: PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
599: PetscCall(MatDestroy(&pch2opus->M));
600: pch2opus->M = M;
601: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
602: if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
603: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
604: if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
605: }
606: }
607: /* cleanup setup workspace */
608: PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
609: PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
610: PetscCall(VecDestroy(&pch2opus->wns[0]));
611: PetscCall(VecDestroy(&pch2opus->wns[1]));
612: PetscCall(VecDestroy(&pch2opus->wns[2]));
613: PetscCall(VecDestroy(&pch2opus->wns[3]));
614: PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
615: PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
616: PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
617: PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
618: PetscFunctionReturn(PETSC_SUCCESS);
619: }
621: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
622: {
623: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
624: PetscBool isascii;
626: PetscFunctionBegin;
627: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
628: if (isascii) {
629: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
630: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
631: PetscCall(PetscViewerASCIIPushTab(viewer));
632: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
633: PetscCall(MatView(pch2opus->A, viewer));
634: PetscCall(PetscViewerPopFormat(viewer));
635: PetscCall(PetscViewerASCIIPopTab(viewer));
636: }
637: if (pch2opus->M) {
638: PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
639: PetscCall(PetscViewerASCIIPushTab(viewer));
640: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
641: PetscCall(MatView(pch2opus->M, viewer));
642: PetscCall(PetscViewerPopFormat(viewer));
643: PetscCall(PetscViewerASCIIPopTab(viewer));
644: }
645: PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
646: }
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
650: /*MC
651: PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
653: Options Database Keys:
654: + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
655: . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
656: . -pc_h2opus_monitor - monitor Newton-Schultz convergence
657: . -pc_h2opus_atol - absolute tolerance
658: . -pc_h2opus_rtol - relative tolerance
659: . -pc_h2opus_norm_type - normtype
660: . -pc_h2opus_hyperorder - Hyper power order of sampling
661: . -pc_h2opus_leafsize - leaf size when constructed from kernel
662: . -pc_h2opus_eta - admissibility condition tolerance
663: . -pc_h2opus_maxrank - maximum rank when constructed from matvecs
664: . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
665: . -pc_h2opus_mrtol - relative tolerance for construction from sampling
666: - -pc_h2opus_forcecpu - force construction of preconditioner on CPU
668: Level: intermediate
670: .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
671: M*/
673: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
674: {
675: PC_H2OPUS *pch2opus;
677: PetscFunctionBegin;
678: PetscCall(PetscNew(&pch2opus));
679: pc->data = (void *)pch2opus;
681: pch2opus->atol = 1.e-2;
682: pch2opus->rtol = 1.e-6;
683: pch2opus->maxits = 50;
684: pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
685: pch2opus->normtype = NORM_2;
687: /* these are needed when we are sampling the pmat */
688: pch2opus->eta = PETSC_DECIDE;
689: pch2opus->leafsize = PETSC_DECIDE;
690: pch2opus->max_rank = PETSC_DECIDE;
691: pch2opus->bs = PETSC_DECIDE;
692: pch2opus->mrtol = PETSC_DECIDE;
693: #if defined(PETSC_H2OPUS_USE_GPU)
694: pch2opus->boundtocpu = PETSC_FALSE;
695: #else
696: pch2opus->boundtocpu = PETSC_TRUE;
697: #endif
698: pc->ops->destroy = PCDestroy_H2OPUS;
699: pc->ops->setup = PCSetUp_H2OPUS;
700: pc->ops->apply = PCApply_H2OPUS;
701: pc->ops->matapply = PCApplyMat_H2OPUS;
702: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
703: pc->ops->reset = PCReset_H2OPUS;
704: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
705: pc->ops->view = PCView_H2OPUS;
707: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
708: PetscFunctionReturn(PETSC_SUCCESS);
709: }