Actual source code: pipegcr.c
2: #include "petscsys.h"
3: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>
5: static PetscBool cited = PETSC_FALSE;
6: static const char citation[] = "@article{SSM2016,\n"
7: " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
8: " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
9: " journal = {SIAM Journal on Scientific Computing},\n"
10: " volume = {38},\n"
11: " number = {5},\n"
12: " pages = {C441-C470},\n"
13: " year = {2016},\n"
14: " doi = {10.1137/15M1049130},\n"
15: " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
16: " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
17: "}\n";
19: #define KSPPIPEGCR_DEFAULT_MMAX 15
20: #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
21: #define KSPPIPEGCR_DEFAULT_VECB 5
22: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
23: #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
25: #include <petscksp.h>
27: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
28: {
29: PetscInt i;
30: KSP_PIPEGCR *pipegcr;
31: PetscInt nnewvecs, nvecsprev;
33: PetscFunctionBegin;
34: pipegcr = (KSP_PIPEGCR *)ksp->data;
36: /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
37: if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
38: nvecsprev = pipegcr->nvecs;
39: nnewvecs = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
40: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
41: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
42: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
43: if (pipegcr->unroll_w) { PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL)); }
44: pipegcr->nvecs += nnewvecs;
45: for (i = 0; i < nnewvecs; i++) {
46: pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
47: pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
48: pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
49: if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
50: }
51: pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
52: pipegcr->nchunks++;
53: }
54: PetscFunctionReturn(PETSC_SUCCESS);
55: }
57: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
58: {
59: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
60: Mat A, B;
61: Vec x, r, b, z, w, m, n, p, s, q, t, *redux;
62: PetscInt i, j, k, idx, kdx, mi;
63: PetscScalar alpha = 0.0, gamma, *betas, *dots;
64: PetscReal rnorm = 0.0, delta, *eta, *etas;
66: PetscFunctionBegin;
67: /* !!PS We have not checked these routines for use with complex numbers. The inner products
68: are likely not defined correctly for that case */
69: PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
71: PetscCall(KSPGetOperators(ksp, &A, &B));
72: x = ksp->vec_sol;
73: b = ksp->vec_rhs;
74: r = ksp->work[0];
75: z = ksp->work[1];
76: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
77: m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
78: n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
79: p = pipegcr->pvecs[0];
80: s = pipegcr->svecs[0];
81: q = pipegcr->qvecs[0];
82: t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
84: redux = pipegcr->redux;
85: dots = pipegcr->dots;
86: etas = pipegcr->etas;
87: betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
89: /* cycle initial residual */
90: PetscCall(KSP_MatMult(ksp, A, x, r));
91: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
92: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
93: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
95: /* initialization of other variables and pipelining intermediates */
96: PetscCall(VecCopy(z, p));
97: PetscCall(KSP_MatMult(ksp, A, p, s));
99: /* overlap initial computation of delta, gamma */
100: redux[0] = w;
101: redux[1] = r;
102: PetscCall(VecMDotBegin(w, 2, redux, dots)); /* Start split reductions for gamma = (w,r), delta = (w,w) */
103: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
104: PetscCall(KSP_PCApply(ksp, s, q)); /* q = B(s) */
105: if (pipegcr->unroll_w) { PetscCall(KSP_MatMult(ksp, A, q, t)); /* t = Aq */ }
106: PetscCall(VecMDotEnd(w, 2, redux, dots)); /* Finish split reduction */
107: delta = PetscRealPart(dots[0]);
108: etas[0] = delta;
109: gamma = dots[1];
110: alpha = gamma / delta;
112: i = 0;
113: do {
114: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
115: ksp->its++;
116: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
118: /* update solution, residuals, .. */
119: PetscCall(VecAXPY(x, +alpha, p));
120: PetscCall(VecAXPY(r, -alpha, s));
121: PetscCall(VecAXPY(z, -alpha, q));
122: if (pipegcr->unroll_w) {
123: PetscCall(VecAXPY(w, -alpha, t));
124: } else {
125: PetscCall(KSP_MatMult(ksp, A, z, w));
126: }
128: /* Computations of current iteration done */
129: i++;
131: if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, ksp->rnorm, pipegcr->modifypc_ctx));
133: /* If needbe, allocate a new chunk of vectors */
134: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));
136: /* Note that we wrap around and start clobbering old vectors */
137: idx = i % (pipegcr->mmax + 1);
138: p = pipegcr->pvecs[idx];
139: s = pipegcr->svecs[idx];
140: q = pipegcr->qvecs[idx];
141: if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
142: eta = pipegcr->etas + idx;
144: /* number of old directions to orthogonalize against */
145: switch (pipegcr->truncstrat) {
146: case KSP_FCD_TRUNC_TYPE_STANDARD:
147: mi = pipegcr->mmax;
148: break;
149: case KSP_FCD_TRUNC_TYPE_NOTAY:
150: mi = ((i - 1) % pipegcr->mmax) + 1;
151: break;
152: default:
153: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
154: }
156: /* Pick old p,s,q,zeta in a way suitable for VecMDot */
157: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
158: kdx = k % (pipegcr->mmax + 1);
159: pipegcr->pold[j] = pipegcr->pvecs[kdx];
160: pipegcr->sold[j] = pipegcr->svecs[kdx];
161: pipegcr->qold[j] = pipegcr->qvecs[kdx];
162: if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
163: redux[j] = pipegcr->svecs[kdx];
164: }
165: /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
166: redux[j] = r;
167: redux[j + 1] = w;
169: /* Dot products */
170: /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
171: PetscCall(VecMDotBegin(w, j + 2, redux, dots));
172: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));
174: /* B(w-r) + u stabilization */
175: PetscCall(VecWAXPY(n, -1.0, r, w)); /* m = u + B(w-r): (a) ntmp = w-r */
176: PetscCall(KSP_PCApply(ksp, n, m)); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
177: PetscCall(VecAXPY(m, 1.0, z)); /* m = u + B(w-r): (c) m = z + mtmp */
178: if (pipegcr->unroll_w) { PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am */ }
180: /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
181: PetscCall(VecMDotEnd(w, j + 2, redux, dots));
182: gamma = dots[j];
183: delta = PetscRealPart(dots[j + 1]);
185: /* compute new residual norm.
186: this cannot be done before this point so that the natural norm
187: is available for free and the communication involved is overlapped */
188: switch (ksp->normtype) {
189: case KSP_NORM_PRECONDITIONED:
190: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
191: break;
192: case KSP_NORM_UNPRECONDITIONED:
193: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
194: break;
195: case KSP_NORM_NATURAL:
196: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
197: break;
198: case KSP_NORM_NONE:
199: rnorm = 0.0;
200: break;
201: default:
202: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
203: }
205: /* Check for convergence */
206: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
207: ksp->rnorm = rnorm;
208: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
209: PetscCall(KSPLogResidualHistory(ksp, rnorm));
210: PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
211: PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
212: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
214: /* compute new eta and scale beta */
215: *eta = 0.;
216: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
217: kdx = k % (pipegcr->mmax + 1);
218: betas[j] /= -etas[kdx]; /* betak /= etak */
219: *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
220: /* etaitmp = -betaik^2 * etak */
221: }
222: *eta += delta; /* etai = delta -betaik^2 * etak */
224: /* check breakdown of eta = (s,s) */
225: if (*eta < 0.) {
226: pipegcr->norm_breakdown = PETSC_TRUE;
227: PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
228: break;
229: } else {
230: alpha = gamma / (*eta); /* alpha = gamma/etai */
231: }
233: /* project out stored search directions using classical G-S */
234: PetscCall(VecCopy(z, p));
235: PetscCall(VecCopy(w, s));
236: PetscCall(VecCopy(m, q));
237: if (pipegcr->unroll_w) {
238: PetscCall(VecCopy(n, t));
239: PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n - sum_k beta_k t_k */
240: }
241: PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
242: PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
243: PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m - sum_k beta_k q_k */
245: } while (ksp->its < ksp->max_it);
246: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
247: PetscFunctionReturn(PETSC_SUCCESS);
248: }
250: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
251: {
252: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
253: Mat A, B;
254: Vec x, b, r, z, w;
255: PetscScalar gamma;
256: PetscReal rnorm = 0.0;
257: PetscBool issym;
259: PetscFunctionBegin;
260: PetscCall(PetscCitationsRegister(citation, &cited));
262: PetscCall(KSPGetOperators(ksp, &A, &B));
263: x = ksp->vec_sol;
264: b = ksp->vec_rhs;
265: r = ksp->work[0];
266: z = ksp->work[1];
267: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
269: /* compute initial residual */
270: if (!ksp->guess_zero) {
271: PetscCall(KSP_MatMult(ksp, A, x, r));
272: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
273: } else {
274: PetscCall(VecCopy(b, r)); /* r <- b */
275: }
277: /* initial residual norm */
278: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
279: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
280: PetscCall(VecDot(r, w, &gamma)); /* gamma = (r,w) */
282: switch (ksp->normtype) {
283: case KSP_NORM_PRECONDITIONED:
284: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
285: break;
286: case KSP_NORM_UNPRECONDITIONED:
287: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
288: break;
289: case KSP_NORM_NATURAL:
290: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
291: break;
292: case KSP_NORM_NONE:
293: rnorm = 0.0;
294: break;
295: default:
296: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
297: }
299: /* Is A symmetric? */
300: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
301: if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?"));
303: /* logging */
304: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
305: ksp->its = 0;
306: ksp->rnorm0 = rnorm;
307: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
308: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
309: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
310: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
311: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
313: do {
314: PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
315: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
316: if (pipegcr->norm_breakdown) {
317: pipegcr->n_restarts++;
318: pipegcr->norm_breakdown = PETSC_FALSE;
319: }
320: } while (ksp->its < ksp->max_it);
322: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
327: {
328: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
329: PetscBool isascii, isstring;
330: const char *truncstr;
332: PetscFunctionBegin;
333: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
334: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
336: if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
337: truncstr = "Using standard truncation strategy";
338: } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
339: truncstr = "Using Notay's truncation strategy";
340: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
342: if (isascii) {
343: PetscCall(PetscViewerASCIIPrintf(viewer, " max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
344: PetscCall(PetscViewerASCIIPrintf(viewer, " preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
345: PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", truncstr));
346: PetscCall(PetscViewerASCIIPrintf(viewer, " w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
347: PetscCall(PetscViewerASCIIPrintf(viewer, " restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
348: } else if (isstring) {
349: PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
350: }
351: PetscFunctionReturn(PETSC_SUCCESS);
352: }
354: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
355: {
356: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
357: Mat A;
358: PetscBool diagonalscale;
359: const PetscInt nworkstd = 5;
361: PetscFunctionBegin;
362: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
363: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
365: PetscCall(KSPGetOperators(ksp, &A, NULL));
367: /* Allocate "standard" work vectors */
368: PetscCall(KSPSetWorkVecs(ksp, nworkstd));
370: /* Allocated space for pointers to additional work vectors
371: note that mmax is the number of previous directions, so we add 1 for the current direction */
372: PetscCall(PetscMalloc6(pipegcr->mmax + 1, &(pipegcr->pvecs), pipegcr->mmax + 1, &(pipegcr->ppvecs), pipegcr->mmax + 1, &(pipegcr->svecs), pipegcr->mmax + 1, &(pipegcr->psvecs), pipegcr->mmax + 1, &(pipegcr->qvecs), pipegcr->mmax + 1, &(pipegcr->pqvecs)));
373: if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &(pipegcr->tvecs), pipegcr->mmax + 1, &(pipegcr->ptvecs), pipegcr->mmax + 2, &(pipegcr->told)));
374: PetscCall(PetscMalloc4(pipegcr->mmax + 2, &(pipegcr->pold), pipegcr->mmax + 2, &(pipegcr->sold), pipegcr->mmax + 2, &(pipegcr->qold), pipegcr->mmax + 2, &(pipegcr->chunksizes)));
375: PetscCall(PetscMalloc3(pipegcr->mmax + 2, &(pipegcr->dots), pipegcr->mmax + 1, &(pipegcr->etas), pipegcr->mmax + 2, &(pipegcr->redux)));
376: /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
377: if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));
379: /* Preallocate additional work vectors */
380: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
381: PetscFunctionReturn(PETSC_SUCCESS);
382: }
384: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
385: {
386: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
388: PetscFunctionBegin;
389: if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx));
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
393: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
394: {
395: PetscInt i;
396: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
398: PetscFunctionBegin;
399: PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */
401: /* Destroy vectors for old directions and the arrays that manage pointers to them */
402: if (pipegcr->nvecs) {
403: for (i = 0; i < pipegcr->nchunks; i++) {
404: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
405: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
406: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
407: if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
408: }
409: }
411: PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
412: PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
413: PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
414: if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));
416: PetscCall(KSPReset_PIPEGCR(ksp));
417: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", NULL));
418: PetscCall(KSPDestroyDefault(ksp));
419: PetscFunctionReturn(PETSC_SUCCESS);
420: }
422: /*@
423: KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector
425: Logically Collective
427: Input Parameters:
428: + ksp - the Krylov space context
429: - unroll_w - use unrolling
431: Level: intermediate
433: Options Database Key:
434: . -ksp_pipegcr_unroll_w <bool> - use unrolling
436: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
437: @*/
438: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
439: {
440: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
442: PetscFunctionBegin;
445: pipegcr->unroll_w = unroll_w;
446: PetscFunctionReturn(PETSC_SUCCESS);
447: }
449: /*@
450: KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector
452: Logically Collective
454: Input Parameter:
455: . ksp - the Krylov space context
457: Output Parameter:
458: . unroll_w - `KSPPIPEGCR` uses unrolling (bool)
460: Level: intermediate
462: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
463: @*/
464: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
465: {
466: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
468: PetscFunctionBegin;
470: *unroll_w = pipegcr->unroll_w;
471: PetscFunctionReturn(PETSC_SUCCESS);
472: }
474: /*@
475: KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization
477: Logically Collective
479: Input Parameters:
480: + ksp - the Krylov space context
481: - mmax - the maximum number of previous directions to orthogonalize against
483: Options Database Key:
484: . -ksp_pipegcr_mmax <N> - maximum number of previous directions
486: Level: intermediate
488: Note:
489: mmax + 1 directions are stored (mmax previous ones along with a current one)
490: and whether all are used in each iteration also depends on the truncation strategy
491: (see `KSPPIPEGCRSetTruncationType`)
493: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
494: @*/
495: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
496: {
497: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
499: PetscFunctionBegin;
502: pipegcr->mmax = mmax;
503: PetscFunctionReturn(PETSC_SUCCESS);
504: }
506: /*@
507: KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store
509: Not Collective
511: Input Parameter:
512: . ksp - the Krylov space context
514: Output Parameter:
515: . mmax - the maximum number of previous directions allowed for orthogonalization
517: Level: intermediate
519: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
520: @*/
522: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
523: {
524: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
526: PetscFunctionBegin;
528: *mmax = pipegcr->mmax;
529: PetscFunctionReturn(PETSC_SUCCESS);
530: }
532: /*@
533: KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`
535: Logically Collective
537: Input Parameters:
538: + ksp - the Krylov space context
539: - nprealloc - the number of vectors to preallocate
541: Level: advanced
543: Options Database Key:
544: . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate
546: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
547: @*/
548: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
549: {
550: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
552: PetscFunctionBegin;
555: pipegcr->nprealloc = nprealloc;
556: PetscFunctionReturn(PETSC_SUCCESS);
557: }
559: /*@
560: KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`
562: Not Collective
564: Input Parameter:
565: . ksp - the Krylov space context
567: Output Parameter:
568: . nprealloc - the number of directions preallocated
570: Level: advanced
572: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
573: @*/
574: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
575: {
576: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
578: PetscFunctionBegin;
580: *nprealloc = pipegcr->nprealloc;
581: PetscFunctionReturn(PETSC_SUCCESS);
582: }
584: /*@
585: KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization
587: Logically Collective
589: Input Parameters:
590: + ksp - the Krylov space context
591: - truncstrat - the choice of strategy
592: .vb
593: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
594: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
595: .ve
597: Options Database Key:
598: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
600: Level: intermediate
602: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
603: @*/
604: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
605: {
606: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
608: PetscFunctionBegin;
611: pipegcr->truncstrat = truncstrat;
612: PetscFunctionReturn(PETSC_SUCCESS);
613: }
615: /*@
616: KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`
618: Not Collective
620: Input Parameter:
621: . ksp - the Krylov space context
623: Output Parameter:
624: . truncstrat - the strategy type
625: .vb
626: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
627: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
628: .ve
630: Options Database Key:
631: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
633: Level: intermediate
635: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
636: @*/
637: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
638: {
639: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
641: PetscFunctionBegin;
643: *truncstrat = pipegcr->truncstrat;
644: PetscFunctionReturn(PETSC_SUCCESS);
645: }
647: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems *PetscOptionsObject)
648: {
649: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
650: PetscInt mmax, nprealloc;
651: PetscBool flg;
653: PetscFunctionBegin;
654: PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
655: PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
656: if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
657: PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
658: if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
659: PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
660: PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
661: PetscOptionsHeadEnd();
662: PetscFunctionReturn(PETSC_SUCCESS);
663: }
666: typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP, PetscInt, PetscReal, void *);
667: typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void *);
669: static PetscErrorCode KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp, KSPPIPEGCRModifyPCFunction function, void *data, KSPPIPEGCRDestroyFunction destroy)
670: {
671: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
673: PetscFunctionBegin;
675: pipegcr->modifypc = function;
676: pipegcr->modifypc_destroy = destroy;
677: pipegcr->modifypc_ctx = data;
678: PetscFunctionReturn(PETSC_SUCCESS);
679: }
681: /*@C
682: KSPPIPEGCRSetModifyPC - Sets the routine used by `KSPPIPEGCR` to modify the preconditioner at each iteration
684: Logically Collective
686: Input Parameters:
687: + ksp - iterative context obtained from KSPCreate()
688: . function - user defined function to modify the preconditioner
689: . ctx - user provided context for the modify preconditioner function
690: - destroy - the function to use to destroy the user provided application context.
692: Calling Sequence of `function`:
693: $ PetscErrorCode function(KSP ksp, PetscInt n, PetscReal rnorm, void *ctx)
694: + ksp - iterative context
695: . n - the total number of PIPEGCR iterations that have occurred
696: . rnorm - 2-norm residual value
697: - ctx - the user provided application context
699: Calling Sequence of `destroy`:
700: $ PetscErrorCode destroy(void *ctx)
702: Level: intermediate
704: Notes:
705: The default modifypc routine is `KSPPIPEGCRModifyPCNoChange()`
707: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRModifyPCNoChange()`
708: @*/
709: PetscErrorCode KSPPIPEGCRSetModifyPC(KSP ksp, PetscErrorCode (*function)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*destroy)(void *))
710: {
711: PetscFunctionBegin;
712: PetscUseMethod(ksp, "KSPPIPEGCRSetModifyPC_C", (KSP, PetscErrorCode(*)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*)(void *)), (ksp, function, data, destroy));
713: PetscFunctionReturn(PETSC_SUCCESS);
714: }
716: /*MC
717: KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method. [](sec_flexibleksp). [](sec_pipelineksp)
719: Options Database Keys:
720: + -ksp_pipegcr_mmax <N> - the max number of Krylov directions to orthogonalize against
721: . -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
722: . -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
723: - -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
725: Level: intermediate
727: Notes:
728: The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
729: which may vary from one iteration to the next. Users can can define a method to vary the
730: preconditioner between iterates via `KSPPIPEGCRSetModifyPC()`.
731: Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
732: solution is given by the current estimate for x which was obtained by the last restart
733: iterations of the PIPEGCR algorithm.
734: The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as GCR.
736: Only supports left preconditioning.
738: The natural "norm" for this method is (u,Au), where u is the preconditioned residual. This norm is available at no additional computational cost, as with standard CG.
739: Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
741: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
742: See [](doc_faq_pipelined)
744: Contributed by:
745: Sascha M. Schnepp and Patrick Sanan
747: Reference:
748: P. Sanan, S.M. Schnepp, and D.A. May,
749: "Pipelined, Flexible Krylov Subspace Methods,"
750: SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
751: DOI: 10.1137/15M1049130
753: .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
754: `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`
755: M*/
756: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
757: {
758: KSP_PIPEGCR *pipegcr;
760: PetscFunctionBegin;
761: PetscCall(PetscNew(&pipegcr));
762: pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
763: pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
764: pipegcr->nvecs = 0;
765: pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
766: pipegcr->nchunks = 0;
767: pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
768: pipegcr->n_restarts = 0;
769: pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
771: ksp->data = (void *)pipegcr;
773: /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
774: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
775: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
776: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
777: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
779: ksp->ops->setup = KSPSetUp_PIPEGCR;
780: ksp->ops->solve = KSPSolve_PIPEGCR;
781: ksp->ops->reset = KSPReset_PIPEGCR;
782: ksp->ops->destroy = KSPDestroy_PIPEGCR;
783: ksp->ops->view = KSPView_PIPEGCR;
784: ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
785: ksp->ops->buildsolution = KSPBuildSolutionDefault;
786: ksp->ops->buildresidual = KSPBuildResidualDefault;
788: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", KSPPIPEGCRSetModifyPC_PIPEGCR));
789: PetscFunctionReturn(PETSC_SUCCESS);
790: }