Actual source code: pipelcg.c
#include <petsc/private/kspimpl.h>
#include <petsc/private/vecimpl.h>

#define offset(j)       PetscMax(((j) - (2 * l)), 0)
#define shift(i, j)     ((i)-offset((j)))
#define G(i, j)         (plcg->G[((j) * (2 * l + 1)) + (shift((i), (j)))])
#define G_noshift(i, j) (plcg->G[((j) * (2 * l + 1)) + (i)])
#define alpha(i)        (plcg->alpha[(i)])
#define gamma(i)        (plcg->gamma[(i)])
#define delta(i)        (plcg->delta[(i)])
#define sigma(i)        (plcg->sigma[(i)])
#define req(i)          (plcg->req[(i)])
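
/*
  Worked example of the banded storage implemented by the macros above: G is
  stored column by column, keeping for column j only the band of rows
  offset(j) <= i <= offset(j) + 2*l. For instance, assuming pipeline depth
  l = 1 (band width 2*l + 1 = 3), entry G(3,4) has offset(4) = max(4 - 2, 0) = 2
  and shift(3,4) = 3 - 2 = 1, so it lands at plcg->G[4 * 3 + 1] = plcg->G[13].
*/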

typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
struct KSP_CG_PIPE_L_s {
  PetscInt     l;            /* pipeline depth */
  Vec         *Z;            /* Z vectors (shifted base) */
  Vec         *U;            /* U vectors (unpreconditioned shifted base) */
  Vec         *V;            /* V vectors (original base) */
  Vec         *Q;            /* Q vectors (auxiliary bases) */
  Vec          p;            /* work vector */
  PetscScalar *G;            /* such that Z = VG (band matrix) */
  PetscScalar *gamma, *delta, *alpha;
  PetscReal    lmin, lmax;   /* min and max eigenvalue estimates, used to compute the base shifts */
  PetscReal   *sigma;        /* base shifts */
  MPI_Request *req;          /* request array for asynchronous global collective */
  PetscBool    show_rstrt;   /* flag to show restart information in output (default: not shown) */
};

/*
  KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.

  This is called once, usually automatically by KSPSolve() or KSPSetUp(),
  but can also be called directly by KSPSetUp().
*/
static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  PetscInt       l = plcg->l, max_it = ksp->max_it;
  MPI_Comm       comm;

  PetscFunctionBegin;
  comm = PetscObjectComm((PetscObject)ksp);
  PetscCheck(max_it >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: max_it argument must be positive.", ((PetscObject)ksp)->type_name);
  PetscCheck(l >= 1, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be positive.", ((PetscObject)ksp)->type_name);
  PetscCheck(l <= max_it, comm, PETSC_ERR_ARG_OUTOFRANGE, "%s: pipel argument must be less than or equal to max_it.", ((PetscObject)ksp)->type_name);

  PetscCall(KSPSetWorkVecs(ksp, 1)); /* get work vectors needed by PIPELCG */
  plcg->p = ksp->work[0];

  PetscCall(VecDuplicateVecs(plcg->p, PetscMax(3, l + 1), &plcg->Z));
  PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->U));
  PetscCall(VecDuplicateVecs(plcg->p, 3, &plcg->V));
  PetscCall(VecDuplicateVecs(plcg->p, 3 * (l - 1) + 1, &plcg->Q));
  PetscCall(PetscCalloc1(2, &plcg->alpha));
  PetscCall(PetscCalloc1(l, &plcg->sigma));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  PetscInt       l    = plcg->l;

  PetscFunctionBegin;
  PetscCall(PetscFree(plcg->sigma));
  PetscCall(PetscFree(plcg->alpha));
  PetscCall(VecDestroyVecs(PetscMax(3, l + 1), &plcg->Z));
  PetscCall(VecDestroyVecs(3, &plcg->U));
  PetscCall(VecDestroyVecs(3, &plcg->V));
  PetscCall(VecDestroyVecs(3 * (l - 1) + 1, &plcg->Q));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
{
  PetscFunctionBegin;
  PetscCall(KSPReset_PIPELCG(ksp));
  PetscCall(KSPDestroyDefault(ksp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPSetFromOptions_PIPELCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  PetscBool      flag = PETSC_FALSE;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPELCG options");
  PetscCall(PetscOptionsInt("-ksp_pipelcg_pipel", "Pipeline length", "", plcg->l, &plcg->l, &flag));
  if (!flag) plcg->l = 1;
  PetscCall(PetscOptionsReal("-ksp_pipelcg_lmin", "Estimate for smallest eigenvalue", "", plcg->lmin, &plcg->lmin, &flag));
  if (!flag) plcg->lmin = 0.0;
  PetscCall(PetscOptionsReal("-ksp_pipelcg_lmax", "Estimate for largest eigenvalue", "", plcg->lmax, &plcg->lmax, &flag));
  if (!flag) plcg->lmax = 0.0;
  PetscCall(PetscOptionsBool("-ksp_pipelcg_monitor", "Output information on restarts when they occur? (default: 0)", "", plcg->show_rstrt, &plcg->show_rstrt, &flag));
  if (!flag) plcg->show_rstrt = PETSC_FALSE;
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
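
/*
  Thin wrapper around the non-blocking global reduction: when the MPI
  implementation provides non-blocking collectives, the all-reduce is posted
  asynchronously and completed by a later MPI_Wait(); otherwise it falls back
  to a blocking MPIU_Allreduce and returns MPI_REQUEST_NULL, so the matching
  MPI_Wait() calls elsewhere in this file complete immediately.
*/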
static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
  PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
#else
  PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
  *request = MPI_REQUEST_NULL;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPView_PIPELCG(KSP ksp, PetscViewer viewer)
{
  KSP_CG_PIPE_L *plcg   = (KSP_CG_PIPE_L *)ksp->data;
  PetscBool      iascii = PETSC_FALSE, isstring = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
  } else if (isstring) {
    PetscCall(PetscViewerStringSPrintf(viewer, "  Pipeline depth: %" PetscInt_FMT "\n", plcg->l));
    PetscCall(PetscViewerStringSPrintf(viewer, "  Minimal eigenvalue estimate %g\n", (double)plcg->lmin));
    PetscCall(PetscViewerStringSPrintf(viewer, "  Maximal eigenvalue estimate %g\n", (double)plcg->lmax));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  Mat            A = NULL, Pmat = NULL;
  PetscInt       it = 0, max_it = ksp->max_it, l = plcg->l, i = 0, j = 0, k = 0;
  PetscInt       start = 0, middle = 0, end = 0;
  Vec           *Z = plcg->Z, *U = plcg->U, *V = plcg->V, *Q = plcg->Q;
  Vec            x = NULL, p = NULL, temp = NULL;
  PetscScalar    sum_dummy = 0.0, eta = 0.0, zeta = 0.0, lambda = 0.0;
  PetscReal      dp = 0.0, tmp = 0.0, beta = 0.0, invbeta2 = 0.0;
  MPI_Comm       comm;

  PetscFunctionBegin;
  x = ksp->vec_sol;
  p = plcg->p;

  comm = PetscObjectComm((PetscObject)ksp);
  PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));

  for (it = 0; it < max_it + l; ++it) {
    /* ----------------------------------- */
    /* Multiplication  z_{it+1} = Az_{it}  */
    /* ----------------------------------- */
    /* Shift the U vector pointers */
    temp = U[2];
    for (i = 2; i > 0; i--) U[i] = U[i - 1];
    U[0] = temp;
    if (it < l) {
      /* SpMV and sigma-shift and Prec */
      PetscCall(MatMult(A, Z[l - it], U[0]));
      PetscCall(VecAXPY(U[0], -sigma(it), U[1]));
      PetscCall(KSP_PCApply(ksp, U[0], Z[l - it - 1]));
      if (it < l - 1) PetscCall(VecCopy(Z[l - it - 1], Q[3 * it]));
    } else {
      /* Shift the Z vector pointers */
      temp = Z[PetscMax(l, 2)];
      for (i = PetscMax(l, 2); i > 0; --i) Z[i] = Z[i - 1];
      Z[0] = temp;
      /* SpMV and Prec */
      PetscCall(MatMult(A, Z[1], U[0]));
      PetscCall(KSP_PCApply(ksp, U[0], Z[0]));
    }

    /* ----------------------------------- */
    /* Adjust the G matrix */
    /* ----------------------------------- */
    if (it >= l) {
      if (it == l) {
        /* MPI_Wait for G(0,0), then scale V0 and the Z, U and Q vectors with 1/beta */
        PetscCallMPI(MPI_Wait(&req(0), MPI_STATUS_IGNORE));
        beta    = PetscSqrtReal(PetscRealPart(G(0, 0)));
        G(0, 0) = 1.0;
        PetscCall(VecAXPY(V[0], 1.0 / beta, p)); /* this assumes V[0] to be zero initially */
        for (j = 0; j <= PetscMax(l, 2); ++j) PetscCall(VecScale(Z[j], 1.0 / beta));
        for (j = 0; j <= 2; ++j) PetscCall(VecScale(U[j], 1.0 / beta));
        for (j = 0; j < l - 1; ++j) PetscCall(VecScale(Q[3 * j], 1.0 / beta));
      }

      /* MPI_Wait until the dot products, started l iterations ago, are completed */
      PetscCallMPI(MPI_Wait(&req(it - l + 1), MPI_STATUS_IGNORE));
      if (it >= 2 * l) {
        for (j = PetscMax(0, it - 3 * l + 1); j <= it - 2 * l; j++) { G(j, it - l + 1) = G(it - 2 * l + 1, j + l); /* exploit symmetry in G matrix */ }
      }

      if (it <= 2 * l - 1) {
        invbeta2 = 1.0 / (beta * beta);
        /* Scale columns 1 up to l of G with 1/beta^2 */
        for (j = PetscMax(it - 3 * l + 1, 0); j <= it - l + 1; ++j) G(j, it - l + 1) *= invbeta2;
      }

      for (j = PetscMax(it - 2 * l + 2, 0); j <= it - l; ++j) {
        sum_dummy = 0.0;
        for (k = PetscMax(it - 3 * l + 1, 0); k <= j - 1; ++k) sum_dummy = sum_dummy + G(k, j) * G(k, it - l + 1);
        G(j, it - l + 1) = (G(j, it - l + 1) - sum_dummy) / G(j, j);
      }

      sum_dummy = 0.0;
      for (k = PetscMax(it - 3 * l + 1, 0); k <= it - l; ++k) sum_dummy = sum_dummy + G(k, it - l + 1) * G(k, it - l + 1);

      tmp = PetscRealPart(G(it - l + 1, it - l + 1) - sum_dummy);
      /* Breakdown check */
      if (tmp < 0) {
        if (plcg->show_rstrt) PetscCall(PetscPrintf(comm, "Sqrt breakdown in iteration %" PetscInt_FMT ": sqrt argument is %e. Iteration was restarted.\n", ksp->its + 1, (double)tmp));
        /* End hanging dot-products in the pipeline before exiting for-loop */
        start = it - l + 2;
        end   = PetscMin(it + 1, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
        for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
        break;
      }
      G(it - l + 1, it - l + 1) = PetscSqrtReal(tmp);

      if (it < 2 * l) {
        if (it == l) {
          gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l)) / G(it - l, it - l);
        } else {
          gamma(it - l) = (G(it - l, it - l + 1) + sigma(it - l) * G(it - l, it - l) - delta(it - l - 1) * G(it - l - 1, it - l)) / G(it - l, it - l);
        }
        delta(it - l) = G(it - l + 1, it - l + 1) / G(it - l, it - l);
      } else {
        gamma(it - l) = (G(it - l, it - l) * gamma(it - 2 * l) + G(it - l, it - l + 1) * delta(it - 2 * l) - G(it - l - 1, it - l) * delta(it - l - 1)) / G(it - l, it - l);
        delta(it - l) = (G(it - l + 1, it - l + 1) * delta(it - 2 * l)) / G(it - l, it - l);
      }

      /* -------------------------------------------------- */
      /* Recursively compute the next V, Q, Z and U vectors */
      /* -------------------------------------------------- */
      /* Shift the V vector pointers */
      temp = V[2];
      for (i = 2; i > 0; i--) V[i] = V[i - 1];
      V[0] = temp;

      /* Recurrence V vectors */
      if (l == 1) {
        PetscCall(VecCopy(Z[1], V[0]));
      } else {
        PetscCall(VecCopy(Q[0], V[0]));
      }
      if (it == l) {
        PetscCall(VecAXPY(V[0], sigma(0) - gamma(it - l), V[1]));
      } else {
        alpha(0) = sigma(0) - gamma(it - l);
        alpha(1) = -delta(it - l - 1);
        PetscCall(VecMAXPY(V[0], 2, &alpha(0), &V[1]));
      }
      PetscCall(VecScale(V[0], 1.0 / delta(it - l)));

      /* Recurrence Q vectors */
      for (j = 0; j < l - 1; ++j) {
        /* Shift the Q vector pointers */
        temp = Q[3 * j + 2];
        for (i = 2; i > 0; i--) Q[3 * j + i] = Q[3 * j + i - 1];
        Q[3 * j] = temp;

        if (j < l - 2) {
          PetscCall(VecCopy(Q[3 * (j + 1)], Q[3 * j]));
        } else {
          PetscCall(VecCopy(Z[1], Q[3 * j]));
        }
        if (it == l) {
          PetscCall(VecAXPY(Q[3 * j], sigma(j + 1) - gamma(it - l), Q[3 * j + 1]));
        } else {
          alpha(0) = sigma(j + 1) - gamma(it - l);
          alpha(1) = -delta(it - l - 1);
          PetscCall(VecMAXPY(Q[3 * j], 2, &alpha(0), &Q[3 * j + 1]));
        }
        PetscCall(VecScale(Q[3 * j], 1.0 / delta(it - l)));
      }

      /* Recurrence Z and U vectors */
      if (it == l) {
        PetscCall(VecAXPY(Z[0], -gamma(it - l), Z[1]));
        PetscCall(VecAXPY(U[0], -gamma(it - l), U[1]));
      } else {
        alpha(0) = -gamma(it - l);
        alpha(1) = -delta(it - l - 1);
        PetscCall(VecMAXPY(Z[0], 2, &alpha(0), &Z[1]));
        PetscCall(VecMAXPY(U[0], 2, &alpha(0), &U[1]));
      }
      PetscCall(VecScale(Z[0], 1.0 / delta(it - l)));
      PetscCall(VecScale(U[0], 1.0 / delta(it - l)));
    }

    /* ---------------------------------------- */
    /* Compute and communicate the dot products */
    /* ---------------------------------------- */
    if (it < l) {
      for (j = 0; j < it + 2; ++j) { PetscCall((*U[0]->ops->dot_local)(U[0], Z[l - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */ }
      PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(0, it + 1), it + 2, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
    } else if ((it >= l) && (it < max_it)) {
      middle = it - l + 2;
      end    = it + 2;
      PetscCall((*U[0]->ops->dot_local)(U[0], V[0], &G(it - l + 1, it + 1))); /* dot-product (U[0],V[0]) */
      for (j = middle; j < end; ++j) { PetscCall((*U[0]->ops->dot_local)(U[0], plcg->Z[it + 1 - j], &G(j, it + 1))); /* dot-products (U[0],Z[j]) */ }
      PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(it - l + 1, it + 1), l + 1, MPIU_SCALAR, MPIU_SUM, comm, &req(it + 1)));
    }
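
    /*
      A sketch of the update below, matching the recurrences in this loop
      (standard Lanczos/CG theory): the tridiagonal system built from the
      gamma(.) and delta(.) coefficients is factored on the fly; eta plays the
      role of the current pivot, zeta the current entry of the transformed
      right-hand side, and p the running search direction, so that the solve
      reduces to x <- x + zeta*p with |zeta| serving as the residual estimate dp.
    */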

    /* ----------------------------------------- */
    /* Compute solution vector and residual norm */
    /* ----------------------------------------- */
    if (it >= l) {
      if (it == l) {
        if (ksp->its != 0) ++ksp->its;
        eta  = gamma(0);
        zeta = beta;
        PetscCall(VecCopy(V[1], p));
        PetscCall(VecScale(p, 1.0 / eta));
        PetscCall(VecAXPY(x, zeta, p));
        dp = beta;
      } else if (it > l) {
        k = it - l;
        ++ksp->its;
        lambda = delta(k - 1) / eta;
        eta    = gamma(k) - lambda * delta(k - 1);
        zeta   = -lambda * zeta;
        PetscCall(VecScale(p, -delta(k - 1) / eta));
        PetscCall(VecAXPY(p, 1.0 / eta, V[1]));
        PetscCall(VecAXPY(x, zeta, p));
        dp = PetscAbsScalar(zeta);
      }
      ksp->rnorm = dp;
      PetscCall(KSPLogResidualHistory(ksp, dp));
      PetscCall(KSPMonitor(ksp, ksp->its, dp));
      PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));

      if (ksp->its >= max_it && !ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
      if (ksp->reason) {
        /* End hanging dot-products in the pipeline before exiting for-loop */
        start = it - l + 2;
        end   = PetscMin(it + 2, max_it + 1); /* !warning! 'it' can actually be greater than 'max_it' */
        for (i = start; i < end; ++i) PetscCallMPI(MPI_Wait(&req(i), MPI_STATUS_IGNORE));
        break;
      }
    }
  } /* End inner for loop */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  PetscInt       i = 0, j = 0, l = plcg->l, max_it = ksp->max_it;

  PetscFunctionBegin;
  for (i = 0; i < PetscMax(3, l + 1); ++i) PetscCall(VecSet(plcg->Z[i], 0.0));
  for (i = 1; i < 3; ++i) PetscCall(VecSet(plcg->U[i], 0.0));
  for (i = 0; i < 3; ++i) PetscCall(VecSet(plcg->V[i], 0.0));
  for (i = 0; i < 3 * (l - 1) + 1; ++i) PetscCall(VecSet(plcg->Q[i], 0.0));
  for (j = 0; j < (max_it + 1); ++j) {
    gamma(j) = 0.0;
    delta(j) = 0.0;
    for (i = 0; i < (2 * l + 1); ++i) G_noshift(i, j) = 0.0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
*/
static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L *)ksp->data;
  Mat            A = NULL, Pmat = NULL;
  Vec            b = NULL, x = NULL, p = NULL;
  PetscInt       max_it = ksp->max_it, l = plcg->l;
  PetscInt       i = 0, outer_it = 0, curr_guess_zero = 0;
  PetscReal      lmin = plcg->lmin, lmax = plcg->lmax;
  PetscBool      diagonalscale = PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  comm = PetscObjectComm((PetscObject)ksp);
  PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
  PetscCheck(!diagonalscale, comm, PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

  x = ksp->vec_sol;
  b = ksp->vec_rhs;
  p = plcg->p;

  PetscCall(PetscCalloc1((max_it + 1) * (2 * l + 1), &plcg->G));
  PetscCall(PetscCalloc1(max_it + 1, &plcg->gamma));
  PetscCall(PetscCalloc1(max_it + 1, &plcg->delta));
  PetscCall(PetscCalloc1(max_it + 1, &plcg->req));

  PetscCall(PCGetOperators(ksp->pc, &A, &Pmat));
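
  /*
    The l base shifts computed below are the Chebyshev points of the interval
    [lmin, lmax]:
      sigma_i = (lmin + lmax)/2 + (lmax - lmin)/2 * cos(pi*(2*i + 1)/(2*l)),
    i.e. the roots of the degree-l Chebyshev polynomial mapped onto the
    user-supplied spectral interval estimate of the preconditioned operator.
  */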
  for (i = 0; i < l; ++i) sigma(i) = (0.5 * (lmin + lmax) + (0.5 * (lmax - lmin) * PetscCosReal(PETSC_PI * (2.0 * i + 1.0) / (2.0 * l))));

  ksp->its        = 0;
  outer_it        = 0;
  curr_guess_zero = !!ksp->guess_zero;

  while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
    /* RESTART LOOP */
    if (!curr_guess_zero) {
      PetscCall(KSP_MatMult(ksp, A, x, plcg->U[0])); /* u <- b - Ax */
      PetscCall(VecAYPX(plcg->U[0], -1.0, b));
    } else {
      PetscCall(VecCopy(b, plcg->U[0])); /* u <- b (x is 0) */
    }
    PetscCall(KSP_PCApply(ksp, plcg->U[0], p)); /* p <- Bu */

    if (outer_it > 0) {
      /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
      PetscCall(KSPSolve_ReInitData_PIPELCG(ksp));
    }

    PetscCall((*plcg->U[0]->ops->dot_local)(plcg->U[0], p, &G(0, 0)));
    PetscCall(MPIPetsc_Iallreduce(MPI_IN_PLACE, &G(0, 0), 1, MPIU_SCALAR, MPIU_SUM, comm, &req(0)));
    PetscCall(VecCopy(p, plcg->Z[l]));

    PetscCall(KSPSolve_InnerLoop_PIPELCG(ksp));

    if (ksp->reason) break; /* convergence or divergence */
    ++outer_it;
    curr_guess_zero = 0;
  }

  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscCall(PetscFree(plcg->G));
  PetscCall(PetscFree(plcg->gamma));
  PetscCall(PetscFree(plcg->delta));
  PetscCall(PetscFree(plcg->req));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method. This method has only a single non-blocking global
   reduction per iteration, compared to 2 blocking reductions for standard `KSPCG`. The reduction is overlapped by the
   matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
   of the method. See [](sec_pipelineksp).

   Options Database Keys:
+  -ksp_pipelcg_pipel - pipeline length
.  -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
.  -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
-  -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs

   Level: advanced

   Notes:
   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
   the performance of pipelined methods. See [](doc_faq_pipelined).

   Contributed by:
   Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
   funded by Flemish Research Foundation (FWO) grant number 12H4617N.

   Example usage:
.vb
   KSP tutorials ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
   $mpiexec -n 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type natural
     -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_view
   SNES tutorials ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
   $mpiexec -n 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3
     -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_view
.ve
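
   A minimal programmatic setup sketch; the matrix A and the vectors b and x are assumed to already exist,
   and cleanup of A, b and x as well as full error handling are omitted:
.vb
   KSP ksp;
   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
   PetscCall(KSPSetType(ksp, KSPPIPELCG));
   PetscCall(KSPSetOperators(ksp, A, A));
   /* equivalent to -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmax 8.0 on the command line */
   PetscCall(PetscOptionsSetValue(NULL, "-ksp_pipelcg_pipel", "2"));
   PetscCall(PetscOptionsSetValue(NULL, "-ksp_pipelcg_lmax", "8.0"));
   PetscCall(KSPSetFromOptions(ksp));
   PetscCall(KSPSolve(ksp, b, x));
   PetscCall(KSPDestroy(&ksp));
.ve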

   References:
+  * - J. Cornelis, S. Cools and W. Vanroose,
   "The Communication-Hiding Conjugate Gradient Method with Deep Pipelines",
   Submitted to SIAM Journal on Scientific Computing (SISC), 2018.
-  * - S. Cools, J. Cornelis and W. Vanroose,
   "Numerically Stable Recurrence Relations for the Communication Hiding Pipelined Conjugate Gradient Method",
   Submitted to IEEE Transactions on Parallel and Distributed Systems, 2019.

.seealso: [](ch_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSPCG`, `KSPPIPECG`, `KSPPIPECGRR`, `KSPPGMRES`,
          `KSPPIPEBCGS`, `KSPSetPCSide()`, `KSPGROPPCG`
M*/
PETSC_EXTERN PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
{
  KSP_CG_PIPE_L *plcg = NULL;

  PetscFunctionBegin;
  PetscCall(PetscNew(&plcg));
  ksp->data = (void *)plcg;

  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
  PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));

  ksp->ops->setup          = KSPSetUp_PIPELCG;
  ksp->ops->solve          = KSPSolve_PIPELCG;
  ksp->ops->reset          = KSPReset_PIPELCG;
  ksp->ops->destroy        = KSPDestroy_PIPELCG;
  ksp->ops->view           = KSPView_PIPELCG;
  ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
  ksp->ops->buildsolution  = KSPBuildSolutionDefault;
  ksp->ops->buildresidual  = KSPBuildResidualDefault;
  PetscFunctionReturn(PETSC_SUCCESS);
}