Actual source code: pipecgrr.c
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
11: {
12: PetscFunctionBegin;
13: /* get work vectors needed by PIPECGRR */
14: PetscCall(KSPSetWorkVecs(ksp, 9));
15: PetscFunctionReturn(PETSC_SUCCESS);
16: }
18: /*
19: KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
20: */
21: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
22: {
23: PetscInt i = 0, replace = 0, nsize;
24: PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
25: PetscReal dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
26: PetscReal ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
27: Vec X, B, Z, P, W, Q, U, M, N, R, S;
28: Mat Amat, Pmat;
29: PetscBool diagonalscale;
31: PetscFunctionBegin;
32: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
33: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
35: X = ksp->vec_sol;
36: B = ksp->vec_rhs;
37: M = ksp->work[0];
38: Z = ksp->work[1];
39: P = ksp->work[2];
40: N = ksp->work[3];
41: W = ksp->work[4];
42: Q = ksp->work[5];
43: U = ksp->work[6];
44: R = ksp->work[7];
45: S = ksp->work[8];
47: PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
49: ksp->its = 0;
50: if (!ksp->guess_zero) {
51: PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- b - Ax */
52: PetscCall(VecAYPX(R, -1.0, B));
53: } else {
54: PetscCall(VecCopy(B, R)); /* r <- b (x is 0) */
55: }
57: PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
59: switch (ksp->normtype) {
60: case KSP_NORM_PRECONDITIONED:
61: PetscCall(VecNormBegin(U, NORM_2, &dp)); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
62: PetscCall(VecNormBegin(B, NORM_2, &db));
63: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
64: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
65: PetscCall(VecNormEnd(U, NORM_2, &dp));
66: PetscCall(VecNormEnd(B, NORM_2, &db));
67: break;
68: case KSP_NORM_UNPRECONDITIONED:
69: PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- r'*r = e'*A'*A*e */
70: PetscCall(VecNormBegin(B, NORM_2, &db));
71: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
72: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
73: PetscCall(VecNormEnd(R, NORM_2, &dp));
74: PetscCall(VecNormEnd(B, NORM_2, &db));
75: break;
76: case KSP_NORM_NATURAL:
77: PetscCall(VecDotBegin(R, U, &gamma)); /* gamma <- u'*r */
78: PetscCall(VecNormBegin(B, NORM_2, &db));
79: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
80: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
81: PetscCall(VecDotEnd(R, U, &gamma));
82: PetscCall(VecNormEnd(B, NORM_2, &db));
83: KSPCheckDot(ksp, gamma);
84: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
85: break;
86: case KSP_NORM_NONE:
87: PetscCall(KSP_MatMult(ksp, Amat, U, W));
88: dp = 0.0;
89: break;
90: default:
91: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
92: }
93: PetscCall(KSPLogResidualHistory(ksp, dp));
94: PetscCall(KSPMonitor(ksp, 0, dp));
95: ksp->rnorm = dp;
96: PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
97: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
99: PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
100: PetscCall(VecGetSize(B, &nsize));
101: nsi = (PetscReal)nsize;
102: sqn = PetscSqrtReal(nsi);
104: do {
105: if (i > 1) {
106: pnp = dpp;
107: snp = ds;
108: qnp = dq;
109: znp = dz;
110: }
111: if (i > 0) {
112: rnp = dp;
113: unp = du;
114: wnp = dw;
115: xnp = dx;
116: alphap = alpha;
117: betap = beta;
118: }
120: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
121: PetscCall(VecNormBegin(R, NORM_2, &dp));
122: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
123: PetscCall(VecNormBegin(U, NORM_2, &dp));
124: }
125: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
126: PetscCall(VecDotBegin(W, U, &delta));
128: if (i > 0) {
129: PetscCall(VecNormBegin(S, NORM_2, &ds));
130: PetscCall(VecNormBegin(Z, NORM_2, &dz));
131: PetscCall(VecNormBegin(P, NORM_2, &dpp));
132: PetscCall(VecNormBegin(Q, NORM_2, &dq));
133: PetscCall(VecNormBegin(M, NORM_2, &dm));
134: }
135: PetscCall(VecNormBegin(X, NORM_2, &dx));
136: PetscCall(VecNormBegin(U, NORM_2, &du));
137: PetscCall(VecNormBegin(W, NORM_2, &dw));
139: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
140: PetscCall(KSP_PCApply(ksp, W, M)); /* m <- Bw */
141: PetscCall(KSP_MatMult(ksp, Amat, M, N)); /* n <- Am */
143: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
144: PetscCall(VecNormEnd(R, NORM_2, &dp));
145: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
146: PetscCall(VecNormEnd(U, NORM_2, &dp));
147: }
148: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
149: PetscCall(VecDotEnd(W, U, &delta));
151: if (i > 0) {
152: PetscCall(VecNormEnd(S, NORM_2, &ds));
153: PetscCall(VecNormEnd(Z, NORM_2, &dz));
154: PetscCall(VecNormEnd(P, NORM_2, &dpp));
155: PetscCall(VecNormEnd(Q, NORM_2, &dq));
156: PetscCall(VecNormEnd(M, NORM_2, &dm));
157: }
158: PetscCall(VecNormEnd(X, NORM_2, &dx));
159: PetscCall(VecNormEnd(U, NORM_2, &du));
160: PetscCall(VecNormEnd(W, NORM_2, &dw));
162: if (i > 0) {
163: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
164: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
166: ksp->rnorm = dp;
167: PetscCall(KSPLogResidualHistory(ksp, dp));
168: PetscCall(KSPMonitor(ksp, i, dp));
169: PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
170: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: if (i == 0) {
174: alpha = gamma / delta;
175: PetscCall(VecCopy(N, Z)); /* z <- n */
176: PetscCall(VecCopy(M, Q)); /* q <- m */
177: PetscCall(VecCopy(U, P)); /* p <- u */
178: PetscCall(VecCopy(W, S)); /* s <- w */
179: } else {
180: beta = gamma / gammaold;
181: alpha = gamma / (delta - beta / alpha * gamma);
182: PetscCall(VecAYPX(Z, beta, N)); /* z <- n + beta * z */
183: PetscCall(VecAYPX(Q, beta, M)); /* q <- m + beta * q */
184: PetscCall(VecAYPX(P, beta, U)); /* p <- u + beta * p */
185: PetscCall(VecAYPX(S, beta, W)); /* s <- w + beta * s */
186: }
187: PetscCall(VecAXPY(X, alpha, P)); /* x <- x + alpha * p */
188: PetscCall(VecAXPY(U, -alpha, Q)); /* u <- u - alpha * q */
189: PetscCall(VecAXPY(W, -alpha, Z)); /* w <- w - alpha * z */
190: PetscCall(VecAXPY(R, -alpha, S)); /* r <- r - alpha * s */
191: gammaold = gamma;
193: if (i > 0) {
194: errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
195: errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
196: }
197: if (i > 1) {
198: errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
199: errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
200: }
202: if (i > 0) {
203: if (i == 1) {
204: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
205: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
206: errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
207: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
208: } else if (replace == 1) {
209: errrprev = errr;
210: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
211: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
212: errw = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
213: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
214: replace = 0;
215: } else {
216: errrprev = errr;
217: errr = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
218: errs = errw + PetscAbsScalar(betap) * errs + errncs;
219: errw = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
220: errz = PetscAbsScalar(betap) * errz + errncz;
221: }
222: if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
223: PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- Ax - b */
224: PetscCall(VecAYPX(R, -1.0, B));
225: PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
226: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
227: PetscCall(KSP_MatMult(ksp, Amat, P, S)); /* s <- Ap */
228: PetscCall(KSP_PCApply(ksp, S, Q)); /* q <- Bs */
229: PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /* z <- Aq */
230: replace = 1;
231: }
232: }
234: i++;
235: ksp->its = i;
237: } while (i <= ksp->max_it);
238: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: /*MC
243: KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements. [](sec_pipelineksp)
245: Level: intermediate
247: Notes:
248: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`. The
249: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
251: `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
252: True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
253: iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
255: See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
256: See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.
258: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
259: performance of pipelined methods. See [](doc_faq_pipelined)
261: Contributed by:
262: Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
263: European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
265: Reference:
266: S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
267: propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
268: SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.
270: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
271: M*/
272: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
273: {
274: PetscFunctionBegin;
275: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
276: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
277: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
278: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
280: ksp->ops->setup = KSPSetUp_PIPECGRR;
281: ksp->ops->solve = KSPSolve_PIPECGRR;
282: ksp->ops->destroy = KSPDestroyDefault;
283: ksp->ops->view = NULL;
284: ksp->ops->setfromoptions = NULL;
285: ksp->ops->buildsolution = KSPBuildSolutionDefault;
286: ksp->ops->buildresidual = KSPBuildResidualDefault;
287: PetscFunctionReturn(PETSC_SUCCESS);
288: }