Actual source code: pipebcgs.c
2: /*
3: This file implements pipelined BiCGStab (pipe-BiCGStab).
4: */
5: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
7: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
8: {
9: PetscFunctionBegin;
10: PetscCall(KSPSetWorkVecs(ksp, 15));
11: PetscFunctionReturn(PETSC_SUCCESS);
12: }
14: /* Only need a few hacks from KSPSolve_BCGS */
15: #include <petsc/private/pcimpl.h>
16: static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
17: {
18: PetscInt i;
19: PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
20: Vec X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
21: PetscReal dp = 0.0;
22: KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
23: PC pc;
25: PetscFunctionBegin;
26: X = ksp->vec_sol;
27: B = ksp->vec_rhs;
28: R = ksp->work[0];
29: RP = ksp->work[1];
30: S = ksp->work[2];
31: Y = ksp->work[3];
32: Q = ksp->work[4];
33: Q2 = ksp->work[5];
34: P2 = ksp->work[6];
35: R2 = ksp->work[7];
36: S2 = ksp->work[8];
37: W = ksp->work[9];
38: Z = ksp->work[10];
39: W2 = ksp->work[11];
40: Z2 = ksp->work[12];
41: T = ksp->work[13];
42: V = ksp->work[14];
44: if (!ksp->guess_zero) {
45: if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
46: PetscCall(VecCopy(X, bcgs->guess));
47: } else {
48: PetscCall(VecSet(X, 0.0));
49: }
51: /* Compute initial residual */
52: PetscCall(KSPGetPC(ksp, &pc));
53: PetscCall(PCSetUp(pc));
54: if (!ksp->guess_zero) {
55: PetscCall(KSP_MatMult(ksp, pc->mat, X, Q2));
56: PetscCall(VecCopy(B, R));
57: PetscCall(VecAXPY(R, -1.0, Q2));
58: } else {
59: PetscCall(VecCopy(B, R));
60: }
62: /* Test for nothing to do */
63: if (ksp->normtype != KSP_NORM_NONE) {
64: PetscCall(VecNorm(R, NORM_2, &dp));
65: } else dp = 0.0;
66: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
67: ksp->its = 0;
68: ksp->rnorm = dp;
69: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
70: PetscCall(KSPLogResidualHistory(ksp, dp));
71: PetscCall(KSPMonitor(ksp, 0, dp));
72: PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
73: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
75: /* Initialize */
76: PetscCall(VecCopy(R, RP)); /* rp <- r */
78: PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
79: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
80: PetscCall(KSP_PCApply(ksp, R, R2)); /* r2 <- K r */
81: PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
82: PetscCall(VecDotEnd(R, RP, &rho));
84: PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
85: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W)));
86: PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
87: PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
88: PetscCall(VecDotEnd(W, RP, &d2));
90: alpha = rho / d2;
91: beta = 0.0;
93: /* Main loop */
94: i = 0;
95: do {
96: if (i == 0) {
97: PetscCall(VecCopy(R2, P2)); /* p2 <- r2 */
98: PetscCall(VecCopy(W, S)); /* s <- w */
99: PetscCall(VecCopy(W2, S2)); /* s2 <- w2 */
100: PetscCall(VecCopy(T, Z)); /* z <- t */
101: } else {
102: PetscCall(VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2)); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
103: PetscCall(VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z)); /* s <- beta * s + w - beta * omega * z */
104: PetscCall(VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2)); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
105: PetscCall(VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V)); /* z <- beta * z + t - beta * omega * v */
106: }
107: PetscCall(VecWAXPY(Q, -alpha, S, R)); /* q <- r - alpha s */
108: PetscCall(VecWAXPY(Q2, -alpha, S2, R2)); /* q2 <- r2 - alpha s2 */
109: PetscCall(VecWAXPY(Y, -alpha, Z, W)); /* y <- w - alpha z */
111: PetscCall(VecDotBegin(Q, Y, &d1)); /* d1 <- (q,y) */
112: PetscCall(VecDotBegin(Y, Y, &d2)); /* d2 <- (y,y) */
114: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q)));
115: PetscCall(KSP_PCApply(ksp, Z, Z2)); /* z2 <- K z */
116: PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */
118: PetscCall(VecDotEnd(Q, Y, &d1));
119: PetscCall(VecDotEnd(Y, Y, &d2));
121: if (d2 == 0.0) {
122: /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
123: PetscCall(VecDot(Q, Q, &d1));
124: if (d1 != 0.0) {
125: ksp->reason = KSP_DIVERGED_BREAKDOWN;
126: break;
127: }
128: PetscCall(VecAXPY(X, alpha, P2)); /* x <- x + alpha p2 */
129: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
130: ksp->its++;
131: ksp->rnorm = 0.0;
132: ksp->reason = KSP_CONVERGED_RTOL;
133: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
134: PetscCall(KSPLogResidualHistory(ksp, dp));
135: PetscCall(KSPMonitor(ksp, i + 1, 0.0));
136: break;
137: }
138: omega = d1 / d2; /* omega <- (y'q) / (y'y) */
139: PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2)); /* x <- alpha * p2 + omega * q2 + x */
140: PetscCall(VecWAXPY(R, -omega, Y, Q)); /* r <- q - omega y */
141: PetscCall(VecWAXPY(R2, -alpha, Z2, W2)); /* r2 <- w2 - alpha z2 */
142: PetscCall(VecAYPX(R2, -omega, Q2)); /* r2 <- q2 - omega r2 */
143: PetscCall(VecWAXPY(W, -alpha, V, T)); /* w <- t - alpha v */
144: PetscCall(VecAYPX(W, -omega, Y)); /* w <- y - omega w */
145: rhoold = rho;
147: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) { PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- norm(r) */ }
148: PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
149: PetscCall(VecDotBegin(S, RP, &d1)); /* d1 <- (s,rp) */
150: PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
151: PetscCall(VecDotBegin(Z, RP, &d3)); /* d3 <- (z,rp) */
153: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
154: PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
155: PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
157: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormEnd(R, NORM_2, &dp));
158: PetscCall(VecDotEnd(R, RP, &rho));
159: PetscCall(VecDotEnd(S, RP, &d1));
160: PetscCall(VecDotEnd(W, RP, &d2));
161: PetscCall(VecDotEnd(Z, RP, &d3));
163: PetscCheck(d2 + beta * d1 - beta * omega * d3 != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Divide by zero");
165: beta = (rho / rhoold) * (alpha / omega);
166: alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
168: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
169: ksp->its++;
171: /* Residual replacement step */
172: if (i > 0 && i % 100 == 0 && i < 1001) {
173: PetscCall(KSP_MatMult(ksp, pc->mat, X, R));
174: PetscCall(VecAYPX(R, -1.0, B)); /* r <- b - Ax */
175: PetscCall(KSP_PCApply(ksp, R, R2)); /* r2 <- K r */
176: PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
177: PetscCall(KSP_PCApply(ksp, W, W2)); /* w2 <- K w */
178: PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
179: PetscCall(KSP_MatMult(ksp, pc->mat, P2, S)); /* s <- A p2 */
180: PetscCall(KSP_PCApply(ksp, S, S2)); /* s2 <- K s */
181: PetscCall(KSP_MatMult(ksp, pc->mat, S2, Z)); /* z <- A s2 */
182: PetscCall(KSP_PCApply(ksp, Z, Z2)); /* z2 <- K z */
183: PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */
184: }
186: ksp->rnorm = dp;
187: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
188: PetscCall(KSPLogResidualHistory(ksp, dp));
189: PetscCall(KSPMonitor(ksp, i + 1, dp));
190: PetscCall((*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP));
191: if (ksp->reason) break;
192: if (rho == 0.0) {
193: ksp->reason = KSP_DIVERGED_BREAKDOWN;
194: break;
195: }
196: i++;
197: } while (i < ksp->max_it);
199: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
200: PetscFunctionReturn(PETSC_SUCCESS);
201: }
203: /*MC
204: KSPPIPEBCGS - Implements a pipelined BiCGStab method. [](sec_pipelineksp)
206: Level: intermediate
208: Notes:
209: This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS. The
210: non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.
212: Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.
214: Like `KSPFBCGS`, the `KSPPIPEBCGS` implementation only allows for right preconditioning.
216: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
217: performance of pipelined methods. See [](doc_faq_pipelined)
219: Contributed by:
220: Siegfried Cools, Universiteit Antwerpen,
221: EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
223: Reference:
224: . * - S. Cools and W. Vanroose,
225: "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
226: Parallel Computing, 65:1-20, 2017.
228: .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`,
229: [](sec_pipelineksp), [](doc_faq_pipelined)
230: M*/
231: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
232: {
233: KSP_BCGS *bcgs;
235: PetscFunctionBegin;
236: PetscCall(PetscNew(&bcgs));
238: ksp->data = bcgs;
239: ksp->ops->setup = KSPSetUp_PIPEBCGS;
240: ksp->ops->solve = KSPSolve_PIPEBCGS;
241: ksp->ops->destroy = KSPDestroy_BCGS;
242: ksp->ops->reset = KSPReset_BCGS;
243: ksp->ops->buildresidual = KSPBuildResidualDefault;
244: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
246: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
247: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }