Actual source code: pipebcgs.c


  2: /*
  3:     This file implements pipelined BiCGStab (pipe-BiCGStab).
  4: */
  5: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>

  7: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
  8: {
  9:   PetscFunctionBegin;
 10:   PetscCall(KSPSetWorkVecs(ksp, 15));
 11:   PetscFunctionReturn(PETSC_SUCCESS);
 12: }

 14: /* Only need a few hacks from KSPSolve_BCGS */
 15: #include <petsc/private/pcimpl.h>
 16: static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
 17: {
 18:   PetscInt    i;
 19:   PetscScalar rho, rhoold, alpha, beta, omega = 0.0, d1, d2, d3;
 20:   Vec         X, B, S, R, RP, Y, Q, P2, Q2, R2, S2, W, Z, W2, Z2, T, V;
 21:   PetscReal   dp   = 0.0;
 22:   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
 23:   PC          pc;

 25:   PetscFunctionBegin;
 26:   X  = ksp->vec_sol;
 27:   B  = ksp->vec_rhs;
 28:   R  = ksp->work[0];
 29:   RP = ksp->work[1];
 30:   S  = ksp->work[2];
 31:   Y  = ksp->work[3];
 32:   Q  = ksp->work[4];
 33:   Q2 = ksp->work[5];
 34:   P2 = ksp->work[6];
 35:   R2 = ksp->work[7];
 36:   S2 = ksp->work[8];
 37:   W  = ksp->work[9];
 38:   Z  = ksp->work[10];
 39:   W2 = ksp->work[11];
 40:   Z2 = ksp->work[12];
 41:   T  = ksp->work[13];
 42:   V  = ksp->work[14];

 44:   if (!ksp->guess_zero) {
 45:     if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
 46:     PetscCall(VecCopy(X, bcgs->guess));
 47:   } else {
 48:     PetscCall(VecSet(X, 0.0));
 49:   }

 51:   /* Compute initial residual */
 52:   PetscCall(KSPGetPC(ksp, &pc));
 53:   PetscCall(PCSetUp(pc));
 54:   if (!ksp->guess_zero) {
 55:     PetscCall(KSP_MatMult(ksp, pc->mat, X, Q2));
 56:     PetscCall(VecCopy(B, R));
 57:     PetscCall(VecAXPY(R, -1.0, Q2));
 58:   } else {
 59:     PetscCall(VecCopy(B, R));
 60:   }

 62:   /* Test for nothing to do */
 63:   if (ksp->normtype != KSP_NORM_NONE) {
 64:     PetscCall(VecNorm(R, NORM_2, &dp));
 65:   } else dp = 0.0;
 66:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
 67:   ksp->its   = 0;
 68:   ksp->rnorm = dp;
 69:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
 70:   PetscCall(KSPLogResidualHistory(ksp, dp));
 71:   PetscCall(KSPMonitor(ksp, 0, dp));
 72:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
 73:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 75:   /* Initialize */
 76:   PetscCall(VecCopy(R, RP)); /* rp <- r */

 78:   PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
 79:   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 80:   PetscCall(KSP_PCApply(ksp, R, R2));          /* r2 <- K r */
 81:   PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w <- A r2 */
 82:   PetscCall(VecDotEnd(R, RP, &rho));

 84:   PetscCall(VecDotBegin(W, RP, &d2)); /* d2 <- (w,rp) */
 85:   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W)));
 86:   PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
 87:   PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */
 88:   PetscCall(VecDotEnd(W, RP, &d2));

 90:   alpha = rho / d2;
 91:   beta  = 0.0;

 93:   /* Main loop */
 94:   i = 0;
 95:   do {
 96:     if (i == 0) {
 97:       PetscCall(VecCopy(R2, P2)); /* p2 <- r2 */
 98:       PetscCall(VecCopy(W, S));   /* s  <- w  */
 99:       PetscCall(VecCopy(W2, S2)); /* s2 <- w2 */
100:       PetscCall(VecCopy(T, Z));   /* z  <- t  */
101:     } else {
102:       PetscCall(VecAXPBYPCZ(P2, 1.0, -beta * omega, beta, R2, S2)); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
103:       PetscCall(VecAXPBYPCZ(S, 1.0, -beta * omega, beta, W, Z));    /* s  <- beta * s  + w  - beta * omega * z  */
104:       PetscCall(VecAXPBYPCZ(S2, 1.0, -beta * omega, beta, W2, Z2)); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
105:       PetscCall(VecAXPBYPCZ(Z, 1.0, -beta * omega, beta, T, V));    /* z  <- beta * z  + t  - beta * omega * v  */
106:     }
107:     PetscCall(VecWAXPY(Q, -alpha, S, R));    /* q  <- r  - alpha s  */
108:     PetscCall(VecWAXPY(Q2, -alpha, S2, R2)); /* q2 <- r2 - alpha s2 */
109:     PetscCall(VecWAXPY(Y, -alpha, Z, W));    /* y  <- w  - alpha z  */

111:     PetscCall(VecDotBegin(Q, Y, &d1)); /* d1 <- (q,y) */
112:     PetscCall(VecDotBegin(Y, Y, &d2)); /* d2 <- (y,y) */

114:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q)));
115:     PetscCall(KSP_PCApply(ksp, Z, Z2));          /* z2 <- K z */
116:     PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v <- A z2 */

118:     PetscCall(VecDotEnd(Q, Y, &d1));
119:     PetscCall(VecDotEnd(Y, Y, &d2));

121:     if (d2 == 0.0) {
122:       /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
123:       PetscCall(VecDot(Q, Q, &d1));
124:       if (d1 != 0.0) {
125:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
126:         break;
127:       }
128:       PetscCall(VecAXPY(X, alpha, P2)); /* x <- x + alpha p2 */
129:       PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
130:       ksp->its++;
131:       ksp->rnorm  = 0.0;
132:       ksp->reason = KSP_CONVERGED_RTOL;
133:       PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
134:       PetscCall(KSPLogResidualHistory(ksp, dp));
135:       PetscCall(KSPMonitor(ksp, i + 1, 0.0));
136:       break;
137:     }
138:     omega = d1 / d2;                                      /* omega <- (y'q) / (y'y) */
139:     PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, Q2)); /* x <- alpha * p2 + omega * q2 + x */
140:     PetscCall(VecWAXPY(R, -omega, Y, Q));                 /* r <- q - omega y */
141:     PetscCall(VecWAXPY(R2, -alpha, Z2, W2));              /* r2 <- w2 - alpha z2 */
142:     PetscCall(VecAYPX(R2, -omega, Q2));                   /* r2 <- q2 - omega r2 */
143:     PetscCall(VecWAXPY(W, -alpha, V, T));                 /* w <- t - alpha v */
144:     PetscCall(VecAYPX(W, -omega, Y));                     /* w <- y - omega w */
145:     rhoold = rho;

147:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) { PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- norm(r) */ }
148:     PetscCall(VecDotBegin(R, RP, &rho)); /* rho <- (r,rp) */
149:     PetscCall(VecDotBegin(S, RP, &d1));  /* d1 <- (s,rp) */
150:     PetscCall(VecDotBegin(W, RP, &d2));  /* d2 <- (w,rp) */
151:     PetscCall(VecDotBegin(Z, RP, &d3));  /* d3 <- (z,rp) */

153:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
154:     PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
155:     PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t <- A w2 */

157:     if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i + 2) PetscCall(VecNormEnd(R, NORM_2, &dp));
158:     PetscCall(VecDotEnd(R, RP, &rho));
159:     PetscCall(VecDotEnd(S, RP, &d1));
160:     PetscCall(VecDotEnd(W, RP, &d2));
161:     PetscCall(VecDotEnd(Z, RP, &d3));

163:     PetscCheck(d2 + beta * d1 - beta * omega * d3 != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Divide by zero");

165:     beta  = (rho / rhoold) * (alpha / omega);
166:     alpha = rho / (d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */

168:     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
169:     ksp->its++;

171:     /* Residual replacement step  */
172:     if (i > 0 && i % 100 == 0 && i < 1001) {
173:       PetscCall(KSP_MatMult(ksp, pc->mat, X, R));
174:       PetscCall(VecAYPX(R, -1.0, B));              /* r  <- b - Ax */
175:       PetscCall(KSP_PCApply(ksp, R, R2));          /* r2 <- K r */
176:       PetscCall(KSP_MatMult(ksp, pc->mat, R2, W)); /* w  <- A r2 */
177:       PetscCall(KSP_PCApply(ksp, W, W2));          /* w2 <- K w */
178:       PetscCall(KSP_MatMult(ksp, pc->mat, W2, T)); /* t  <- A w2 */
179:       PetscCall(KSP_MatMult(ksp, pc->mat, P2, S)); /* s  <- A p2 */
180:       PetscCall(KSP_PCApply(ksp, S, S2));          /* s2 <- K s */
181:       PetscCall(KSP_MatMult(ksp, pc->mat, S2, Z)); /* z  <- A s2 */
182:       PetscCall(KSP_PCApply(ksp, Z, Z2));          /* z2 <- K z */
183:       PetscCall(KSP_MatMult(ksp, pc->mat, Z2, V)); /* v  <- A z2 */
184:     }

186:     ksp->rnorm = dp;
187:     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
188:     PetscCall(KSPLogResidualHistory(ksp, dp));
189:     PetscCall(KSPMonitor(ksp, i + 1, dp));
190:     PetscCall((*ksp->converged)(ksp, i + 1, dp, &ksp->reason, ksp->cnvP));
191:     if (ksp->reason) break;
192:     if (rho == 0.0) {
193:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
194:       break;
195:     }
196:     i++;
197:   } while (i < ksp->max_it);

199:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
200:   PetscFunctionReturn(PETSC_SUCCESS);
201: }

203: /*MC
204:     KSPPIPEBCGS - Implements a pipelined BiCGStab method. [](sec_pipelineksp)

206:     Level: intermediate

208:     Notes:
209:     This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS.  The
210:     non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.

212:     Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.

214:     Like `KSPFBCGS`, the `KSPPIPEBCGS` implementation only allows for right preconditioning.

216:     MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
217:     performance of pipelined methods. See [](doc_faq_pipelined)

219:     Contributed by:
220:     Siegfried Cools, Universiteit Antwerpen,
221:     EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.

223:     Reference:
224: .   * - S. Cools and W. Vanroose,
225:     "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
226:     Parallel Computing, 65:1-20, 2017.

228: .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`,
229:            [](sec_pipelineksp), [](doc_faq_pipelined)
230: M*/
231: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
232: {
233:   KSP_BCGS *bcgs;

235:   PetscFunctionBegin;
236:   PetscCall(PetscNew(&bcgs));

238:   ksp->data                = bcgs;
239:   ksp->ops->setup          = KSPSetUp_PIPEBCGS;
240:   ksp->ops->solve          = KSPSolve_PIPEBCGS;
241:   ksp->ops->destroy        = KSPDestroy_BCGS;
242:   ksp->ops->reset          = KSPReset_BCGS;
243:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
244:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;

246:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
247:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }