Actual source code: pipecgrr.c


  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 11: {
 12:   PetscFunctionBegin;
 13:   /* get work vectors needed by PIPECGRR */
 14:   PetscCall(KSPSetWorkVecs(ksp, 9));
 15:   PetscFunctionReturn(PETSC_SUCCESS);
 16: }

 18: /*
 19:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 20: */
 21: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
 22: {
 23:   PetscInt    i = 0, replace = 0, nsize;
 24:   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
 25:   PetscReal   dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
 26:   PetscReal   ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
 27:   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
 28:   Mat         Amat, Pmat;
 29:   PetscBool   diagonalscale;

 31:   PetscFunctionBegin;
 32:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 33:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

 35:   X = ksp->vec_sol;
 36:   B = ksp->vec_rhs;
 37:   M = ksp->work[0];
 38:   Z = ksp->work[1];
 39:   P = ksp->work[2];
 40:   N = ksp->work[3];
 41:   W = ksp->work[4];
 42:   Q = ksp->work[5];
 43:   U = ksp->work[6];
 44:   R = ksp->work[7];
 45:   S = ksp->work[8];

 47:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

 49:   ksp->its = 0;
 50:   if (!ksp->guess_zero) {
 51:     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- b - Ax  */
 52:     PetscCall(VecAYPX(R, -1.0, B));
 53:   } else {
 54:     PetscCall(VecCopy(B, R)); /*  r <- b (x is 0)  */
 55:   }

 57:   PetscCall(KSP_PCApply(ksp, R, U)); /*  u <- Br  */

 59:   switch (ksp->normtype) {
 60:   case KSP_NORM_PRECONDITIONED:
 61:     PetscCall(VecNormBegin(U, NORM_2, &dp)); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 62:     PetscCall(VecNormBegin(B, NORM_2, &db));
 63:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
 64:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 65:     PetscCall(VecNormEnd(U, NORM_2, &dp));
 66:     PetscCall(VecNormEnd(B, NORM_2, &db));
 67:     break;
 68:   case KSP_NORM_UNPRECONDITIONED:
 69:     PetscCall(VecNormBegin(R, NORM_2, &dp)); /*  dp <- r'*r = e'*A'*A*e  */
 70:     PetscCall(VecNormBegin(B, NORM_2, &db));
 71:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 72:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 73:     PetscCall(VecNormEnd(R, NORM_2, &dp));
 74:     PetscCall(VecNormEnd(B, NORM_2, &db));
 75:     break;
 76:   case KSP_NORM_NATURAL:
 77:     PetscCall(VecDotBegin(R, U, &gamma)); /*  gamma <- u'*r  */
 78:     PetscCall(VecNormBegin(B, NORM_2, &db));
 79:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 80:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
 81:     PetscCall(VecDotEnd(R, U, &gamma));
 82:     PetscCall(VecNormEnd(B, NORM_2, &db));
 83:     KSPCheckDot(ksp, gamma);
 84:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 85:     break;
 86:   case KSP_NORM_NONE:
 87:     PetscCall(KSP_MatMult(ksp, Amat, U, W));
 88:     dp = 0.0;
 89:     break;
 90:   default:
 91:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 92:   }
 93:   PetscCall(KSPLogResidualHistory(ksp, dp));
 94:   PetscCall(KSPMonitor(ksp, 0, dp));
 95:   ksp->rnorm = dp;
 96:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /*  test for convergence  */
 97:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 99:   PetscCall(MatNorm(Amat, NORM_INFINITY, &Anorm));
100:   PetscCall(VecGetSize(B, &nsize));
101:   nsi = (PetscReal)nsize;
102:   sqn = PetscSqrtReal(nsi);

104:   do {
105:     if (i > 1) {
106:       pnp = dpp;
107:       snp = ds;
108:       qnp = dq;
109:       znp = dz;
110:     }
111:     if (i > 0) {
112:       rnp    = dp;
113:       unp    = du;
114:       wnp    = dw;
115:       xnp    = dx;
116:       alphap = alpha;
117:       betap  = beta;
118:     }

120:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
121:       PetscCall(VecNormBegin(R, NORM_2, &dp));
122:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
123:       PetscCall(VecNormBegin(U, NORM_2, &dp));
124:     }
125:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
126:     PetscCall(VecDotBegin(W, U, &delta));

128:     if (i > 0) {
129:       PetscCall(VecNormBegin(S, NORM_2, &ds));
130:       PetscCall(VecNormBegin(Z, NORM_2, &dz));
131:       PetscCall(VecNormBegin(P, NORM_2, &dpp));
132:       PetscCall(VecNormBegin(Q, NORM_2, &dq));
133:       PetscCall(VecNormBegin(M, NORM_2, &dm));
134:     }
135:     PetscCall(VecNormBegin(X, NORM_2, &dx));
136:     PetscCall(VecNormBegin(U, NORM_2, &du));
137:     PetscCall(VecNormBegin(W, NORM_2, &dw));

139:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
140:     PetscCall(KSP_PCApply(ksp, W, M));       /*   m <- Bw       */
141:     PetscCall(KSP_MatMult(ksp, Amat, M, N)); /*   n <- Am       */

143:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
144:       PetscCall(VecNormEnd(R, NORM_2, &dp));
145:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
146:       PetscCall(VecNormEnd(U, NORM_2, &dp));
147:     }
148:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
149:     PetscCall(VecDotEnd(W, U, &delta));

151:     if (i > 0) {
152:       PetscCall(VecNormEnd(S, NORM_2, &ds));
153:       PetscCall(VecNormEnd(Z, NORM_2, &dz));
154:       PetscCall(VecNormEnd(P, NORM_2, &dpp));
155:       PetscCall(VecNormEnd(Q, NORM_2, &dq));
156:       PetscCall(VecNormEnd(M, NORM_2, &dm));
157:     }
158:     PetscCall(VecNormEnd(X, NORM_2, &dx));
159:     PetscCall(VecNormEnd(U, NORM_2, &du));
160:     PetscCall(VecNormEnd(W, NORM_2, &dw));

162:     if (i > 0) {
163:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
164:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

166:       ksp->rnorm = dp;
167:       PetscCall(KSPLogResidualHistory(ksp, dp));
168:       PetscCall(KSPMonitor(ksp, i, dp));
169:       PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
170:       if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
171:     }

173:     if (i == 0) {
174:       alpha = gamma / delta;
175:       PetscCall(VecCopy(N, Z)); /*  z <- n  */
176:       PetscCall(VecCopy(M, Q)); /*  q <- m  */
177:       PetscCall(VecCopy(U, P)); /*  p <- u  */
178:       PetscCall(VecCopy(W, S)); /*  s <- w  */
179:     } else {
180:       beta  = gamma / gammaold;
181:       alpha = gamma / (delta - beta / alpha * gamma);
182:       PetscCall(VecAYPX(Z, beta, N)); /*  z <- n + beta * z  */
183:       PetscCall(VecAYPX(Q, beta, M)); /*  q <- m + beta * q  */
184:       PetscCall(VecAYPX(P, beta, U)); /*  p <- u + beta * p  */
185:       PetscCall(VecAYPX(S, beta, W)); /*  s <- w + beta * s  */
186:     }
187:     PetscCall(VecAXPY(X, alpha, P));  /*  x <- x + alpha * p  */
188:     PetscCall(VecAXPY(U, -alpha, Q)); /*  u <- u - alpha * q  */
189:     PetscCall(VecAXPY(W, -alpha, Z)); /*  w <- w - alpha * z  */
190:     PetscCall(VecAXPY(R, -alpha, S)); /*  r <- r - alpha * s  */
191:     gammaold = gamma;

193:     if (i > 0) {
194:       errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
195:       errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
196:     }
197:     if (i > 1) {
198:       errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
199:       errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
200:     }

202:     if (i > 0) {
203:       if (i == 1) {
204:         errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
205:         errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
206:         errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
207:         errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
208:       } else if (replace == 1) {
209:         errrprev = errr;
210:         errr     = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
211:         errs     = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
212:         errw     = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
213:         errz     = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
214:         replace  = 0;
215:       } else {
216:         errrprev = errr;
217:         errr     = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
218:         errs     = errw + PetscAbsScalar(betap) * errs + errncs;
219:         errw     = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
220:         errz     = PetscAbsScalar(betap) * errz + errncz;
221:       }
222:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
223:         PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*  r <- Ax - b  */
224:         PetscCall(VecAYPX(R, -1.0, B));
225:         PetscCall(KSP_PCApply(ksp, R, U));       /*  u <- Br  */
226:         PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*  w <- Au  */
227:         PetscCall(KSP_MatMult(ksp, Amat, P, S)); /*  s <- Ap  */
228:         PetscCall(KSP_PCApply(ksp, S, Q));       /*  q <- Bs  */
229:         PetscCall(KSP_MatMult(ksp, Amat, Q, Z)); /*  z <- Aq  */
230:         replace = 1;
231:       }
232:     }

234:     i++;
235:     ksp->its = i;

237:   } while (i <= ksp->max_it);
238:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }

242: /*MC
243:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements. [](sec_pipelineksp)

245:    Level: intermediate

247:    Notes:
248:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.  The
249:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

251:    `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
252:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
253:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.

255:    See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
256:    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.

258:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
259:    performance of pipelined methods. See [](doc_faq_pipelined)

261:    Contributed by:
262:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
263:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)

265:    Reference:
266:    S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
267:    propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
268:    SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.

270: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
271: M*/
272: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
273: {
274:   PetscFunctionBegin;
275:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
276:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
277:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
278:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

280:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
281:   ksp->ops->solve          = KSPSolve_PIPECGRR;
282:   ksp->ops->destroy        = KSPDestroyDefault;
283:   ksp->ops->view           = NULL;
284:   ksp->ops->setfromoptions = NULL;
285:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
286:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
287:   PetscFunctionReturn(PETSC_SUCCESS);
288: }