Actual source code: fbcgsr.c


  2: /*
  3:     This file implements FBiCGStab-R.
  4:     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
  5:       (1) There are fewer MPI_Allreduce calls.
  6:       (2) The convergence occasionally is much faster than that of FBiCGStab.
  7: */
  8: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
  9: #include <petsc/private/vecimpl.h>

 11: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
 12: {
 13:   PetscFunctionBegin;
 14:   PetscCall(KSPSetWorkVecs(ksp, 8));
 15:   PetscFunctionReturn(PETSC_SUCCESS);
 16: }

 18: static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
 19: {
 20:   PetscInt                    i, j, N;
 21:   PetscScalar                 tau, sigma, alpha, omega, beta;
 22:   PetscReal                   rho;
 23:   PetscScalar                 xi1, xi2, xi3, xi4;
 24:   Vec                         X, B, P, P2, RP, R, V, S, T, S2;
 25:   PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
 26:   PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
 27:   PetscScalar insums[4], outsums[4];
 28:   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
 29:   PC          pc;
 30:   Mat         mat;

 32:   PetscFunctionBegin;
 33:   PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");
 34:   PetscCall(VecGetLocalSize(ksp->vec_sol, &N));

 36:   X  = ksp->vec_sol;
 37:   B  = ksp->vec_rhs;
 38:   P2 = ksp->work[0];

 40:   /* The following are involved in modified inner product calculations and vector updates */
 41:   RP = ksp->work[1];
 42:   PetscCall(VecGetArray(RP, (PetscScalar **)&rp));
 43:   PetscCall(VecRestoreArray(RP, NULL));
 44:   R = ksp->work[2];
 45:   PetscCall(VecGetArray(R, (PetscScalar **)&r));
 46:   PetscCall(VecRestoreArray(R, NULL));
 47:   P = ksp->work[3];
 48:   PetscCall(VecGetArray(P, (PetscScalar **)&p));
 49:   PetscCall(VecRestoreArray(P, NULL));
 50:   V = ksp->work[4];
 51:   PetscCall(VecGetArray(V, (PetscScalar **)&v));
 52:   PetscCall(VecRestoreArray(V, NULL));
 53:   S = ksp->work[5];
 54:   PetscCall(VecGetArray(S, (PetscScalar **)&s));
 55:   PetscCall(VecRestoreArray(S, NULL));
 56:   T = ksp->work[6];
 57:   PetscCall(VecGetArray(T, (PetscScalar **)&t));
 58:   PetscCall(VecRestoreArray(T, NULL));
 59:   S2 = ksp->work[7];
 60:   PetscCall(VecGetArray(S2, (PetscScalar **)&s2));
 61:   PetscCall(VecRestoreArray(S2, NULL));

 63:   /* Only supports right preconditioning */
 64:   PetscCheck(ksp->pc_side == PC_RIGHT, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "KSP fbcgsr does not support %s", PCSides[ksp->pc_side]);
 65:   if (!ksp->guess_zero) {
 66:     if (!bcgs->guess) PetscCall(VecDuplicate(X, &bcgs->guess));
 67:     PetscCall(VecCopy(X, bcgs->guess));
 68:   } else {
 69:     PetscCall(VecSet(X, 0.0));
 70:   }

 72:   /* Compute initial residual */
 73:   PetscCall(KSPGetPC(ksp, &pc));
 74:   PetscCall(PCSetUp(pc));
 75:   PetscCall(PCGetOperators(pc, &mat, NULL));
 76:   if (!ksp->guess_zero) {
 77:     PetscCall(KSP_MatMult(ksp, mat, X, P2)); /* P2 is used as temporary storage */
 78:     PetscCall(VecCopy(B, R));
 79:     PetscCall(VecAXPY(R, -1.0, P2));
 80:   } else {
 81:     PetscCall(VecCopy(B, R));
 82:   }

 84:   /* Test for nothing to do */
 85:   PetscCall(VecNorm(R, NORM_2, &rho));
 86:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
 87:   ksp->its = 0;
 88:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
 89:   else ksp->rnorm = 0;
 90:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
 91:   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
 92:   PetscCall(KSPMonitor(ksp, 0, ksp->rnorm));
 93:   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
 94:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 96:   /* Initialize iterates */
 97:   PetscCall(VecCopy(R, RP)); /* rp <- r */
 98:   PetscCall(VecCopy(R, P));  /* p <- r */

100:   /* Big loop */
101:   for (i = 0; i < ksp->max_it; i++) {
102:     /* matmult and pc */
103:     PetscCall(KSP_PCApply(ksp, P, P2));      /* p2 <- K p */
104:     PetscCall(KSP_MatMult(ksp, mat, P2, V)); /* v <- A p2 */

106:     /* inner prodcuts */
107:     if (i == 0) {
108:       tau = rho * rho;
109:       PetscCall(VecDot(V, RP, &sigma)); /* sigma <- (v,rp) */
110:     } else {
111:       PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
112:       tau = sigma = 0.0;
113:       for (j = 0; j < N; j++) {
114:         tau += r[j] * rp[j];   /* tau <- (r,rp) */
115:         sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
116:       }
117:       PetscCall(PetscLogFlops(4.0 * N));
118:       PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
119:       insums[0] = tau;
120:       insums[1] = sigma;
121:       PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
122:       PetscCall(MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
123:       PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
124:       tau   = outsums[0];
125:       sigma = outsums[1];
126:     }

128:     /* scalar update */
129:     alpha = tau / sigma;

131:     /* vector update */
132:     PetscCall(VecWAXPY(S, -alpha, V, R)); /* s <- r - alpha v */

134:     /* matmult and pc */
135:     PetscCall(KSP_PCApply(ksp, S, S2));      /* s2 <- K s */
136:     PetscCall(KSP_MatMult(ksp, mat, S2, T)); /* t <- A s2 */

138:     /* inner prodcuts */
139:     PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
140:     xi1 = xi2 = xi3 = xi4 = 0.0;
141:     for (j = 0; j < N; j++) {
142:       xi1 += s[j] * s[j];  /* xi1 <- (s,s) */
143:       xi2 += t[j] * s[j];  /* xi2 <- (t,s) */
144:       xi3 += t[j] * t[j];  /* xi3 <- (t,t) */
145:       xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
146:     }
147:     PetscCall(PetscLogFlops(8.0 * N));
148:     PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));

150:     insums[0] = xi1;
151:     insums[1] = xi2;
152:     insums[2] = xi3;
153:     insums[3] = xi4;

155:     PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
156:     PetscCall(MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
157:     PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
158:     xi1 = outsums[0];
159:     xi2 = outsums[1];
160:     xi3 = outsums[2];
161:     xi4 = outsums[3];

163:     /* test denominator */
164:     if ((xi3 == 0.0) || (sigma == 0.0)) {
165:       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has failed due to zero inner product");
166:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
167:       PetscCall(PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n"));
168:       break;
169:     }

171:     /* scalar updates */
172:     omega = xi2 / xi3;
173:     beta  = -xi4 / sigma;
174:     rho   = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */

176:     /* vector updates */
177:     PetscCall(VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2)); /* x <- alpha * p2 + omega * s2 + x */

179:     /* convergence test */
180:     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
181:     ksp->its++;
182:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
183:     else ksp->rnorm = 0;
184:     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
185:     PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
186:     PetscCall(KSPMonitor(ksp, i + 1, ksp->rnorm));
187:     PetscCall((*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP));
188:     if (ksp->reason) break;

190:     /* vector updates */
191:     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
192:     for (j = 0; j < N; j++) {
193:       r[j] = s[j] - omega * t[j];                 /* r <- s - omega t */
194:       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
195:     }
196:     PetscCall(PetscLogFlops(6.0 * N));
197:     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
198:   }

200:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
201:   PetscFunctionReturn(PETSC_SUCCESS);
202: }

204: /*MC
205:      KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp)

207:    Level: beginner

209:    Notes:
210:    This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster

212:    See `KSPPIPEBCGS` for a pipelined version of the algorithm

214:    Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear
215:    in the vector entries.

217:    Only supports right preconditioning

219: .seealso: [](ch_ksp),  [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGS`, `KSPSetPCSide()`
220: M*/
221: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
222: {
223:   KSP_BCGS *bcgs;

225:   PetscFunctionBegin;
226:   PetscCall(PetscNew(&bcgs));

228:   ksp->data                = bcgs;
229:   ksp->ops->setup          = KSPSetUp_FBCGSR;
230:   ksp->ops->solve          = KSPSolve_FBCGSR;
231:   ksp->ops->destroy        = KSPDestroy_BCGS;
232:   ksp->ops->reset          = KSPReset_BCGS;
233:   ksp->ops->buildsolution  = KSPBuildSolution_BCGS;
234:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
235:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
236:   ksp->pc_side             = PC_RIGHT; /* set default PC side */

238:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
239:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
240:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }