Actual source code: pipecg.c


  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
 11: {
 12:   PetscFunctionBegin;
 13:   /* get work vectors needed by PIPECG */
 14:   PetscCall(KSPSetWorkVecs(ksp, 9));
 15:   PetscFunctionReturn(PETSC_SUCCESS);
 16: }

 18: /*
 19:  KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
 20: */
 21: static PetscErrorCode KSPSolve_PIPECG(KSP ksp)
 22: {
 23:   PetscInt    i;
 24:   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0;
 25:   PetscReal   dp = 0.0;
 26:   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
 27:   Mat         Amat, Pmat;
 28:   PetscBool   diagonalscale;

 30:   PetscFunctionBegin;
 31:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 32:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

 34:   X = ksp->vec_sol;
 35:   B = ksp->vec_rhs;
 36:   R = ksp->work[0];
 37:   Z = ksp->work[1];
 38:   P = ksp->work[2];
 39:   N = ksp->work[3];
 40:   W = ksp->work[4];
 41:   Q = ksp->work[5];
 42:   U = ksp->work[6];
 43:   M = ksp->work[7];
 44:   S = ksp->work[8];

 46:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

 48:   ksp->its = 0;
 49:   if (!ksp->guess_zero) {
 50:     PetscCall(KSP_MatMult(ksp, Amat, X, R)); /*     r <- b - Ax     */
 51:     PetscCall(VecAYPX(R, -1.0, B));
 52:   } else {
 53:     PetscCall(VecCopy(B, R)); /*     r <- b (x is 0) */
 54:   }

 56:   PetscCall(KSP_PCApply(ksp, R, U)); /*     u <- Br   */

 58:   switch (ksp->normtype) {
 59:   case KSP_NORM_PRECONDITIONED:
 60:     PetscCall(VecNormBegin(U, NORM_2, &dp)); /*     dp <- u'*u = e'*A'*B'*B*A'*e'     */
 61:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
 62:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*     w <- Au   */
 63:     PetscCall(VecNormEnd(U, NORM_2, &dp));
 64:     break;
 65:   case KSP_NORM_UNPRECONDITIONED:
 66:     PetscCall(VecNormBegin(R, NORM_2, &dp)); /*     dp <- r'*r = e'*A'*A*e            */
 67:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 68:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*     w <- Au   */
 69:     PetscCall(VecNormEnd(R, NORM_2, &dp));
 70:     break;
 71:   case KSP_NORM_NATURAL:
 72:     PetscCall(VecDotBegin(R, U, &gamma)); /*     gamma <- u'*r       */
 73:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
 74:     PetscCall(KSP_MatMult(ksp, Amat, U, W)); /*     w <- Au   */
 75:     PetscCall(VecDotEnd(R, U, &gamma));
 76:     KSPCheckDot(ksp, gamma);
 77:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*     dp <- r'*u = r'*B*r = e'*A'*B*A*e */
 78:     break;
 79:   case KSP_NORM_NONE:
 80:     PetscCall(KSP_MatMult(ksp, Amat, U, W));
 81:     dp = 0.0;
 82:     break;
 83:   default:
 84:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 85:   }
 86:   PetscCall(KSPLogResidualHistory(ksp, dp));
 87:   PetscCall(KSPMonitor(ksp, 0, dp));
 88:   ksp->rnorm = dp;
 89:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
 90:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 92:   i = 0;
 93:   do {
 94:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
 95:       PetscCall(VecNormBegin(R, NORM_2, &dp));
 96:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
 97:       PetscCall(VecNormBegin(U, NORM_2, &dp));
 98:     }
 99:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
100:     PetscCall(VecDotBegin(W, U, &delta));
101:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));

103:     PetscCall(KSP_PCApply(ksp, W, M));       /*   m <- Bw       */
104:     PetscCall(KSP_MatMult(ksp, Amat, M, N)); /*   n <- Am       */

106:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
107:       PetscCall(VecNormEnd(R, NORM_2, &dp));
108:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
109:       PetscCall(VecNormEnd(U, NORM_2, &dp));
110:     }
111:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
112:     PetscCall(VecDotEnd(W, U, &delta));

114:     if (i > 0) {
115:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
116:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

118:       ksp->rnorm = dp;
119:       PetscCall(KSPLogResidualHistory(ksp, dp));
120:       PetscCall(KSPMonitor(ksp, i, dp));
121:       PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
122:       if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
123:     }

125:     if (i == 0) {
126:       alpha = gamma / delta;
127:       PetscCall(VecCopy(N, Z)); /*     z <- n          */
128:       PetscCall(VecCopy(M, Q)); /*     q <- m          */
129:       PetscCall(VecCopy(U, P)); /*     p <- u          */
130:       PetscCall(VecCopy(W, S)); /*     s <- w          */
131:     } else {
132:       beta  = gamma / gammaold;
133:       alpha = gamma / (delta - beta / alpha * gamma);
134:       PetscCall(VecAYPX(Z, beta, N)); /*     z <- n + beta * z   */
135:       PetscCall(VecAYPX(Q, beta, M)); /*     q <- m + beta * q   */
136:       PetscCall(VecAYPX(P, beta, U)); /*     p <- u + beta * p   */
137:       PetscCall(VecAYPX(S, beta, W)); /*     s <- w + beta * s   */
138:     }
139:     PetscCall(VecAXPY(X, alpha, P));  /*     x <- x + alpha * p   */
140:     PetscCall(VecAXPY(U, -alpha, Q)); /*     u <- u - alpha * q   */
141:     PetscCall(VecAXPY(W, -alpha, Z)); /*     w <- w - alpha * z   */
142:     PetscCall(VecAXPY(R, -alpha, S)); /*     r <- r - alpha * s   */
143:     gammaold = gamma;
144:     i++;
145:     ksp->its = i;

147:     /* if (i%50 == 0) { */
148:     /*   PetscCall(KSP_MatMult(ksp,Amat,X,R));            /\*     w <- b - Ax     *\/ */
149:     /*   PetscCall(VecAYPX(R,-1.0,B)); */
150:     /*   PetscCall(KSP_PCApply(ksp,R,U)); */
151:     /*   PetscCall(KSP_MatMult(ksp,Amat,U,W)); */
152:     /* } */

154:   } while (i <= ksp->max_it);
155:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
156:   PetscFunctionReturn(PETSC_SUCCESS);
157: }

159: PETSC_INTERN PetscErrorCode KSPBuildResidual_CG(KSP, Vec, Vec, Vec *);

161: /*MC
162:    KSPPIPECG - Pipelined conjugate gradient method. [](sec_pipelineksp)

164:    Level: intermediate

166:    Notes:
167:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG.  The
168:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

170:    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product and `KSPGROPPCG`

172:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
173:    See [](doc_faq_pipelined)

175:    Contributed by:
176:    Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders

178:    Reference:
179:    P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
180:    Submitted to Parallel Computing, 2012.

182: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG2`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
183: M*/
184: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
185: {
186:   PetscFunctionBegin;
187:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
188:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
189:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
190:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

192:   ksp->ops->setup          = KSPSetUp_PIPECG;
193:   ksp->ops->solve          = KSPSolve_PIPECG;
194:   ksp->ops->destroy        = KSPDestroyDefault;
195:   ksp->ops->view           = NULL;
196:   ksp->ops->setfromoptions = NULL;
197:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
198:   ksp->ops->buildresidual  = KSPBuildResidual_CG;
199:   PetscFunctionReturn(PETSC_SUCCESS);
200: }