/* pipecg.c: implementation of the pipelined conjugate gradient (PIPECG) Krylov method */
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECG - Sets up the workspace needed by the PIPECG method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECG(KSP ksp)
11: {
12: PetscFunctionBegin;
13: /* get work vectors needed by PIPECG */
14: PetscCall(KSPSetWorkVecs(ksp, 9));
15: PetscFunctionReturn(PETSC_SUCCESS);
16: }
18: /*
19: KSPSolve_PIPECG - This routine actually applies the pipelined conjugate gradient method
20: */
21: static PetscErrorCode KSPSolve_PIPECG(KSP ksp)
22: {
23: PetscInt i;
24: PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0;
25: PetscReal dp = 0.0;
26: Vec X, B, Z, P, W, Q, U, M, N, R, S;
27: Mat Amat, Pmat;
28: PetscBool diagonalscale;
30: PetscFunctionBegin;
31: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
32: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
34: X = ksp->vec_sol;
35: B = ksp->vec_rhs;
36: R = ksp->work[0];
37: Z = ksp->work[1];
38: P = ksp->work[2];
39: N = ksp->work[3];
40: W = ksp->work[4];
41: Q = ksp->work[5];
42: U = ksp->work[6];
43: M = ksp->work[7];
44: S = ksp->work[8];
46: PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
48: ksp->its = 0;
49: if (!ksp->guess_zero) {
50: PetscCall(KSP_MatMult(ksp, Amat, X, R)); /* r <- b - Ax */
51: PetscCall(VecAYPX(R, -1.0, B));
52: } else {
53: PetscCall(VecCopy(B, R)); /* r <- b (x is 0) */
54: }
56: PetscCall(KSP_PCApply(ksp, R, U)); /* u <- Br */
58: switch (ksp->normtype) {
59: case KSP_NORM_PRECONDITIONED:
60: PetscCall(VecNormBegin(U, NORM_2, &dp)); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
61: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U)));
62: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
63: PetscCall(VecNormEnd(U, NORM_2, &dp));
64: break;
65: case KSP_NORM_UNPRECONDITIONED:
66: PetscCall(VecNormBegin(R, NORM_2, &dp)); /* dp <- r'*r = e'*A'*A*e */
67: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
68: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
69: PetscCall(VecNormEnd(R, NORM_2, &dp));
70: break;
71: case KSP_NORM_NATURAL:
72: PetscCall(VecDotBegin(R, U, &gamma)); /* gamma <- u'*r */
73: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
74: PetscCall(KSP_MatMult(ksp, Amat, U, W)); /* w <- Au */
75: PetscCall(VecDotEnd(R, U, &gamma));
76: KSPCheckDot(ksp, gamma);
77: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
78: break;
79: case KSP_NORM_NONE:
80: PetscCall(KSP_MatMult(ksp, Amat, U, W));
81: dp = 0.0;
82: break;
83: default:
84: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
85: }
86: PetscCall(KSPLogResidualHistory(ksp, dp));
87: PetscCall(KSPMonitor(ksp, 0, dp));
88: ksp->rnorm = dp;
89: PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP)); /* test for convergence */
90: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
92: i = 0;
93: do {
94: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
95: PetscCall(VecNormBegin(R, NORM_2, &dp));
96: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
97: PetscCall(VecNormBegin(U, NORM_2, &dp));
98: }
99: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotBegin(R, U, &gamma));
100: PetscCall(VecDotBegin(W, U, &delta));
101: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R)));
103: PetscCall(KSP_PCApply(ksp, W, M)); /* m <- Bw */
104: PetscCall(KSP_MatMult(ksp, Amat, M, N)); /* n <- Am */
106: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
107: PetscCall(VecNormEnd(R, NORM_2, &dp));
108: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
109: PetscCall(VecNormEnd(U, NORM_2, &dp));
110: }
111: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) PetscCall(VecDotEnd(R, U, &gamma));
112: PetscCall(VecDotEnd(W, U, &delta));
114: if (i > 0) {
115: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
116: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
118: ksp->rnorm = dp;
119: PetscCall(KSPLogResidualHistory(ksp, dp));
120: PetscCall(KSPMonitor(ksp, i, dp));
121: PetscCall((*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP));
122: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
123: }
125: if (i == 0) {
126: alpha = gamma / delta;
127: PetscCall(VecCopy(N, Z)); /* z <- n */
128: PetscCall(VecCopy(M, Q)); /* q <- m */
129: PetscCall(VecCopy(U, P)); /* p <- u */
130: PetscCall(VecCopy(W, S)); /* s <- w */
131: } else {
132: beta = gamma / gammaold;
133: alpha = gamma / (delta - beta / alpha * gamma);
134: PetscCall(VecAYPX(Z, beta, N)); /* z <- n + beta * z */
135: PetscCall(VecAYPX(Q, beta, M)); /* q <- m + beta * q */
136: PetscCall(VecAYPX(P, beta, U)); /* p <- u + beta * p */
137: PetscCall(VecAYPX(S, beta, W)); /* s <- w + beta * s */
138: }
139: PetscCall(VecAXPY(X, alpha, P)); /* x <- x + alpha * p */
140: PetscCall(VecAXPY(U, -alpha, Q)); /* u <- u - alpha * q */
141: PetscCall(VecAXPY(W, -alpha, Z)); /* w <- w - alpha * z */
142: PetscCall(VecAXPY(R, -alpha, S)); /* r <- r - alpha * s */
143: gammaold = gamma;
144: i++;
145: ksp->its = i;
147: /* if (i%50 == 0) { */
148: /* PetscCall(KSP_MatMult(ksp,Amat,X,R)); /\* w <- b - Ax *\/ */
149: /* PetscCall(VecAYPX(R,-1.0,B)); */
150: /* PetscCall(KSP_PCApply(ksp,R,U)); */
151: /* PetscCall(KSP_MatMult(ksp,Amat,U,W)); */
152: /* } */
154: } while (i <= ksp->max_it);
155: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
159: PETSC_INTERN PetscErrorCode KSPBuildResidual_CG(KSP, Vec, Vec, Vec *);
161: /*MC
162: KSPPIPECG - Pipelined conjugate gradient method. [](sec_pipelineksp)
164: Level: intermediate
166: Notes:
167: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG. The
168: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product, and `KSPGROPPCG`.
172: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
173: See [](doc_faq_pipelined)
175: Contributed by:
176: Pieter Ghysels, Universiteit Antwerpen, Intel Exascience lab Flanders
Reference:
P. Ghysels and W. Vanroose, "Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm",
Parallel Computing, 40(7), 2014.
182: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECG2`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPGMRES`, `KSPCG`, `KSPCGUseSingleReduction()`
183: M*/
184: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG(KSP ksp)
185: {
186: PetscFunctionBegin;
187: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2));
188: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
189: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
190: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
192: ksp->ops->setup = KSPSetUp_PIPECG;
193: ksp->ops->solve = KSPSolve_PIPECG;
194: ksp->ops->destroy = KSPDestroyDefault;
195: ksp->ops->view = NULL;
196: ksp->ops->setfromoptions = NULL;
197: ksp->ops->buildsolution = KSPBuildSolutionDefault;
198: ksp->ops->buildresidual = KSPBuildResidual_CG;
199: PetscFunctionReturn(PETSC_SUCCESS);
200: }