Actual source code: ibcgs.c
2: #include <petsc/private/kspimpl.h>
3: #include <petsc/private/vecimpl.h>
5: static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
6: {
7: PetscBool diagonalscale;
9: PetscFunctionBegin;
10: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
11: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
12: PetscCall(KSPSetWorkVecs(ksp, 9));
13: PetscFunctionReturn(PETSC_SUCCESS);
14: }
16: /*
17: The code below "cheats" from PETSc style
18: 1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
19: restore is that Vec operations are done on some of the vectors during the solve and if we did not restore immediately it would
20: generate two VecGetArray() (the second one inside the Vec operation) calls without a restore between them.
21: 2) The vector operations on done directly on the arrays instead of with VecXXXX() calls
23: For clarity in the code we name single VECTORS with two names, for example, Rn_1 and R, but they actually always
24: the exact same memory. We do this with macro defines so that compiler won't think they are
25: two different variables.
27: */
28: #define Xn_1 Xn
29: #define xn_1 xn
30: #define Rn_1 Rn
31: #define rn_1 rn
32: #define Un_1 Un
33: #define un_1 un
34: #define Vn_1 Vn
35: #define vn_1 vn
36: #define Qn_1 Qn
37: #define qn_1 qn
38: #define Zn_1 Zn
39: #define zn_1 zn
40: static PetscErrorCode KSPSolve_IBCGS(KSP ksp)
41: {
42: PetscInt i, N;
43: PetscReal rnorm = 0.0, rnormin = 0.0;
44: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
45: /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem
46: on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithmetic
47: rather than just 64-bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
48: and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.
50: Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
51: precision into a 16 byte space (the rest of the space is ignored) */
52: long double insums[7], outsums[7];
53: #else
54: PetscScalar insums[7], outsums[7];
55: #endif
56: PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin, tmp1, tmp2;
57: PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
58: const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
59: PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
60: /* the rest do not have to keep n_1 values */
61: PetscScalar kappan, thetan, etan, gamman, betan, deltan;
62: const PetscScalar *PETSC_RESTRICT tn;
63: PetscScalar *PETSC_RESTRICT sn;
64: Vec R0, Rn, Xn, F0, Vn, Zn, Qn, Tn, Sn, B, Un;
65: Mat A;
67: PetscFunctionBegin;
68: PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");
70: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
71: /* since 80 bit long doubls do not fill the upper bits, we fill them initially so that
72: valgrind won't detect MPI_Allreduce() with uninitialized data */
73: PetscCall(PetscMemzero(insums, sizeof(insums)));
74: PetscCall(PetscMemzero(insums, sizeof(insums)));
75: #endif
77: PetscCall(PCGetOperators(ksp->pc, &A, NULL));
78: PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
79: Xn = ksp->vec_sol;
80: PetscCall(VecGetArray(Xn_1, (PetscScalar **)&xn_1));
81: PetscCall(VecRestoreArray(Xn_1, NULL));
82: B = ksp->vec_rhs;
83: PetscCall(VecGetArrayRead(B, (const PetscScalar **)&b));
84: PetscCall(VecRestoreArrayRead(B, NULL));
85: R0 = ksp->work[0];
86: PetscCall(VecGetArrayRead(R0, (const PetscScalar **)&r0));
87: PetscCall(VecRestoreArrayRead(R0, NULL));
88: Rn = ksp->work[1];
89: PetscCall(VecGetArray(Rn_1, (PetscScalar **)&rn_1));
90: PetscCall(VecRestoreArray(Rn_1, NULL));
91: Un = ksp->work[2];
92: PetscCall(VecGetArrayRead(Un_1, (const PetscScalar **)&un_1));
93: PetscCall(VecRestoreArrayRead(Un_1, NULL));
94: F0 = ksp->work[3];
95: PetscCall(VecGetArrayRead(F0, (const PetscScalar **)&f0));
96: PetscCall(VecRestoreArrayRead(F0, NULL));
97: Vn = ksp->work[4];
98: PetscCall(VecGetArray(Vn_1, (PetscScalar **)&vn_1));
99: PetscCall(VecRestoreArray(Vn_1, NULL));
100: Zn = ksp->work[5];
101: PetscCall(VecGetArray(Zn_1, (PetscScalar **)&zn_1));
102: PetscCall(VecRestoreArray(Zn_1, NULL));
103: Qn = ksp->work[6];
104: PetscCall(VecGetArrayRead(Qn_1, (const PetscScalar **)&qn_1));
105: PetscCall(VecRestoreArrayRead(Qn_1, NULL));
106: Tn = ksp->work[7];
107: PetscCall(VecGetArrayRead(Tn, (const PetscScalar **)&tn));
108: PetscCall(VecRestoreArrayRead(Tn, NULL));
109: Sn = ksp->work[8];
110: PetscCall(VecGetArrayRead(Sn, (const PetscScalar **)&sn));
111: PetscCall(VecRestoreArrayRead(Sn, NULL));
113: /* r0 = rn_1 = b - A*xn_1; */
114: /* PetscCall(KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn));
115: PetscCall(VecAYPX(Rn_1,-1.0,B)); */
116: PetscCall(KSPInitialResidual(ksp, Xn_1, Tn, Sn, Rn_1, B));
117: if (ksp->normtype != KSP_NORM_NONE) {
118: PetscCall(VecNorm(Rn_1, NORM_2, &rnorm));
119: KSPCheckNorm(ksp, rnorm);
120: }
121: PetscCall(KSPMonitor(ksp, 0, rnorm));
122: PetscCall((*ksp->converged)(ksp, 0, rnorm, &ksp->reason, ksp->cnvP));
123: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
125: PetscCall(VecCopy(Rn_1, R0));
127: /* un_1 = A*rn_1; */
128: PetscCall(KSP_PCApplyBAorAB(ksp, Rn_1, Un_1, Tn));
130: /* f0 = A'*rn_1; */
131: if (ksp->pc_side == PC_RIGHT) { /* B' A' */
132: PetscCall(KSP_MatMultTranspose(ksp, A, R0, Tn));
133: PetscCall(KSP_PCApplyTranspose(ksp, Tn, F0));
134: } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
135: PetscCall(KSP_PCApplyTranspose(ksp, R0, Tn));
136: PetscCall(KSP_MatMultTranspose(ksp, A, Tn, F0));
137: }
139: /*qn_1 = vn_1 = zn_1 = 0.0; */
140: PetscCall(VecSet(Qn_1, 0.0));
141: PetscCall(VecSet(Vn_1, 0.0));
142: PetscCall(VecSet(Zn_1, 0.0));
144: sigman_2 = pin_1 = taun_1 = 0.0;
146: /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
147: PetscCall(VecDot(R0, R0, &phin_1));
148: KSPCheckDot(ksp, phin_1);
150: /* sigman_1 = rn_1'un_1 */
151: PetscCall(VecDot(R0, Un_1, &sigman_1));
153: alphan_1 = omegan_1 = 1.0;
155: for (ksp->its = 1; ksp->its < ksp->max_it + 1; ksp->its++) {
156: rhon = phin_1 - omegan_1 * sigman_2 + omegan_1 * alphan_1 * pin_1;
157: if (ksp->its == 1) deltan = rhon;
158: else deltan = rhon / taun_1;
159: betan = deltan / omegan_1;
160: taun = sigman_1 + betan * taun_1 - deltan * pin_1;
161: if (taun == 0.0) {
162: PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to taun is zero, iteration %" PetscInt_FMT, ksp->its);
163: ksp->reason = KSP_DIVERGED_NANORINF;
164: PetscFunctionReturn(PETSC_SUCCESS);
165: }
166: alphan = rhon / taun;
167: PetscCall(PetscLogFlops(15.0));
169: /*
170: zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
171: vn = un_1 + betan*vn_1 - deltan*qn_1
172: sn = rn_1 - alphan*vn
174: The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
175: */
176: PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
177: tmp1 = (alphan / alphan_1) * betan;
178: tmp2 = alphan * deltan;
179: for (i = 0; i < N; i++) {
180: zn[i] = alphan * rn_1[i] + tmp1 * zn_1[i] - tmp2 * vn_1[i];
181: vn[i] = un_1[i] + betan * vn_1[i] - deltan * qn_1[i];
182: sn[i] = rn_1[i] - alphan * vn[i];
183: }
184: PetscCall(PetscLogFlops(3.0 + 11.0 * N));
185: PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
187: /*
188: qn = A*vn
189: */
190: PetscCall(KSP_PCApplyBAorAB(ksp, Vn, Qn, Tn));
192: /*
193: tn = un_1 - alphan*qn
194: */
195: PetscCall(VecWAXPY(Tn, -alphan, Qn, Un_1));
197: /*
198: phin = r0'sn
199: pin = r0'qn
200: gamman = f0'sn
201: etan = f0'tn
202: thetan = sn'tn
203: kappan = tn'tn
204: */
205: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
206: phin = pin = gamman = etan = thetan = kappan = 0.0;
207: for (i = 0; i < N; i++) {
208: phin += r0[i] * sn[i];
209: pin += r0[i] * qn[i];
210: gamman += f0[i] * sn[i];
211: etan += f0[i] * tn[i];
212: thetan += sn[i] * tn[i];
213: kappan += tn[i] * tn[i];
214: }
215: PetscCall(PetscLogFlops(12.0 * N));
216: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
218: insums[0] = phin;
219: insums[1] = pin;
220: insums[2] = gamman;
221: insums[3] = etan;
222: insums[4] = thetan;
223: insums[5] = kappan;
224: insums[6] = rnormin;
226: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
227: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
228: if (ksp->lagnorm && ksp->its > 1) {
229: PetscCall(MPIU_Allreduce(insums, outsums, 7, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
230: } else {
231: PetscCall(MPIU_Allreduce(insums, outsums, 6, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
232: }
233: #else
234: if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) {
235: PetscCall(MPIU_Allreduce(insums, outsums, 7, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
236: } else {
237: PetscCall(MPIU_Allreduce(insums, outsums, 6, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
238: }
239: #endif
240: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
241: phin = outsums[0];
242: pin = outsums[1];
243: gamman = outsums[2];
244: etan = outsums[3];
245: thetan = outsums[4];
246: kappan = outsums[5];
247: if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));
249: if (kappan == 0.0) {
250: PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to kappan is zero, iteration %" PetscInt_FMT, ksp->its);
251: ksp->reason = KSP_DIVERGED_NANORINF;
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
254: if (thetan == 0.0) {
255: PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to thetan is zero, iteration %" PetscInt_FMT, ksp->its);
256: ksp->reason = KSP_DIVERGED_NANORINF;
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
259: omegan = thetan / kappan;
260: sigman = gamman - omegan * etan;
262: /*
263: rn = sn - omegan*tn
264: xn = xn_1 + zn + omegan*sn
265: */
266: PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
267: rnormin = 0.0;
268: for (i = 0; i < N; i++) {
269: rn[i] = sn[i] - omegan * tn[i];
270: rnormin += PetscRealPart(PetscConj(rn[i]) * rn[i]);
271: xn[i] += zn[i] + omegan * sn[i];
272: }
273: PetscCall(PetscObjectStateIncrease((PetscObject)Xn));
274: PetscCall(PetscLogFlops(7.0 * N));
275: PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));
277: if (!ksp->lagnorm && ksp->chknorm < ksp->its && ksp->normtype != KSP_NORM_NONE) {
278: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
279: PetscCall(MPIU_Allreduce(&rnormin, &rnorm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
280: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
281: rnorm = PetscSqrtReal(rnorm);
282: }
284: /* Test for convergence */
285: PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
286: PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
287: if (ksp->reason) {
288: PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
289: PetscFunctionReturn(PETSC_SUCCESS);
290: }
292: /* un = A*rn */
293: PetscCall(KSP_PCApplyBAorAB(ksp, Rn, Un, Tn));
295: /* Update n-1 locations with n locations */
296: sigman_2 = sigman_1;
297: sigman_1 = sigman;
298: pin_1 = pin;
299: phin_1 = phin;
300: alphan_1 = alphan;
301: taun_1 = taun;
302: omegan_1 = omegan;
303: }
304: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
305: PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: /*MC
310: KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient) method
311: in an alternative form to have only a single global reduction operation instead of the usual 3 (or 4)
313: Level: beginner
315: Notes:
316: Supports left and right preconditioning
318: See `KSPBCGSL` for additional stabilization
320: Unlike the Bi-CG-stab algorithm, this requires one multiplication be the transpose of the operator
321: before the iteration starts.
323: The paper has two errors in the algorithm presented, they are fixed in the code in `KSPSolve_IBCGS()`
325: For maximum reduction in the number of global reduction operations, this solver should be used with
326: `KSPSetLagNorm()`.
328: This is not supported for complex numbers.
330: Reference:
331: The Improved BiCGStab Method for Large and Sparse Unsymmetric Linear Systems on Parallel Distributed Memory
332: Architectures. L. T. Yang and R. Brent, Proceedings of the Fifth International Conference on Algorithms and
333: Architectures for Parallel Processing, 2002, IEEE.
335: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPBCGSL`, `KSPIBCGS`, `KSPSetLagNorm()`
336: M*/
338: PETSC_EXTERN PetscErrorCode KSPCreate_IBCGS(KSP ksp)
339: {
340: PetscFunctionBegin;
342: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
343: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
344: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
346: ksp->ops->setup = KSPSetUp_IBCGS;
347: ksp->ops->solve = KSPSolve_IBCGS;
348: ksp->ops->destroy = KSPDestroyDefault;
349: ksp->ops->buildsolution = KSPBuildSolutionDefault;
350: ksp->ops->buildresidual = KSPBuildResidualDefault;
351: ksp->ops->setfromoptions = NULL;
352: ksp->ops->view = NULL;
353: #if defined(PETSC_USE_COMPLEX)
354: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "This is not supported for complex numbers");
355: #else
356: PetscFunctionReturn(PETSC_SUCCESS);
357: #endif
358: }