Actual source code: dgmres.c
1: /*
2: Implements deflated GMRES.
3: */
5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>
7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
9: #define GMRES_DELTA_DIRECTIONS 10
10: #define GMRES_DEFAULT_MAXK 30
11: static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
12: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
13: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
15: PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
16: {
17: PetscFunctionBegin;
18: PetscTryMethod((ksp), "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
19: PetscFunctionReturn(PETSC_SUCCESS);
20: }
21: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
22: {
23: PetscFunctionBegin;
24: PetscTryMethod((ksp), "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
25: PetscFunctionReturn(PETSC_SUCCESS);
26: }
27: PetscErrorCode KSPDGMRESForce(KSP ksp, PetscBool force)
28: {
29: PetscFunctionBegin;
30: PetscTryMethod((ksp), "KSPDGMRESForce_C", (KSP, PetscBool), (ksp, force));
31: PetscFunctionReturn(PETSC_SUCCESS);
32: }
33: PetscErrorCode KSPDGMRESSetRatio(KSP ksp, PetscReal ratio)
34: {
35: PetscFunctionBegin;
36: PetscTryMethod((ksp), "KSPDGMRESSetRatio_C", (KSP, PetscReal), (ksp, ratio));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
39: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
40: {
41: PetscFunctionBegin;
42: PetscUseMethod((ksp), "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
43: PetscFunctionReturn(PETSC_SUCCESS);
44: }
45: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
46: {
47: PetscFunctionBegin;
48: PetscUseMethod((ksp), "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
49: PetscFunctionReturn(PETSC_SUCCESS);
50: }
51: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
52: {
53: PetscFunctionBegin;
54: PetscUseMethod((ksp), "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
59: {
60: PetscFunctionBegin;
61: PetscUseMethod((ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
65: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
66: {
67: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
68: PetscInt neig = dgmres->neig + EIG_OFFSET;
69: PetscInt max_k = dgmres->max_k + 1;
71: PetscFunctionBegin;
72: PetscCall(KSPSetUp_GMRES(ksp));
73: if (!dgmres->neig) PetscFunctionReturn(PETSC_SUCCESS);
75: /* Allocate workspace for the Schur vectors*/
76: PetscCall(PetscMalloc1(neig * max_k, &SR));
77: dgmres->wr = NULL;
78: dgmres->wi = NULL;
79: dgmres->perm = NULL;
80: dgmres->modul = NULL;
81: dgmres->Q = NULL;
82: dgmres->Z = NULL;
84: UU = NULL;
85: XX = NULL;
86: MX = NULL;
87: AUU = NULL;
88: XMX = NULL;
89: XMU = NULL;
90: UMX = NULL;
91: AUAU = NULL;
92: TT = NULL;
93: TTF = NULL;
94: INVP = NULL;
95: X1 = NULL;
96: X2 = NULL;
97: MU = NULL;
98: PetscFunctionReturn(PETSC_SUCCESS);
99: }
101: /*
102: Run GMRES, possibly with restart. Return residual history if requested.
103: input parameters:
105: . gmres - structure containing parameters and work areas
107: output parameters:
108: . nres - residuals (from preconditioned system) at each step.
109: If restarting, consider passing nres+it. If null,
110: ignored
111: . itcount - number of iterations used. nres[0] to nres[itcount]
112: are defined. If null, ignored.
114: Notes:
115: On entry, the value in vector VEC_VV(0) should be the initial residual
116: (this allows shortcuts where the initial preconditioned residual is 0).
117: */
118: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
119: {
120: KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);
121: PetscReal res_norm, res, hapbnd, tt;
122: PetscInt it = 0;
123: PetscInt max_k = dgmres->max_k;
124: PetscBool hapend = PETSC_FALSE;
125: PetscReal res_old;
126: PetscInt test = 0;
128: PetscFunctionBegin;
129: PetscCall(VecNormalize(VEC_VV(0), &res_norm));
130: KSPCheckNorm(ksp, res_norm);
131: res = res_norm;
132: *GRS(0) = res_norm;
134: /* check for the convergence */
135: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
136: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
137: else ksp->rnorm = 0.0;
138: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
139: dgmres->it = (it - 1);
140: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
141: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
142: if (!res) {
143: if (itcount) *itcount = 0;
144: ksp->reason = KSP_CONVERGED_ATOL;
145: PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }
148: /* record the residual norm to test if deflation is needed */
149: res_old = res;
151: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
152: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
153: if (it) {
154: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
155: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
156: }
157: dgmres->it = (it - 1);
158: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPDGMRESGetNewVectors(ksp, it + 1));
159: if (dgmres->r > 0) {
160: if (ksp->pc_side == PC_LEFT) {
161: /* Apply the first preconditioner */
162: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP));
163: /* Then apply Deflation as a preconditioner */
164: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it)));
165: } else if (ksp->pc_side == PC_RIGHT) {
166: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP));
167: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP));
168: }
169: } else {
170: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP));
171: }
172: dgmres->matvecs += 1;
173: /* update hessenberg matrix and do Gram-Schmidt */
174: PetscCall((*dgmres->orthog)(ksp, it));
176: /* vv(i+1) . vv(i+1) */
177: PetscCall(VecNormalize(VEC_VV(it + 1), &tt));
178: /* save the magnitude */
179: *HH(it + 1, it) = tt;
180: *HES(it + 1, it) = tt;
182: /* check for the happy breakdown */
183: hapbnd = PetscAbsScalar(tt / *GRS(it));
184: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
185: if (tt < hapbnd) {
186: PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt));
187: hapend = PETSC_TRUE;
188: }
189: PetscCall(KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res));
191: it++;
192: dgmres->it = (it - 1); /* For converged */
193: ksp->its++;
194: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
195: else ksp->rnorm = 0.0;
196: if (ksp->reason) break;
198: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
200: /* Catch error in happy breakdown and signal convergence and break from loop */
201: if (hapend) {
202: if (!ksp->reason) {
203: PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
204: ksp->reason = KSP_DIVERGED_BREAKDOWN;
205: break;
206: }
207: }
208: }
210: /* Monitor if we know that we will not return for a restart */
211: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
212: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
213: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
214: }
215: if (itcount) *itcount = it;
217: /*
218: Down here we have to solve for the "best" coefficients of the Krylov
219: columns, add the solution values together, and possibly unwind the
220: preconditioning from the solution
221: */
222: /* Form the solution (or the solution so far) */
223: PetscCall(KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1));
225: /* Compute data for the deflation to be used during the next restart */
226: if (!ksp->reason && ksp->its < ksp->max_it) {
227: test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
228: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
229: if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) PetscCall(KSPDGMRESComputeDeflationData(ksp, NULL));
230: }
231: PetscFunctionReturn(PETSC_SUCCESS);
232: }
234: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
235: {
236: PetscInt i, its, itcount;
237: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
238: PetscBool guess_zero = ksp->guess_zero;
240: PetscFunctionBegin;
241: PetscCheck(!ksp->calc_sings || dgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
243: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
244: ksp->its = 0;
245: dgmres->matvecs = 0;
246: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
248: itcount = 0;
249: while (!ksp->reason) {
250: PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
251: if (ksp->pc_side == PC_LEFT) {
252: dgmres->matvecs += 1;
253: if (dgmres->r > 0) {
254: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP));
255: PetscCall(VecCopy(VEC_TEMP, VEC_VV(0)));
256: }
257: }
259: PetscCall(KSPDGMRESCycle(&its, ksp));
260: itcount += its;
261: if (itcount >= ksp->max_it) {
262: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
263: break;
264: }
265: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
266: }
267: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
269: for (i = 0; i < dgmres->r; i++) PetscCall(VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs"));
270: PetscFunctionReturn(PETSC_SUCCESS);
271: }
273: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
274: {
275: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
276: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
277: PetscInt max_neig = dgmres->max_neig;
279: PetscFunctionBegin;
280: if (dgmres->r) {
281: PetscCall(VecDestroyVecs(max_neig, &UU));
282: PetscCall(VecDestroyVecs(max_neig, &MU));
283: if (XX) {
284: PetscCall(VecDestroyVecs(neig1, &XX));
285: PetscCall(VecDestroyVecs(neig1, &MX));
286: }
287: PetscCall(PetscFree(TT));
288: PetscCall(PetscFree(TTF));
289: PetscCall(PetscFree(INVP));
290: PetscCall(PetscFree(XMX));
291: PetscCall(PetscFree(UMX));
292: PetscCall(PetscFree(XMU));
293: PetscCall(PetscFree(X1));
294: PetscCall(PetscFree(X2));
295: PetscCall(PetscFree(dgmres->work));
296: PetscCall(PetscFree(dgmres->iwork));
297: PetscCall(PetscFree(dgmres->wr));
298: PetscCall(PetscFree(dgmres->wi));
299: PetscCall(PetscFree(dgmres->modul));
300: PetscCall(PetscFree(dgmres->Q));
301: PetscCall(PetscFree(ORTH));
302: PetscCall(PetscFree(AUAU));
303: PetscCall(PetscFree(AUU));
304: PetscCall(PetscFree(SR2));
305: }
306: PetscCall(PetscFree(SR));
307: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL));
308: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL));
309: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL));
310: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL));
311: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL));
312: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL));
313: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL));
314: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL));
315: PetscCall(KSPDestroy_GMRES(ksp));
316: PetscFunctionReturn(PETSC_SUCCESS);
317: }
319: /*
320: KSPDGMRESBuildSoln - create the solution from the starting vector and the
321: current iterates.
323: Input parameters:
324: nrs - work area of size it + 1.
325: vs - index of initial guess
326: vdest - index of result. Note that vs may == vdest (replace
327: guess with the solution).
329: This is an internal routine that knows about the GMRES internals.
330: */
331: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
332: {
333: PetscScalar tt;
334: PetscInt ii, k, j;
335: KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);
337: /* Solve for solution vector that minimizes the residual */
339: PetscFunctionBegin;
340: /* If it is < 0, no gmres steps have been performed */
341: if (it < 0) {
342: PetscCall(VecCopy(vs, vdest)); /* VecCopy() is smart, exists immediately if vguess == vdest */
343: PetscFunctionReturn(PETSC_SUCCESS);
344: }
345: PetscCheck(*HH(it, it) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %" PetscInt_FMT " GRS(it) = %g", it, (double)PetscAbsScalar(*GRS(it)));
346: if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
347: else nrs[it] = 0.0;
349: for (ii = 1; ii <= it; ii++) {
350: k = it - ii;
351: tt = *GRS(k);
352: for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
353: PetscCheck(*HH(k, k) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is singular. HH(k,k) is identically zero; it = %" PetscInt_FMT " k = %" PetscInt_FMT, it, k);
354: nrs[k] = tt / *HH(k, k);
355: }
357: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
358: PetscCall(VecSet(VEC_TEMP, 0.0));
359: PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &VEC_VV(0)));
361: /* Apply deflation */
362: if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
363: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP));
364: PetscCall(VecCopy(VEC_TEMP_MATOP, VEC_TEMP));
365: }
366: PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));
368: /* add solution to previous solution */
369: if (vdest != vs) PetscCall(VecCopy(vs, vdest));
370: PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
371: PetscFunctionReturn(PETSC_SUCCESS);
372: }
374: /*
375: Do the scalar work for the orthogonalization. Return new residual norm.
376: */
377: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
378: {
379: PetscScalar *hh, *cc, *ss, tt;
380: PetscInt j;
381: KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);
383: PetscFunctionBegin;
384: hh = HH(0, it);
385: cc = CC(0);
386: ss = SS(0);
388: /* Apply all the previously computed plane rotations to the new column
389: of the Hessenberg matrix */
390: for (j = 1; j <= it; j++) {
391: tt = *hh;
392: *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
393: hh++;
394: *hh = *cc++ * *hh - (*ss++ * tt);
395: }
397: /*
398: compute the new plane rotation, and apply it to:
399: 1) the right-hand-side of the Hessenberg system
400: 2) the new column of the Hessenberg matrix
401: thus obtaining the updated value of the residual
402: */
403: if (!hapend) {
404: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
405: if (tt == 0.0) {
406: ksp->reason = KSP_DIVERGED_NULL;
407: PetscFunctionReturn(PETSC_SUCCESS);
408: }
409: *cc = *hh / tt;
410: *ss = *(hh + 1) / tt;
411: *GRS(it + 1) = -(*ss * *GRS(it));
412: *GRS(it) = PetscConj(*cc) * *GRS(it);
413: *hh = PetscConj(*cc) * *hh + *ss * *(hh + 1);
414: *res = PetscAbsScalar(*GRS(it + 1));
415: } else {
416: /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
417: another rotation matrix (so RH doesn't change). The new residual is
418: always the new sine term times the residual from last time (GRS(it)),
419: but now the new sine rotation would be zero...so the residual should
420: be zero...so we will multiply "zero" by the last residual. This might
421: not be exactly what we want to do here -could just return "zero". */
423: *res = 0.0;
424: }
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
428: /*
429: Allocates more work vectors, starting from VEC_VV(it).
430: */
431: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
432: {
433: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
434: PetscInt nwork = dgmres->nwork_alloc, k, nalloc;
436: PetscFunctionBegin;
437: nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
438: /* Adjust the number to allocate to make sure that we don't exceed the
439: number of available slots */
440: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
441: if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
443: dgmres->vv_allocated += nalloc;
445: PetscCall(KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL));
447: dgmres->mwork_alloc[nwork] = nalloc;
448: for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
449: dgmres->nwork_alloc++;
450: PetscFunctionReturn(PETSC_SUCCESS);
451: }
453: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
454: {
455: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
457: PetscFunctionBegin;
458: if (!ptr) {
459: if (!dgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &dgmres->sol_temp));
460: ptr = dgmres->sol_temp;
461: }
462: if (!dgmres->nrs) {
463: /* allocate the work area */
464: PetscCall(PetscMalloc1(dgmres->max_k, &dgmres->nrs));
465: }
466: PetscCall(KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it));
467: if (result) *result = ptr;
468: PetscFunctionReturn(PETSC_SUCCESS);
469: }
471: PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
472: {
473: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
474: PetscBool iascii, isharmonic;
476: PetscFunctionBegin;
477: PetscCall(KSPView_GMRES(ksp, viewer));
478: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
479: if (iascii) {
480: PetscCall(PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: %s\n", PetscBools[dgmres->force]));
481: PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic));
482: if (isharmonic) {
483: PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig));
484: } else {
485: PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig));
486: }
487: PetscCall(PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r));
488: PetscCall(PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig));
489: PetscCall(PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", (double)dgmres->smv));
490: PetscCall(PetscViewerASCIIPrintf(viewer, " Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs));
491: }
492: PetscFunctionReturn(PETSC_SUCCESS);
493: }
495: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
496: {
497: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
499: PetscFunctionBegin;
500: PetscCheck(neig >= 0 && neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of neig must be positive and less than the restart value ");
501: dgmres->neig = neig;
502: PetscFunctionReturn(PETSC_SUCCESS);
503: }
505: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
506: {
507: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
509: PetscFunctionBegin;
510: PetscCheck(max_neig >= 0 && max_neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of max_neig must be positive and less than the restart value ");
511: dgmres->max_neig = max_neig;
512: PetscFunctionReturn(PETSC_SUCCESS);
513: }
515: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
516: {
517: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
519: PetscFunctionBegin;
520: PetscCheck(ratio > 0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The relaxation parameter value must be positive");
521: dgmres->smv = ratio;
522: PetscFunctionReturn(PETSC_SUCCESS);
523: }
525: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
526: {
527: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
529: PetscFunctionBegin;
530: dgmres->force = force;
531: PetscFunctionReturn(PETSC_SUCCESS);
532: }
534: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
535: {
536: PetscInt neig;
537: PetscInt max_neig;
538: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
539: PetscBool flg;
541: PetscFunctionBegin;
542: PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
543: PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
544: PetscCall(PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg));
545: if (flg) PetscCall(KSPDGMRESSetEigen(ksp, neig));
546: PetscCall(PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg));
547: if (flg) PetscCall(KSPDGMRESSetMaxEigen(ksp, max_neig));
548: PetscCall(PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL));
549: PetscCall(PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL));
550: PetscCall(PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL));
551: PetscOptionsHeadEnd();
552: PetscFunctionReturn(PETSC_SUCCESS);
553: }
555: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
556: {
557: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
558: PetscInt i, j, k;
559: PetscBLASInt nr, bmax;
560: PetscInt r = dgmres->r;
561: PetscInt neig; /* number of eigenvalues to extract at each restart */
562: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
563: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
564: PetscInt N = dgmres->max_k + 1;
565: PetscInt n = dgmres->it + 1;
566: PetscReal alpha;
568: PetscFunctionBegin;
569: PetscCall(PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
570: if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
571: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: PetscCall(KSPDGMRESComputeSchurForm(ksp, &neig));
576: /* Form the extended Schur vectors X=VV*Sr */
577: if (!XX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &XX));
578: for (j = 0; j < neig; j++) {
579: PetscCall(VecZeroEntries(XX[j]));
580: PetscCall(VecMAXPY(XX[j], n, &SR[j * N], &VEC_VV(0)));
581: }
583: /* Orthogonalize X against U */
584: if (!ORTH) PetscCall(PetscMalloc1(max_neig, &ORTH));
585: if (r > 0) {
586: /* modified Gram-Schmidt */
587: for (j = 0; j < neig; j++) {
588: for (i = 0; i < r; i++) {
589: /* First, compute U'*X[j] */
590: PetscCall(VecDot(XX[j], UU[i], &alpha));
591: /* Then, compute X(j)=X(j)-U*U'*X(j) */
592: PetscCall(VecAXPY(XX[j], -alpha, UU[i]));
593: }
594: }
595: }
596: /* Compute MX = M^{-1}*A*X */
597: if (!MX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &MX));
598: for (j = 0; j < neig; j++) PetscCall(KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP));
599: dgmres->matvecs += neig;
601: if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
602: PetscCall(KSPDGMRESImproveEig(ksp, neig));
603: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
604: PetscFunctionReturn(PETSC_SUCCESS); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
605: }
607: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
608: if (!XMX) PetscCall(PetscMalloc1(neig1 * neig1, &XMX));
609: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, XX, &(XMX[j * neig1])));
611: if (r > 0) {
612: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
613: if (!UMX) PetscCall(PetscMalloc1(max_neig * neig1, &UMX));
614: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, UU, &(UMX[j * max_neig])));
615: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
616: if (!XMU) PetscCall(PetscMalloc1(max_neig * neig1, &XMU));
617: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, XX, &(XMU[j * neig1])));
618: }
620: /* Form the new matrix T = [T UMX; XMU XMX]; */
621: if (!TT) PetscCall(PetscMalloc1(max_neig * max_neig, &TT));
622: if (r > 0) {
623: /* Add XMU to T */
624: for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&(TT[max_neig * j + r]), &(XMU[neig1 * j]), neig));
625: /* Add [UMX; XMX] to T */
626: for (j = 0; j < neig; j++) {
627: k = r + j;
628: PetscCall(PetscArraycpy(&(TT[max_neig * k]), &(UMX[max_neig * j]), r));
629: PetscCall(PetscArraycpy(&(TT[max_neig * k + r]), &(XMX[neig1 * j]), neig));
630: }
631: } else { /* Add XMX to T */
632: for (j = 0; j < neig; j++) PetscCall(PetscArraycpy(&(TT[max_neig * j]), &(XMX[neig1 * j]), neig));
633: }
635: dgmres->r += neig;
636: r = dgmres->r;
637: PetscCall(PetscBLASIntCast(r, &nr));
638: /*LU Factorize T with Lapack xgetrf routine */
640: PetscCall(PetscBLASIntCast(max_neig, &bmax));
641: if (!TTF) PetscCall(PetscMalloc1(bmax * bmax, &TTF));
642: PetscCall(PetscArraycpy(TTF, TT, bmax * r));
643: if (!INVP) PetscCall(PetscMalloc1(bmax, &INVP));
644: {
645: PetscBLASInt info;
646: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
647: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%d", (int)info);
648: }
650: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
651: if (!UU) {
652: PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &UU));
653: PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &MU));
654: }
655: for (j = 0; j < neig; j++) {
656: PetscCall(VecCopy(XX[j], UU[r - neig + j]));
657: PetscCall(VecCopy(MX[j], MU[r - neig + j]));
658: }
659: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
660: PetscFunctionReturn(PETSC_SUCCESS);
661: }
663: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
664: {
665: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
666: PetscInt N = dgmres->max_k + 1, n = dgmres->it + 1;
667: PetscBLASInt bn;
668: PetscReal *A;
669: PetscBLASInt ihi;
670: PetscBLASInt ldA = 0; /* leading dimension of A */
671: PetscBLASInt ldQ; /* leading dimension of Q */
672: PetscReal *Q; /* orthogonal matrix of (left) Schur vectors */
673: PetscReal *work; /* working vector */
674: PetscBLASInt lwork; /* size of the working vector */
675: PetscInt *perm; /* Permutation vector to sort eigenvalues */
676: PetscInt i, j;
677: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
678: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modulus of the eigenvalues of A */
679: PetscBLASInt *select;
680: PetscBLASInt *iwork;
681: PetscBLASInt liwork;
682: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
683: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
684: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
685: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
687: PetscFunctionBegin;
688: PetscCall(PetscBLASIntCast(n, &bn));
689: PetscCall(PetscBLASIntCast(N, &ldA));
690: ihi = ldQ = bn;
691: PetscCall(PetscBLASIntCast(5 * N, &lwork));
693: #if defined(PETSC_USE_COMPLEX)
694: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
695: #endif
697: PetscCall(PetscMalloc1(ldA * ldA, &A));
698: PetscCall(PetscMalloc1(ldQ * n, &Q));
699: PetscCall(PetscMalloc1(lwork, &work));
700: if (!dgmres->wr) {
701: PetscCall(PetscMalloc1(n, &dgmres->wr));
702: PetscCall(PetscMalloc1(n, &dgmres->wi));
703: }
704: wr = dgmres->wr;
705: wi = dgmres->wi;
706: PetscCall(PetscMalloc1(n, &modul));
707: PetscCall(PetscMalloc1(n, &perm));
708: /* copy the Hessenberg matrix to work space */
709: PetscCall(PetscArraycpy(A, dgmres->hes_origin, ldA * ldA));
710: PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag));
711: if (flag) {
712: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
713: /* Transpose the Hessenberg matrix */
714: PetscCall(PetscMalloc1(bn * bn, &Ht));
715: for (i = 0; i < bn; i++) {
716: for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
717: }
719: /* Solve the system H^T*t = h_{m+1,m}e_m */
720: PetscCall(PetscCalloc1(bn, &t));
721: t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
722: PetscCall(PetscMalloc1(bn, &ipiv));
723: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
724: {
725: PetscBLASInt info;
726: PetscBLASInt nrhs = 1;
727: PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
728: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error while calling the Lapack routine DGESV");
729: }
730: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
731: for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
732: PetscCall(PetscFree(t));
733: PetscCall(PetscFree(Ht));
734: }
735: /* Compute eigenvalues with the Schur form */
736: {
737: PetscBLASInt info = 0;
738: PetscBLASInt ilo = 1;
739: PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
740: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XHSEQR %d", (int)info);
741: }
742: PetscCall(PetscFree(work));
744: /* sort the eigenvalues */
745: for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
746: for (i = 0; i < n; i++) perm[i] = i;
748: PetscCall(PetscSortRealWithPermutation(n, modul, perm));
749: /* save the complex modulus of the largest eigenvalue in magnitude */
750: if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
751: /* count the number of extracted eigenvalues (with complex conjugates) */
752: NbrEig = 0;
753: while (NbrEig < dgmres->neig) {
754: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
755: else NbrEig += 1;
756: }
757: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
759: PetscCall(PetscCalloc1(n, &select));
761: if (!dgmres->GreatestEig) {
762: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
763: } else {
764: for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
765: }
766: /* call Lapack dtrsen */
767: lwork = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
768: liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
769: PetscCall(PetscMalloc1(lwork, &work));
770: PetscCall(PetscMalloc1(liwork, &iwork));
771: {
772: PetscBLASInt info = 0;
773: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
774: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
775: PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
776: PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
777: }
778: PetscCall(PetscFree(select));
780: /* Extract the Schur vectors */
781: for (j = 0; j < NbrEig; j++) PetscCall(PetscArraycpy(&SR[j * N], &(Q[j * ldQ]), n));
782: *neig = NbrEig;
783: PetscCall(PetscFree(A));
784: PetscCall(PetscFree(work));
785: PetscCall(PetscFree(perm));
786: PetscCall(PetscFree(work));
787: PetscCall(PetscFree(iwork));
788: PetscCall(PetscFree(modul));
789: PetscCall(PetscFree(Q));
790: PetscFunctionReturn(PETSC_SUCCESS);
791: }
793: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
794: {
795: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
796: PetscInt i, r = dgmres->r;
797: PetscReal alpha = 1.0;
798: PetscInt max_neig = dgmres->max_neig;
799: PetscBLASInt br, bmax;
800: PetscReal lambda = dgmres->lambdaN;
802: PetscFunctionBegin;
803: PetscCall(PetscBLASIntCast(r, &br));
804: PetscCall(PetscBLASIntCast(max_neig, &bmax));
805: PetscCall(PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
806: if (!r) {
807: PetscCall(VecCopy(x, y));
808: PetscFunctionReturn(PETSC_SUCCESS);
809: }
810: /* Compute U'*x */
811: if (!X1) {
812: PetscCall(PetscMalloc1(bmax, &X1));
813: PetscCall(PetscMalloc1(bmax, &X2));
814: }
815: PetscCall(VecMDot(x, r, UU, X1));
817: /* Solve T*X1=X2 for X1*/
818: PetscCall(PetscArraycpy(X2, X1, br));
819: {
820: PetscBLASInt info;
821: PetscBLASInt nrhs = 1;
822: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
823: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRS %d", (int)info);
824: }
825: /* Iterative refinement -- is it really necessary ?? */
826: if (!WORK) {
827: PetscCall(PetscMalloc1(3 * bmax, &WORK));
828: PetscCall(PetscMalloc1(bmax, &IWORK));
829: }
830: {
831: PetscBLASInt info;
832: PetscReal berr, ferr;
833: PetscBLASInt nrhs = 1;
834: PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
835: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGERFS %d", (int)info);
836: }
838: for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];
840: /* Compute X2=U*X2 */
841: PetscCall(VecZeroEntries(y));
842: PetscCall(VecMAXPY(y, r, X2, UU));
843: PetscCall(VecAXPY(y, alpha, x));
845: PetscCall(PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
846: PetscFunctionReturn(PETSC_SUCCESS);
847: }
849: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
850: {
851: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
852: PetscInt j, r_old, r = dgmres->r;
853: PetscBLASInt i = 0;
854: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
855: PetscInt bmax = dgmres->max_neig;
856: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
857: PetscInt aug1 = bmax + neig1; /* maximum size of the augmented invariant basis */
858: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
859: PetscBLASInt N; /* size of AUAU */
860: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
861: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
862: PetscReal *work; /* working vector */
863: PetscBLASInt lwork; /* size of the working vector */
864: PetscInt *perm; /* Permutation vector to sort eigenvalues */
865: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modulus of the eigenvalues of A*/
866: PetscBLASInt NbrEig = 0, nr, bm;
867: PetscBLASInt *select;
868: PetscBLASInt liwork, *iwork;
870: PetscFunctionBegin;
871: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
872: if (!AUU) {
873: PetscCall(PetscMalloc1(aug1 * aug1, &AUU));
874: PetscCall(PetscMalloc1(aug1 * aug1, &AUAU));
875: }
876: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
877: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
878: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
879: for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], r, MU, &AUU[j * aug1]));
880: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
881: for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]));
882: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
883: for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]));
884: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
885: for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]));
887: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
888: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
889: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, MU, &AUAU[j * aug1]));
890: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
891: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]));
892: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
893: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]));
894: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
895: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]));
897: /* Computation of the eigenvectors */
898: PetscCall(PetscBLASIntCast(aug1, &ldA));
899: PetscCall(PetscBLASIntCast(aug, &N));
900: lwork = 8 * N + 20; /* sizeof the working space */
901: PetscCall(PetscMalloc1(N, &wr));
902: PetscCall(PetscMalloc1(N, &wi));
903: PetscCall(PetscMalloc1(N, &beta));
904: PetscCall(PetscMalloc1(N, &modul));
905: PetscCall(PetscMalloc1(N, &perm));
906: PetscCall(PetscMalloc1(N * N, &Q));
907: PetscCall(PetscMalloc1(N * N, &Z));
908: PetscCall(PetscMalloc1(lwork, &work));
909: {
910: PetscBLASInt info = 0;
911: PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
912: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGGES %d", (int)info);
913: }
914: for (i = 0; i < N; i++) {
915: if (beta[i] != 0.0) {
916: wr[i] /= beta[i];
917: wi[i] /= beta[i];
918: }
919: }
920: /* sort the eigenvalues */
921: for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
922: for (i = 0; i < N; i++) perm[i] = i;
923: PetscCall(PetscSortRealWithPermutation(N, modul, perm));
924: /* Save the norm of the largest eigenvalue */
925: if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
926: /* Allocate space to extract the first r schur vectors */
927: if (!SR2) PetscCall(PetscMalloc1(aug1 * bmax, &SR2));
928: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
929: while (NbrEig < bmax) {
930: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
931: else NbrEig += 2;
932: }
933: if (NbrEig > bmax) NbrEig = bmax - 1;
934: r_old = r; /* previous size of r */
935: dgmres->r = r = NbrEig;
937: /* Select the eigenvalues to reorder */
938: PetscCall(PetscCalloc1(N, &select));
939: if (!dgmres->GreatestEig) {
940: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
941: } else {
942: for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
943: }
944: /* Reorder and extract the new <r> schur vectors */
945: lwork = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
946: liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
947: PetscCall(PetscFree(work));
948: PetscCall(PetscMalloc1(lwork, &work));
949: PetscCall(PetscMalloc1(liwork, &iwork));
950: {
951: PetscBLASInt info = 0;
952: PetscReal Dif[2];
953: PetscBLASInt ijob = 2;
954: PetscBLASInt wantQ = 1, wantZ = 1;
955: PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
956: PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
957: }
958: PetscCall(PetscFree(select));
960: for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&SR2[j * aug1], &(Z[j * N]), N));
962: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
963: -- save it temporarily in MU */
964: for (j = 0; j < r; j++) {
965: PetscCall(VecZeroEntries(MU[j]));
966: PetscCall(VecMAXPY(MU[j], r_old, &SR2[j * aug1], UU));
967: PetscCall(VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX));
968: }
969: /* Form T = U'*MU*U */
970: for (j = 0; j < r; j++) {
971: PetscCall(VecCopy(MU[j], UU[j]));
972: PetscCall(KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP));
973: }
974: dgmres->matvecs += r;
975: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, UU, &TT[j * bmax]));
976: /* Factorize T */
977: PetscCall(PetscArraycpy(TTF, TT, bmax * r));
978: PetscCall(PetscBLASIntCast(r, &nr));
979: PetscCall(PetscBLASIntCast(bmax, &bm));
980: {
981: PetscBLASInt info;
982: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
983: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%d", (int)info);
984: }
985: /* Free Memory */
986: PetscCall(PetscFree(wr));
987: PetscCall(PetscFree(wi));
988: PetscCall(PetscFree(beta));
989: PetscCall(PetscFree(modul));
990: PetscCall(PetscFree(perm));
991: PetscCall(PetscFree(Q));
992: PetscCall(PetscFree(Z));
993: PetscCall(PetscFree(work));
994: PetscCall(PetscFree(iwork));
995: PetscFunctionReturn(PETSC_SUCCESS);
996: }
998: /*MC
999: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1000: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1001: stagnation occurs.
1003: Options Database Keys:
1004: GMRES Options (inherited):
1005: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1006: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1007: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1008: vectors are allocated as needed)
1009: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1010: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1011: . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1012: stability of the classical Gram-Schmidt orthogonalization.
1013: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1015: DGMRES Options Database Keys:
1016: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1017: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1018: process
1019: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1020: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1021: parsed by `PetscOptionsGetViewer()`. If neig > 1, viewerspec should
1022: end with ":append". No vectors will be viewed if the adaptive
1023: strategy chooses not to deflate, so -ksp_dgmres_force should also
1024: be given.
1025: The deflation vectors span a subspace that may be a good
1026: approximation of the subspace of smallest eigenvectors of the
1027: preconditioned operator, so this option can aid in understanding
1028: the performance of a preconditioner.
1030: Level: beginner
1032: Note:
1033: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported
1035: References:
1036: + [1] - J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1037: - [2] - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1038: In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1040: Contributed by:
1041: Desire NUENTSA WAKAM, INRIA
1043: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1044: `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1045: `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1046: `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1047: M*/
1049: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1050: {
1051: KSP_DGMRES *dgmres;
1053: PetscFunctionBegin;
1054: PetscCall(PetscNew(&dgmres));
1055: ksp->data = (void *)dgmres;
1057: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
1058: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
1059: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
1061: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1062: ksp->ops->setup = KSPSetUp_DGMRES;
1063: ksp->ops->solve = KSPSolve_DGMRES;
1064: ksp->ops->destroy = KSPDestroy_DGMRES;
1065: ksp->ops->view = KSPView_DGMRES;
1066: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1067: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1068: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1070: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
1071: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
1072: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
1073: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
1074: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
1075: /* -- New functions defined in DGMRES -- */
1076: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
1077: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES));
1078: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES));
1079: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES));
1080: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
1081: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
1082: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));
1083: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES));
1085: PetscCall(PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData));
1086: PetscCall(PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation));
1088: dgmres->haptol = 1.0e-30;
1089: dgmres->q_preallocate = 0;
1090: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1091: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1092: dgmres->nrs = NULL;
1093: dgmres->sol_temp = NULL;
1094: dgmres->max_k = GMRES_DEFAULT_MAXK;
1095: dgmres->Rsvd = NULL;
1096: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1097: dgmres->orthogwork = NULL;
1099: /* Default values for the deflation */
1100: dgmres->r = 0;
1101: dgmres->neig = DGMRES_DEFAULT_EIG;
1102: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG - 1;
1103: dgmres->lambdaN = 0.0;
1104: dgmres->smv = SMV;
1105: dgmres->matvecs = 0;
1106: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1107: dgmres->HasSchur = PETSC_FALSE;
1108: PetscFunctionReturn(PETSC_SUCCESS);
1109: }