Actual source code: dgmres.c

  1: /*
  2:     Implements deflated GMRES.
  3: */

  5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>

  7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

  9: #define GMRES_DELTA_DIRECTIONS 10
 10: #define GMRES_DEFAULT_MAXK     30
 11: static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
 12: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
 13: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);

 15: PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
 16: {
 17:   PetscFunctionBegin;
 18:   PetscTryMethod((ksp), "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
 19:   PetscFunctionReturn(PETSC_SUCCESS);
 20: }
 21: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
 22: {
 23:   PetscFunctionBegin;
 24:   PetscTryMethod((ksp), "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
 25:   PetscFunctionReturn(PETSC_SUCCESS);
 26: }
 27: PetscErrorCode KSPDGMRESForce(KSP ksp, PetscBool force)
 28: {
 29:   PetscFunctionBegin;
 30:   PetscTryMethod((ksp), "KSPDGMRESForce_C", (KSP, PetscBool), (ksp, force));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }
 33: PetscErrorCode KSPDGMRESSetRatio(KSP ksp, PetscReal ratio)
 34: {
 35:   PetscFunctionBegin;
 36:   PetscTryMethod((ksp), "KSPDGMRESSetRatio_C", (KSP, PetscReal), (ksp, ratio));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }
 39: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
 40: {
 41:   PetscFunctionBegin;
 42:   PetscUseMethod((ksp), "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
 43:   PetscFunctionReturn(PETSC_SUCCESS);
 44: }
 45: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
 46: {
 47:   PetscFunctionBegin;
 48:   PetscUseMethod((ksp), "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
 49:   PetscFunctionReturn(PETSC_SUCCESS);
 50: }
 51: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 52: {
 53:   PetscFunctionBegin;
 54:   PetscUseMethod((ksp), "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }

 58: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 59: {
 60:   PetscFunctionBegin;
 61:   PetscUseMethod((ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
 62:   PetscFunctionReturn(PETSC_SUCCESS);
 63: }

 65: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
 66: {
 67:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
 68:   PetscInt    neig   = dgmres->neig + EIG_OFFSET;
 69:   PetscInt    max_k  = dgmres->max_k + 1;

 71:   PetscFunctionBegin;
 72:   PetscCall(KSPSetUp_GMRES(ksp));
 73:   if (!dgmres->neig) PetscFunctionReturn(PETSC_SUCCESS);

 75:   /* Allocate workspace for the Schur vectors*/
 76:   PetscCall(PetscMalloc1(neig * max_k, &SR));
 77:   dgmres->wr    = NULL;
 78:   dgmres->wi    = NULL;
 79:   dgmres->perm  = NULL;
 80:   dgmres->modul = NULL;
 81:   dgmres->Q     = NULL;
 82:   dgmres->Z     = NULL;

 84:   UU   = NULL;
 85:   XX   = NULL;
 86:   MX   = NULL;
 87:   AUU  = NULL;
 88:   XMX  = NULL;
 89:   XMU  = NULL;
 90:   UMX  = NULL;
 91:   AUAU = NULL;
 92:   TT   = NULL;
 93:   TTF  = NULL;
 94:   INVP = NULL;
 95:   X1   = NULL;
 96:   X2   = NULL;
 97:   MU   = NULL;
 98:   PetscFunctionReturn(PETSC_SUCCESS);
 99: }

101: /*
102:  Run GMRES, possibly with restart.  Return residual history if requested.
103:  input parameters:

105:  .       gmres  - structure containing parameters and work areas

107:  output parameters:
108:  .        nres    - residuals (from preconditioned system) at each step.
109:  If restarting, consider passing nres+it.  If null,
110:  ignored
111:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
112:  are defined.  If null, ignored.

114:  Notes:
115:  On entry, the value in vector VEC_VV(0) should be the initial residual
116:  (this allows shortcuts where the initial preconditioned residual is 0).
117:  */
118: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
119: {
120:   KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);
121:   PetscReal   res_norm, res, hapbnd, tt;
122:   PetscInt    it     = 0;
123:   PetscInt    max_k  = dgmres->max_k;
124:   PetscBool   hapend = PETSC_FALSE;
125:   PetscReal   res_old;
126:   PetscInt    test = 0;

128:   PetscFunctionBegin;
129:   PetscCall(VecNormalize(VEC_VV(0), &res_norm));
130:   KSPCheckNorm(ksp, res_norm);
131:   res     = res_norm;
132:   *GRS(0) = res_norm;

134:   /* check for the convergence */
135:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
136:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
137:   else ksp->rnorm = 0.0;
138:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
139:   dgmres->it = (it - 1);
140:   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
141:   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
142:   if (!res) {
143:     if (itcount) *itcount = 0;
144:     ksp->reason = KSP_CONVERGED_ATOL;
145:     PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
146:     PetscFunctionReturn(PETSC_SUCCESS);
147:   }
148:   /* record the residual norm to test if deflation is needed */
149:   res_old = res;

151:   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
152:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
153:     if (it) {
154:       PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
155:       PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
156:     }
157:     dgmres->it = (it - 1);
158:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPDGMRESGetNewVectors(ksp, it + 1));
159:     if (dgmres->r > 0) {
160:       if (ksp->pc_side == PC_LEFT) {
161:         /* Apply the first preconditioner */
162:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP));
163:         /* Then apply Deflation as a preconditioner */
164:         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it)));
165:       } else if (ksp->pc_side == PC_RIGHT) {
166:         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP));
167:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP));
168:       }
169:     } else {
170:       PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP));
171:     }
172:     dgmres->matvecs += 1;
173:     /* update hessenberg matrix and do Gram-Schmidt */
174:     PetscCall((*dgmres->orthog)(ksp, it));

176:     /* vv(i+1) . vv(i+1) */
177:     PetscCall(VecNormalize(VEC_VV(it + 1), &tt));
178:     /* save the magnitude */
179:     *HH(it + 1, it)  = tt;
180:     *HES(it + 1, it) = tt;

182:     /* check for the happy breakdown */
183:     hapbnd = PetscAbsScalar(tt / *GRS(it));
184:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
185:     if (tt < hapbnd) {
186:       PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt));
187:       hapend = PETSC_TRUE;
188:     }
189:     PetscCall(KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res));

191:     it++;
192:     dgmres->it = (it - 1); /* For converged */
193:     ksp->its++;
194:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
195:     else ksp->rnorm = 0.0;
196:     if (ksp->reason) break;

198:     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));

200:     /* Catch error in happy breakdown and signal convergence and break from loop */
201:     if (hapend) {
202:       if (!ksp->reason) {
203:         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
204:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
205:         break;
206:       }
207:     }
208:   }

210:   /* Monitor if we know that we will not return for a restart */
211:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
212:     PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
213:     PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
214:   }
215:   if (itcount) *itcount = it;

217:   /*
218:    Down here we have to solve for the "best" coefficients of the Krylov
219:    columns, add the solution values together, and possibly unwind the
220:    preconditioning from the solution
221:    */
222:   /* Form the solution (or the solution so far) */
223:   PetscCall(KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1));

225:   /* Compute data for the deflation to be used during the next restart */
226:   if (!ksp->reason && ksp->its < ksp->max_it) {
227:     test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
228:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
229:     if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) PetscCall(KSPDGMRESComputeDeflationData(ksp, NULL));
230:   }
231:   PetscFunctionReturn(PETSC_SUCCESS);
232: }

234: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
235: {
236:   PetscInt    i, its, itcount;
237:   KSP_DGMRES *dgmres     = (KSP_DGMRES *)ksp->data;
238:   PetscBool   guess_zero = ksp->guess_zero;

240:   PetscFunctionBegin;
241:   PetscCheck(!ksp->calc_sings || dgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");

243:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
244:   ksp->its        = 0;
245:   dgmres->matvecs = 0;
246:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));

248:   itcount = 0;
249:   while (!ksp->reason) {
250:     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
251:     if (ksp->pc_side == PC_LEFT) {
252:       dgmres->matvecs += 1;
253:       if (dgmres->r > 0) {
254:         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP));
255:         PetscCall(VecCopy(VEC_TEMP, VEC_VV(0)));
256:       }
257:     }

259:     PetscCall(KSPDGMRESCycle(&its, ksp));
260:     itcount += its;
261:     if (itcount >= ksp->max_it) {
262:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
263:       break;
264:     }
265:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
266:   }
267:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */

269:   for (i = 0; i < dgmres->r; i++) PetscCall(VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs"));
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
274: {
275:   KSP_DGMRES *dgmres   = (KSP_DGMRES *)ksp->data;
276:   PetscInt    neig1    = dgmres->neig + EIG_OFFSET;
277:   PetscInt    max_neig = dgmres->max_neig;

279:   PetscFunctionBegin;
280:   if (dgmres->r) {
281:     PetscCall(VecDestroyVecs(max_neig, &UU));
282:     PetscCall(VecDestroyVecs(max_neig, &MU));
283:     if (XX) {
284:       PetscCall(VecDestroyVecs(neig1, &XX));
285:       PetscCall(VecDestroyVecs(neig1, &MX));
286:     }
287:     PetscCall(PetscFree(TT));
288:     PetscCall(PetscFree(TTF));
289:     PetscCall(PetscFree(INVP));
290:     PetscCall(PetscFree(XMX));
291:     PetscCall(PetscFree(UMX));
292:     PetscCall(PetscFree(XMU));
293:     PetscCall(PetscFree(X1));
294:     PetscCall(PetscFree(X2));
295:     PetscCall(PetscFree(dgmres->work));
296:     PetscCall(PetscFree(dgmres->iwork));
297:     PetscCall(PetscFree(dgmres->wr));
298:     PetscCall(PetscFree(dgmres->wi));
299:     PetscCall(PetscFree(dgmres->modul));
300:     PetscCall(PetscFree(dgmres->Q));
301:     PetscCall(PetscFree(ORTH));
302:     PetscCall(PetscFree(AUAU));
303:     PetscCall(PetscFree(AUU));
304:     PetscCall(PetscFree(SR2));
305:   }
306:   PetscCall(PetscFree(SR));
307:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL));
308:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL));
309:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL));
310:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL));
311:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL));
312:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL));
313:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL));
314:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL));
315:   PetscCall(KSPDestroy_GMRES(ksp));
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }

319: /*
320:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
321:  current iterates.

323:  Input parameters:
324:  nrs - work area of size it + 1.
325:  vs  - index of initial guess
326:  vdest - index of result.  Note that vs may == vdest (replace
327:  guess with the solution).

329:  This is an internal routine that knows about the GMRES internals.
330:  */
331: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
332: {
333:   PetscScalar tt;
334:   PetscInt    ii, k, j;
335:   KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);

337:   /* Solve for solution vector that minimizes the residual */

339:   PetscFunctionBegin;
340:   /* If it is < 0, no gmres steps have been performed */
341:   if (it < 0) {
342:     PetscCall(VecCopy(vs, vdest)); /* VecCopy() is smart, exists immediately if vguess == vdest */
343:     PetscFunctionReturn(PETSC_SUCCESS);
344:   }
345:   PetscCheck(*HH(it, it) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %" PetscInt_FMT " GRS(it) = %g", it, (double)PetscAbsScalar(*GRS(it)));
346:   if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
347:   else nrs[it] = 0.0;

349:   for (ii = 1; ii <= it; ii++) {
350:     k  = it - ii;
351:     tt = *GRS(k);
352:     for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
353:     PetscCheck(*HH(k, k) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is singular. HH(k,k) is identically zero; it = %" PetscInt_FMT " k = %" PetscInt_FMT, it, k);
354:     nrs[k] = tt / *HH(k, k);
355:   }

357:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
358:   PetscCall(VecSet(VEC_TEMP, 0.0));
359:   PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &VEC_VV(0)));

361:   /* Apply deflation */
362:   if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
363:     PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP));
364:     PetscCall(VecCopy(VEC_TEMP_MATOP, VEC_TEMP));
365:   }
366:   PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));

368:   /* add solution to previous solution */
369:   if (vdest != vs) PetscCall(VecCopy(vs, vdest));
370:   PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
371:   PetscFunctionReturn(PETSC_SUCCESS);
372: }

374: /*
375:  Do the scalar work for the orthogonalization.  Return new residual norm.
376:  */
377: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
378: {
379:   PetscScalar *hh, *cc, *ss, tt;
380:   PetscInt     j;
381:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)(ksp->data);

383:   PetscFunctionBegin;
384:   hh = HH(0, it);
385:   cc = CC(0);
386:   ss = SS(0);

388:   /* Apply all the previously computed plane rotations to the new column
389:    of the Hessenberg matrix */
390:   for (j = 1; j <= it; j++) {
391:     tt  = *hh;
392:     *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
393:     hh++;
394:     *hh = *cc++ * *hh - (*ss++ * tt);
395:   }

397:   /*
398:    compute the new plane rotation, and apply it to:
399:    1) the right-hand-side of the Hessenberg system
400:    2) the new column of the Hessenberg matrix
401:    thus obtaining the updated value of the residual
402:    */
403:   if (!hapend) {
404:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
405:     if (tt == 0.0) {
406:       ksp->reason = KSP_DIVERGED_NULL;
407:       PetscFunctionReturn(PETSC_SUCCESS);
408:     }
409:     *cc          = *hh / tt;
410:     *ss          = *(hh + 1) / tt;
411:     *GRS(it + 1) = -(*ss * *GRS(it));
412:     *GRS(it)     = PetscConj(*cc) * *GRS(it);
413:     *hh          = PetscConj(*cc) * *hh + *ss * *(hh + 1);
414:     *res         = PetscAbsScalar(*GRS(it + 1));
415:   } else {
416:     /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
417:      another rotation matrix (so RH doesn't change).  The new residual is
418:      always the new sine term times the residual from last time (GRS(it)),
419:      but now the new sine rotation would be zero...so the residual should
420:      be zero...so we will multiply "zero" by the last residual.  This might
421:      not be exactly what we want to do here -could just return "zero". */

423:     *res = 0.0;
424:   }
425:   PetscFunctionReturn(PETSC_SUCCESS);
426: }

428: /*
429:   Allocates more work vectors, starting from VEC_VV(it).
430:  */
431: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
432: {
433:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
434:   PetscInt    nwork  = dgmres->nwork_alloc, k, nalloc;

436:   PetscFunctionBegin;
437:   nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
438:   /* Adjust the number to allocate to make sure that we don't exceed the
439:    number of available slots */
440:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
441:   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);

443:   dgmres->vv_allocated += nalloc;

445:   PetscCall(KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL));

447:   dgmres->mwork_alloc[nwork] = nalloc;
448:   for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
449:   dgmres->nwork_alloc++;
450:   PetscFunctionReturn(PETSC_SUCCESS);
451: }

453: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
454: {
455:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

457:   PetscFunctionBegin;
458:   if (!ptr) {
459:     if (!dgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &dgmres->sol_temp));
460:     ptr = dgmres->sol_temp;
461:   }
462:   if (!dgmres->nrs) {
463:     /* allocate the work area */
464:     PetscCall(PetscMalloc1(dgmres->max_k, &dgmres->nrs));
465:   }
466:   PetscCall(KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it));
467:   if (result) *result = ptr;
468:   PetscFunctionReturn(PETSC_SUCCESS);
469: }

471: PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
472: {
473:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
474:   PetscBool   iascii, isharmonic;

476:   PetscFunctionBegin;
477:   PetscCall(KSPView_GMRES(ksp, viewer));
478:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
479:   if (iascii) {
480:     PetscCall(PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: %s\n", PetscBools[dgmres->force]));
481:     PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic));
482:     if (isharmonic) {
483:       PetscCall(PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig));
484:     } else {
485:       PetscCall(PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig));
486:     }
487:     PetscCall(PetscViewerASCIIPrintf(viewer, "   Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r));
488:     PetscCall(PetscViewerASCIIPrintf(viewer, "   Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig));
489:     PetscCall(PetscViewerASCIIPrintf(viewer, "   relaxation parameter for the adaptive strategy(smv)  = %g\n", (double)dgmres->smv));
490:     PetscCall(PetscViewerASCIIPrintf(viewer, "   Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs));
491:   }
492:   PetscFunctionReturn(PETSC_SUCCESS);
493: }

495: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
496: {
497:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

499:   PetscFunctionBegin;
500:   PetscCheck(neig >= 0 && neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of neig must be positive and less than the restart value ");
501:   dgmres->neig = neig;
502:   PetscFunctionReturn(PETSC_SUCCESS);
503: }

505: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
506: {
507:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

509:   PetscFunctionBegin;
510:   PetscCheck(max_neig >= 0 && max_neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of max_neig must be positive and less than the restart value ");
511:   dgmres->max_neig = max_neig;
512:   PetscFunctionReturn(PETSC_SUCCESS);
513: }

515: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
516: {
517:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

519:   PetscFunctionBegin;
520:   PetscCheck(ratio > 0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The relaxation parameter value must be positive");
521:   dgmres->smv = ratio;
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
526: {
527:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

529:   PetscFunctionBegin;
530:   dgmres->force = force;
531:   PetscFunctionReturn(PETSC_SUCCESS);
532: }

534: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
535: {
536:   PetscInt    neig;
537:   PetscInt    max_neig;
538:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
539:   PetscBool   flg;

541:   PetscFunctionBegin;
542:   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
543:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
544:   PetscCall(PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg));
545:   if (flg) PetscCall(KSPDGMRESSetEigen(ksp, neig));
546:   PetscCall(PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg));
547:   if (flg) PetscCall(KSPDGMRESSetMaxEigen(ksp, max_neig));
548:   PetscCall(PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL));
549:   PetscCall(PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL));
550:   PetscCall(PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL));
551:   PetscOptionsHeadEnd();
552:   PetscFunctionReturn(PETSC_SUCCESS);
553: }

555: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
556: {
557:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
558:   PetscInt     i, j, k;
559:   PetscBLASInt nr, bmax;
560:   PetscInt     r = dgmres->r;
561:   PetscInt     neig;                                 /* number of eigenvalues to extract at each restart */
562:   PetscInt     neig1    = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
563:   PetscInt     max_neig = dgmres->max_neig;          /* Max number of eigenvalues to extract during the iterative process */
564:   PetscInt     N        = dgmres->max_k + 1;
565:   PetscInt     n        = dgmres->it + 1;
566:   PetscReal    alpha;

568:   PetscFunctionBegin;
569:   PetscCall(PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
570:   if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
571:     PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
572:     PetscFunctionReturn(PETSC_SUCCESS);
573:   }

575:   PetscCall(KSPDGMRESComputeSchurForm(ksp, &neig));
576:   /* Form the extended Schur vectors X=VV*Sr */
577:   if (!XX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &XX));
578:   for (j = 0; j < neig; j++) {
579:     PetscCall(VecZeroEntries(XX[j]));
580:     PetscCall(VecMAXPY(XX[j], n, &SR[j * N], &VEC_VV(0)));
581:   }

583:   /* Orthogonalize X against U */
584:   if (!ORTH) PetscCall(PetscMalloc1(max_neig, &ORTH));
585:   if (r > 0) {
586:     /* modified Gram-Schmidt */
587:     for (j = 0; j < neig; j++) {
588:       for (i = 0; i < r; i++) {
589:         /* First, compute U'*X[j] */
590:         PetscCall(VecDot(XX[j], UU[i], &alpha));
591:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
592:         PetscCall(VecAXPY(XX[j], -alpha, UU[i]));
593:       }
594:     }
595:   }
596:   /* Compute MX = M^{-1}*A*X */
597:   if (!MX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &MX));
598:   for (j = 0; j < neig; j++) PetscCall(KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP));
599:   dgmres->matvecs += neig;

601:   if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
602:     PetscCall(KSPDGMRESImproveEig(ksp, neig));
603:     PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
604:     PetscFunctionReturn(PETSC_SUCCESS); /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
605:   }

607:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
608:   if (!XMX) PetscCall(PetscMalloc1(neig1 * neig1, &XMX));
609:   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, XX, &(XMX[j * neig1])));

611:   if (r > 0) {
612:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
613:     if (!UMX) PetscCall(PetscMalloc1(max_neig * neig1, &UMX));
614:     for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, UU, &(UMX[j * max_neig])));
615:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
616:     if (!XMU) PetscCall(PetscMalloc1(max_neig * neig1, &XMU));
617:     for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, XX, &(XMU[j * neig1])));
618:   }

620:   /* Form the new matrix T = [T UMX; XMU XMX]; */
621:   if (!TT) PetscCall(PetscMalloc1(max_neig * max_neig, &TT));
622:   if (r > 0) {
623:     /* Add XMU to T */
624:     for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&(TT[max_neig * j + r]), &(XMU[neig1 * j]), neig));
625:     /* Add [UMX; XMX] to T */
626:     for (j = 0; j < neig; j++) {
627:       k = r + j;
628:       PetscCall(PetscArraycpy(&(TT[max_neig * k]), &(UMX[max_neig * j]), r));
629:       PetscCall(PetscArraycpy(&(TT[max_neig * k + r]), &(XMX[neig1 * j]), neig));
630:     }
631:   } else { /* Add XMX to T */
632:     for (j = 0; j < neig; j++) PetscCall(PetscArraycpy(&(TT[max_neig * j]), &(XMX[neig1 * j]), neig));
633:   }

635:   dgmres->r += neig;
636:   r = dgmres->r;
637:   PetscCall(PetscBLASIntCast(r, &nr));
638:   /*LU Factorize T with Lapack xgetrf routine */

640:   PetscCall(PetscBLASIntCast(max_neig, &bmax));
641:   if (!TTF) PetscCall(PetscMalloc1(bmax * bmax, &TTF));
642:   PetscCall(PetscArraycpy(TTF, TT, bmax * r));
643:   if (!INVP) PetscCall(PetscMalloc1(bmax, &INVP));
644:   {
645:     PetscBLASInt info;
646:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
647:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%d", (int)info);
648:   }

650:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
651:   if (!UU) {
652:     PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &UU));
653:     PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &MU));
654:   }
655:   for (j = 0; j < neig; j++) {
656:     PetscCall(VecCopy(XX[j], UU[r - neig + j]));
657:     PetscCall(VecCopy(MX[j], MU[r - neig + j]));
658:   }
659:   PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
660:   PetscFunctionReturn(PETSC_SUCCESS);
661: }

663: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
664: {
665:   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
666:   PetscInt      N = dgmres->max_k + 1, n = dgmres->it + 1;
667:   PetscBLASInt  bn;
668:   PetscReal    *A;
669:   PetscBLASInt  ihi;
670:   PetscBLASInt  ldA = 0; /* leading dimension of A */
671:   PetscBLASInt  ldQ;     /* leading dimension of Q */
672:   PetscReal    *Q;       /*  orthogonal matrix of  (left) Schur vectors */
673:   PetscReal    *work;    /* working vector */
674:   PetscBLASInt  lwork;   /* size of the working vector */
675:   PetscInt     *perm;    /* Permutation vector to sort eigenvalues */
676:   PetscInt      i, j;
677:   PetscBLASInt  NbrEig;          /* Number of eigenvalues really extracted */
678:   PetscReal    *wr, *wi, *modul; /* Real and imaginary part and modulus of the eigenvalues of A */
679:   PetscBLASInt *select;
680:   PetscBLASInt *iwork;
681:   PetscBLASInt  liwork;
682:   PetscScalar  *Ht;   /* Transpose of the Hessenberg matrix */
683:   PetscScalar  *t;    /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
684:   PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
685:   PetscBool     flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */

687:   PetscFunctionBegin;
688:   PetscCall(PetscBLASIntCast(n, &bn));
689:   PetscCall(PetscBLASIntCast(N, &ldA));
690:   ihi = ldQ = bn;
691:   PetscCall(PetscBLASIntCast(5 * N, &lwork));

693: #if defined(PETSC_USE_COMPLEX)
694:   SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
695: #endif

697:   PetscCall(PetscMalloc1(ldA * ldA, &A));
698:   PetscCall(PetscMalloc1(ldQ * n, &Q));
699:   PetscCall(PetscMalloc1(lwork, &work));
700:   if (!dgmres->wr) {
701:     PetscCall(PetscMalloc1(n, &dgmres->wr));
702:     PetscCall(PetscMalloc1(n, &dgmres->wi));
703:   }
704:   wr = dgmres->wr;
705:   wi = dgmres->wi;
706:   PetscCall(PetscMalloc1(n, &modul));
707:   PetscCall(PetscMalloc1(n, &perm));
708:   /* copy the Hessenberg matrix to work space */
709:   PetscCall(PetscArraycpy(A, dgmres->hes_origin, ldA * ldA));
710:   PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag));
711:   if (flag) {
712:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
713:     /* Transpose the Hessenberg matrix */
714:     PetscCall(PetscMalloc1(bn * bn, &Ht));
715:     for (i = 0; i < bn; i++) {
716:       for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
717:     }

719:     /* Solve the system H^T*t = h_{m+1,m}e_m */
720:     PetscCall(PetscCalloc1(bn, &t));
721:     t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
722:     PetscCall(PetscMalloc1(bn, &ipiv));
723:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
724:     {
725:       PetscBLASInt info;
726:       PetscBLASInt nrhs = 1;
727:       PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
728:       PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error while calling the Lapack routine DGESV");
729:     }
730:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
731:     for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
732:     PetscCall(PetscFree(t));
733:     PetscCall(PetscFree(Ht));
734:   }
735:   /* Compute eigenvalues with the Schur form */
736:   {
737:     PetscBLASInt info = 0;
738:     PetscBLASInt ilo  = 1;
739:     PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
740:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XHSEQR %d", (int)info);
741:   }
742:   PetscCall(PetscFree(work));

744:   /* sort the eigenvalues */
745:   for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
746:   for (i = 0; i < n; i++) perm[i] = i;

748:   PetscCall(PetscSortRealWithPermutation(n, modul, perm));
749:   /* save the complex modulus of the largest eigenvalue in magnitude */
750:   if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
751:   /* count the number of extracted eigenvalues (with complex conjugates) */
752:   NbrEig = 0;
753:   while (NbrEig < dgmres->neig) {
754:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
755:     else NbrEig += 1;
756:   }
757:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

759:   PetscCall(PetscCalloc1(n, &select));

761:   if (!dgmres->GreatestEig) {
762:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
763:   } else {
764:     for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
765:   }
766:   /* call Lapack dtrsen */
767:   lwork  = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
768:   liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
769:   PetscCall(PetscMalloc1(lwork, &work));
770:   PetscCall(PetscMalloc1(liwork, &iwork));
771:   {
772:     PetscBLASInt info = 0;
773:     PetscReal    CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
774:     PetscReal    CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
775:     PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
776:     PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
777:   }
778:   PetscCall(PetscFree(select));

780:   /* Extract the Schur vectors */
781:   for (j = 0; j < NbrEig; j++) PetscCall(PetscArraycpy(&SR[j * N], &(Q[j * ldQ]), n));
782:   *neig = NbrEig;
783:   PetscCall(PetscFree(A));
784:   PetscCall(PetscFree(work));
785:   PetscCall(PetscFree(perm));
786:   PetscCall(PetscFree(work));
787:   PetscCall(PetscFree(iwork));
788:   PetscCall(PetscFree(modul));
789:   PetscCall(PetscFree(Q));
790:   PetscFunctionReturn(PETSC_SUCCESS);
791: }

793: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
794: {
795:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
796:   PetscInt     i, r = dgmres->r;
797:   PetscReal    alpha    = 1.0;
798:   PetscInt     max_neig = dgmres->max_neig;
799:   PetscBLASInt br, bmax;
800:   PetscReal    lambda = dgmres->lambdaN;

802:   PetscFunctionBegin;
803:   PetscCall(PetscBLASIntCast(r, &br));
804:   PetscCall(PetscBLASIntCast(max_neig, &bmax));
805:   PetscCall(PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
806:   if (!r) {
807:     PetscCall(VecCopy(x, y));
808:     PetscFunctionReturn(PETSC_SUCCESS);
809:   }
810:   /* Compute U'*x */
811:   if (!X1) {
812:     PetscCall(PetscMalloc1(bmax, &X1));
813:     PetscCall(PetscMalloc1(bmax, &X2));
814:   }
815:   PetscCall(VecMDot(x, r, UU, X1));

817:   /* Solve T*X1=X2 for X1*/
818:   PetscCall(PetscArraycpy(X2, X1, br));
819:   {
820:     PetscBLASInt info;
821:     PetscBLASInt nrhs = 1;
822:     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
823:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRS %d", (int)info);
824:   }
825:   /* Iterative refinement -- is it really necessary ?? */
826:   if (!WORK) {
827:     PetscCall(PetscMalloc1(3 * bmax, &WORK));
828:     PetscCall(PetscMalloc1(bmax, &IWORK));
829:   }
830:   {
831:     PetscBLASInt info;
832:     PetscReal    berr, ferr;
833:     PetscBLASInt nrhs = 1;
834:     PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
835:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGERFS %d", (int)info);
836:   }

838:   for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];

840:   /* Compute X2=U*X2 */
841:   PetscCall(VecZeroEntries(y));
842:   PetscCall(VecMAXPY(y, r, X2, UU));
843:   PetscCall(VecAXPY(y, alpha, x));

845:   PetscCall(PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
846:   PetscFunctionReturn(PETSC_SUCCESS);
847: }

849: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
850: {
851:   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
852:   PetscInt      j, r_old, r = dgmres->r;
853:   PetscBLASInt  i     = 0;
854:   PetscInt      neig1 = dgmres->neig + EIG_OFFSET;
855:   PetscInt      bmax  = dgmres->max_neig;
856:   PetscInt      aug   = r + neig;       /* actual size of the augmented invariant basis */
857:   PetscInt      aug1  = bmax + neig1;   /* maximum size of the augmented invariant basis */
858:   PetscBLASInt  ldA;                    /* leading dimension of AUAU and AUU*/
859:   PetscBLASInt  N;                      /* size of AUAU */
860:   PetscReal    *Q;                      /*  orthogonal matrix of  (left) schur vectors */
861:   PetscReal    *Z;                      /*  orthogonal matrix of  (right) schur vectors */
862:   PetscReal    *work;                   /* working vector */
863:   PetscBLASInt  lwork;                  /* size of the working vector */
864:   PetscInt     *perm;                   /* Permutation vector to sort eigenvalues */
865:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modulus of the eigenvalues of A*/
866:   PetscBLASInt  NbrEig = 0, nr, bm;
867:   PetscBLASInt *select;
868:   PetscBLASInt  liwork, *iwork;

870:   PetscFunctionBegin;
871:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
872:   if (!AUU) {
873:     PetscCall(PetscMalloc1(aug1 * aug1, &AUU));
874:     PetscCall(PetscMalloc1(aug1 * aug1, &AUAU));
875:   }
876:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
877:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
878:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
879:   for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], r, MU, &AUU[j * aug1]));
880:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
881:   for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]));
882:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
883:   for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]));
884:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
885:   for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]));

887:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
888:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
889:   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, MU, &AUAU[j * aug1]));
890:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
891:   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]));
892:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
893:   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]));
894:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
895:   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]));

897:   /* Computation of the eigenvectors */
898:   PetscCall(PetscBLASIntCast(aug1, &ldA));
899:   PetscCall(PetscBLASIntCast(aug, &N));
900:   lwork = 8 * N + 20; /* sizeof the working space */
901:   PetscCall(PetscMalloc1(N, &wr));
902:   PetscCall(PetscMalloc1(N, &wi));
903:   PetscCall(PetscMalloc1(N, &beta));
904:   PetscCall(PetscMalloc1(N, &modul));
905:   PetscCall(PetscMalloc1(N, &perm));
906:   PetscCall(PetscMalloc1(N * N, &Q));
907:   PetscCall(PetscMalloc1(N * N, &Z));
908:   PetscCall(PetscMalloc1(lwork, &work));
909:   {
910:     PetscBLASInt info = 0;
911:     PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
912:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGGES %d", (int)info);
913:   }
914:   for (i = 0; i < N; i++) {
915:     if (beta[i] != 0.0) {
916:       wr[i] /= beta[i];
917:       wi[i] /= beta[i];
918:     }
919:   }
920:   /* sort the eigenvalues */
921:   for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
922:   for (i = 0; i < N; i++) perm[i] = i;
923:   PetscCall(PetscSortRealWithPermutation(N, modul, perm));
924:   /* Save the norm of the largest eigenvalue */
925:   if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
926:   /* Allocate space to extract the first r schur vectors   */
927:   if (!SR2) PetscCall(PetscMalloc1(aug1 * bmax, &SR2));
928:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
929:   while (NbrEig < bmax) {
930:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
931:     else NbrEig += 2;
932:   }
933:   if (NbrEig > bmax) NbrEig = bmax - 1;
934:   r_old     = r; /* previous size of r */
935:   dgmres->r = r = NbrEig;

937:   /* Select the eigenvalues to reorder */
938:   PetscCall(PetscCalloc1(N, &select));
939:   if (!dgmres->GreatestEig) {
940:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
941:   } else {
942:     for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
943:   }
944:   /* Reorder and extract the new <r> schur vectors */
945:   lwork  = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
946:   liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
947:   PetscCall(PetscFree(work));
948:   PetscCall(PetscMalloc1(lwork, &work));
949:   PetscCall(PetscMalloc1(liwork, &iwork));
950:   {
951:     PetscBLASInt info = 0;
952:     PetscReal    Dif[2];
953:     PetscBLASInt ijob  = 2;
954:     PetscBLASInt wantQ = 1, wantZ = 1;
955:     PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
956:     PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
957:   }
958:   PetscCall(PetscFree(select));

960:   for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&SR2[j * aug1], &(Z[j * N]), N));

962:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
963:    -- save it temporarily in MU */
964:   for (j = 0; j < r; j++) {
965:     PetscCall(VecZeroEntries(MU[j]));
966:     PetscCall(VecMAXPY(MU[j], r_old, &SR2[j * aug1], UU));
967:     PetscCall(VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX));
968:   }
969:   /* Form T = U'*MU*U */
970:   for (j = 0; j < r; j++) {
971:     PetscCall(VecCopy(MU[j], UU[j]));
972:     PetscCall(KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP));
973:   }
974:   dgmres->matvecs += r;
975:   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, UU, &TT[j * bmax]));
976:   /* Factorize T */
977:   PetscCall(PetscArraycpy(TTF, TT, bmax * r));
978:   PetscCall(PetscBLASIntCast(r, &nr));
979:   PetscCall(PetscBLASIntCast(bmax, &bm));
980:   {
981:     PetscBLASInt info;
982:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
983:     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%d", (int)info);
984:   }
985:   /* Free Memory */
986:   PetscCall(PetscFree(wr));
987:   PetscCall(PetscFree(wi));
988:   PetscCall(PetscFree(beta));
989:   PetscCall(PetscFree(modul));
990:   PetscCall(PetscFree(perm));
991:   PetscCall(PetscFree(Q));
992:   PetscCall(PetscFree(Z));
993:   PetscCall(PetscFree(work));
994:   PetscCall(PetscFree(iwork));
995:   PetscFunctionReturn(PETSC_SUCCESS);
996: }

998: /*MC
999:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1000:                  In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1001:                  stagnation occurs.

1003:    Options Database Keys:
1004:    GMRES Options (inherited):
1005: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1006: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1007: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1008:                              vectors are allocated as needed)
1009: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1010: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1011: .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1012:                                    stability of the classical Gram-Schmidt  orthogonalization.
1013: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated

1015:    DGMRES Options Database Keys:
1016: +   -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1017: .   -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1018:                                        process
1019: .   -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1020: -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1021:                                                    parsed by `PetscOptionsGetViewer()`.  If neig > 1, viewerspec should
1022:                                                    end with ":append".  No vectors will be viewed if the adaptive
1023:                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
1024:                                                    be given.
1025:                                                    The deflation vectors span a subspace that may be a good
1026:                                                    approximation of the subspace of smallest eigenvectors of the
1027:                                                    preconditioned operator, so this option can aid in understanding
1028:                                                    the performance of a preconditioner.

1030:  Level: beginner

1032:  Note:
1033:     Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported

1035:  References:
1036: + [1] - J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1037: - [2] - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1038:    In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1040:  Contributed by:
1041:   Desire NUENTSA WAKAM, INRIA

1043: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1044:            `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1045:            `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1046:            `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1047:  M*/

1049: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1050: {
1051:   KSP_DGMRES *dgmres;

1053:   PetscFunctionBegin;
1054:   PetscCall(PetscNew(&dgmres));
1055:   ksp->data = (void *)dgmres;

1057:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
1058:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
1059:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));

1061:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1062:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1063:   ksp->ops->solve                        = KSPSolve_DGMRES;
1064:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1065:   ksp->ops->view                         = KSPView_DGMRES;
1066:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1067:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1068:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1070:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
1071:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
1072:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
1073:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
1074:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
1075:   /* -- New functions defined in DGMRES -- */
1076:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
1077:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES));
1078:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES));
1079:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES));
1080:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
1081:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
1082:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));
1083:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES));

1085:   PetscCall(PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData));
1086:   PetscCall(PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation));

1088:   dgmres->haptol         = 1.0e-30;
1089:   dgmres->q_preallocate  = 0;
1090:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1091:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1092:   dgmres->nrs            = NULL;
1093:   dgmres->sol_temp       = NULL;
1094:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1095:   dgmres->Rsvd           = NULL;
1096:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1097:   dgmres->orthogwork     = NULL;

1099:   /* Default values for the deflation */
1100:   dgmres->r           = 0;
1101:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1102:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG - 1;
1103:   dgmres->lambdaN     = 0.0;
1104:   dgmres->smv         = SMV;
1105:   dgmres->matvecs     = 0;
1106:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1107:   dgmres->HasSchur    = PETSC_FALSE;
1108:   PetscFunctionReturn(PETSC_SUCCESS);
1109: }