Actual source code: sr1.c

  1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>

  3: /*
  4:   Limited-memory Symmetric-Rank-1 method for approximating both
  5:   the forward product and inverse application of a Jacobian.
  6: */

  8: typedef struct {
  9:   Vec       *P, *Q;
 10:   Vec        work;
 11:   PetscBool  allocated, needP, needQ;
 12:   PetscReal *stp, *ytq;
 13: } Mat_LSR1;

 15: /*------------------------------------------------------------*/

 17: /*
 18:   The solution method is adapted from Algorithm 8 of Erway and Marcia
 19:   "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
 20:   (https://arxiv.org/abs/1510.06378).

 22:   Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
 23:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 24:   repeated calls of MatMult inside KSP solvers without unnecessarily
 25:   recomputing Q[i] terms in expensive nested-loops.

 27:   dX <- J0^{-1} * F
 28:   for i = 0,1,2,...,k
 29:     # Q[i] = S[i] - (B_i)^{-1}*Y[i]
 30:     zeta = (Q[i]^T F) / (Q[i]^T Y[i])
 31:     dX <- dX + (zeta * Q[i])
 32:   end
 33: */
 34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
 35: {
 36:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 37:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 38:   PetscInt    i, j;
 39:   PetscScalar qjtyi, qtf, ytq;

 41:   PetscFunctionBegin;
 42:   VecCheckSameSize(F, 2, dX, 3);
 43:   VecCheckMatCompatible(B, dX, 3, F, 2);

 45:   if (lsr1->needQ) {
 46:     /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
 47:     for (i = 0; i <= lmvm->k; ++i) {
 48:       PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]));
 49:       PetscCall(VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]));
 50:       for (j = 0; j <= i - 1; ++j) {
 51:         PetscCall(VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi));
 52:         PetscCall(VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]));
 53:       }
 54:       PetscCall(VecDot(lmvm->Y[i], lsr1->Q[i], &ytq));
 55:       lsr1->ytq[i] = PetscRealPart(ytq);
 56:     }
 57:     lsr1->needQ = PETSC_FALSE;
 58:   }

 60:   /* Invert the initial Jacobian onto F (or apply scaling) */
 61:   PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
 62:   /* Start outer loop */
 63:   for (i = 0; i <= lmvm->k; ++i) {
 64:     PetscCall(VecDot(lsr1->Q[i], F, &qtf));
 65:     PetscCall(VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]));
 66:   }
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: /*------------------------------------------------------------*/

 72: /*
 73:   The forward product is the matrix-free implementation of
 74:   Equation (6.24) in Nocedal and Wright "Numerical Optimization"
 75:   2nd edition, pg 144.

 77:   Note that the structure of the forward product is identical to
 78:   the solution, with S and Y exchanging roles.

 80:   P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
 81:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 82:   repeated calls of MatMult inside KSP solvers without unnecessarily
 83:   recomputing P[i] terms in expensive nested-loops.

 85:   Z <- J0 * X
 86:   for i = 0,1,2,...,k
 87:     # P[i] = Y[i] - (B_i)*S[i]
 88:     zeta = (P[i]^T X) / (P[i]^T S[i])
 89:     Z <- Z + (zeta * P[i])
 90:   end
 91: */
 92: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
 93: {
 94:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 95:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 96:   PetscInt    i, j;
 97:   PetscScalar pjtsi, ptx, stp;

 99:   PetscFunctionBegin;
100:   VecCheckSameSize(X, 2, Z, 3);
101:   VecCheckMatCompatible(B, X, 2, Z, 3);

103:   if (lsr1->needP) {
104:     /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
105:     for (i = 0; i <= lmvm->k; ++i) {
106:       PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]));
107:       PetscCall(VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]));
108:       for (j = 0; j <= i - 1; ++j) {
109:         PetscCall(VecDot(lsr1->P[j], lmvm->S[i], &pjtsi));
110:         PetscCall(VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]));
111:       }
112:       PetscCall(VecDot(lmvm->S[i], lsr1->P[i], &stp));
113:       lsr1->stp[i] = PetscRealPart(stp);
114:     }
115:     lsr1->needP = PETSC_FALSE;
116:   }

118:   PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
119:   for (i = 0; i <= lmvm->k; ++i) {
120:     PetscCall(VecDot(lsr1->P[i], X, &ptx));
121:     PetscCall(VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]));
122:   }
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }

126: /*------------------------------------------------------------*/

128: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
129: {
130:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
131:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
132:   PetscReal   snorm, pnorm;
133:   PetscScalar sktw;

135:   PetscFunctionBegin;
136:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
137:   if (lmvm->prev_set) {
138:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
139:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
140:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
141:     /* See if the updates can be accepted
142:        NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
143:     PetscCall(MatMult(B, lmvm->Xprev, lsr1->work));
144:     PetscCall(VecAYPX(lsr1->work, -1.0, lmvm->Fprev));
145:     PetscCall(VecDot(lmvm->Xprev, lsr1->work, &sktw));
146:     PetscCall(VecNorm(lmvm->Xprev, NORM_2, &snorm));
147:     PetscCall(VecNorm(lsr1->work, NORM_2, &pnorm));
148:     if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
149:       /* Update is good, accept it */
150:       lsr1->needP = lsr1->needQ = PETSC_TRUE;
151:       PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
152:     } else {
153:       /* Update is bad, skip it */
154:       ++lmvm->nrejects;
155:     }
156:   }
157:   /* Save the solution and function to be used in the next update */
158:   PetscCall(VecCopy(X, lmvm->Xprev));
159:   PetscCall(VecCopy(F, lmvm->Fprev));
160:   lmvm->prev_set = PETSC_TRUE;
161:   PetscFunctionReturn(PETSC_SUCCESS);
162: }

164: /*------------------------------------------------------------*/

166: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
167: {
168:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
169:   Mat_LSR1 *bctx  = (Mat_LSR1 *)bdata->ctx;
170:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
171:   Mat_LSR1 *mctx  = (Mat_LSR1 *)mdata->ctx;
172:   PetscInt  i;

174:   PetscFunctionBegin;
175:   mctx->needP = bctx->needP;
176:   mctx->needQ = bctx->needQ;
177:   for (i = 0; i <= bdata->k; ++i) {
178:     mctx->stp[i] = bctx->stp[i];
179:     mctx->ytq[i] = bctx->ytq[i];
180:     PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
181:     PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
182:   }
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: /*------------------------------------------------------------*/

188: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
189: {
190:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
191:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

193:   PetscFunctionBegin;
194:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
195:   if (destructive && lsr1->allocated) {
196:     PetscCall(VecDestroy(&lsr1->work));
197:     PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
198:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
199:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
200:     lsr1->allocated = PETSC_FALSE;
201:   }
202:   PetscCall(MatReset_LMVM(B, destructive));
203:   PetscFunctionReturn(PETSC_SUCCESS);
204: }

206: /*------------------------------------------------------------*/

208: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
209: {
210:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
211:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

213:   PetscFunctionBegin;
214:   PetscCall(MatAllocate_LMVM(B, X, F));
215:   if (!lsr1->allocated) {
216:     PetscCall(VecDuplicate(X, &lsr1->work));
217:     PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
218:     if (lmvm->m > 0) {
219:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->P));
220:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->Q));
221:     }
222:     lsr1->allocated = PETSC_TRUE;
223:   }
224:   PetscFunctionReturn(PETSC_SUCCESS);
225: }

227: /*------------------------------------------------------------*/

229: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
230: {
231:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
232:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

234:   PetscFunctionBegin;
235:   if (lsr1->allocated) {
236:     PetscCall(VecDestroy(&lsr1->work));
237:     PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
238:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
239:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
240:     lsr1->allocated = PETSC_FALSE;
241:   }
242:   PetscCall(PetscFree(lmvm->ctx));
243:   PetscCall(MatDestroy_LMVM(B));
244:   PetscFunctionReturn(PETSC_SUCCESS);
245: }

247: /*------------------------------------------------------------*/

249: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
250: {
251:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
252:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

254:   PetscFunctionBegin;
255:   PetscCall(MatSetUp_LMVM(B));
256:   if (!lsr1->allocated && lmvm->m > 0) {
257:     PetscCall(VecDuplicate(lmvm->Xprev, &lsr1->work));
258:     PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
259:     if (lmvm->m > 0) {
260:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P));
261:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q));
262:     }
263:     lsr1->allocated = PETSC_TRUE;
264:   }
265:   PetscFunctionReturn(PETSC_SUCCESS);
266: }

268: /*------------------------------------------------------------*/

270: PetscErrorCode MatCreate_LMVMSR1(Mat B)
271: {
272:   Mat_LMVM *lmvm;
273:   Mat_LSR1 *lsr1;

275:   PetscFunctionBegin;
276:   PetscCall(MatCreate_LMVM(B));
277:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1));
278:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
279:   B->ops->setup   = MatSetUp_LMVMSR1;
280:   B->ops->destroy = MatDestroy_LMVMSR1;
281:   B->ops->solve   = MatSolve_LMVMSR1;

283:   lmvm                = (Mat_LMVM *)B->data;
284:   lmvm->square        = PETSC_TRUE;
285:   lmvm->ops->allocate = MatAllocate_LMVMSR1;
286:   lmvm->ops->reset    = MatReset_LMVMSR1;
287:   lmvm->ops->update   = MatUpdate_LMVMSR1;
288:   lmvm->ops->mult     = MatMult_LMVMSR1;
289:   lmvm->ops->copy     = MatCopy_LMVMSR1;

291:   PetscCall(PetscNew(&lsr1));
292:   lmvm->ctx       = (void *)lsr1;
293:   lsr1->allocated = PETSC_FALSE;
294:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
295:   PetscFunctionReturn(PETSC_SUCCESS);
296: }

298: /*------------------------------------------------------------*/

300: /*@
301:    MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
302:    matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
303:    guaranteed to be positive-definite.

305:    To use the L-SR1 matrix with other vector types, the matrix must be
306:    created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
307:    This ensures that the internal storage and work vectors are duplicated from the
308:    correct type of vector.

310:    Collective

312:    Input Parameters:
313: +  comm - MPI communicator
314: .  n - number of local rows for storage vectors
315: -  N - global size of the storage vectors

317:    Output Parameter:
318: .  B - the matrix

320:    Level: intermediate

322:    Note:
323:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
324:    paradigm instead of this routine directly.

326: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
327:           `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
328: @*/
329: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
330: {
331:   PetscFunctionBegin;
332:   PetscCall(MatCreate(comm, B));
333:   PetscCall(MatSetSizes(*B, n, n, N, N));
334:   PetscCall(MatSetType(*B, MATLMVMSR1));
335:   PetscCall(MatSetUp(*B));
336:   PetscFunctionReturn(PETSC_SUCCESS);
337: }