Actual source code: sr1.c
1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>
3: /*
4: Limited-memory Symmetric-Rank-1 method for approximating both
5: the forward product and inverse application of a Jacobian.
6: */
8: typedef struct {
9: Vec *P, *Q;
10: Vec work;
11: PetscBool allocated, needP, needQ;
12: PetscReal *stp, *ytq;
13: } Mat_LSR1;
15: /*------------------------------------------------------------*/
17: /*
18: The solution method is adapted from Algorithm 8 of Erway and Marcia
19: "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
20: (https://arxiv.org/abs/1510.06378).
22: Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
23: the matrix is updated with a new (S[i], Y[i]) pair. This allows
24: repeated calls of MatMult inside KSP solvers without unnecessarily
25: recomputing Q[i] terms in expensive nested-loops.
27: dX <- J0^{-1} * F
28: for i = 0,1,2,...,k
29: # Q[i] = S[i] - (B_i)^{-1}*Y[i]
30: zeta = (Q[i]^T F) / (Q[i]^T Y[i])
31: dX <- dX + (zeta * Q[i])
32: end
33: */
34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
38: PetscInt i, j;
39: PetscScalar qjtyi, qtf, ytq;
41: PetscFunctionBegin;
42: VecCheckSameSize(F, 2, dX, 3);
43: VecCheckMatCompatible(B, dX, 3, F, 2);
45: if (lsr1->needQ) {
46: /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
47: for (i = 0; i <= lmvm->k; ++i) {
48: PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]));
49: PetscCall(VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]));
50: for (j = 0; j <= i - 1; ++j) {
51: PetscCall(VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi));
52: PetscCall(VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]));
53: }
54: PetscCall(VecDot(lmvm->Y[i], lsr1->Q[i], &ytq));
55: lsr1->ytq[i] = PetscRealPart(ytq);
56: }
57: lsr1->needQ = PETSC_FALSE;
58: }
60: /* Invert the initial Jacobian onto F (or apply scaling) */
61: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
62: /* Start outer loop */
63: for (i = 0; i <= lmvm->k; ++i) {
64: PetscCall(VecDot(lsr1->Q[i], F, &qtf));
65: PetscCall(VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]));
66: }
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: /*------------------------------------------------------------*/
72: /*
73: The forward product is the matrix-free implementation of
74: Equation (6.24) in Nocedal and Wright "Numerical Optimization"
75: 2nd edition, pg 144.
77: Note that the structure of the forward product is identical to
78: the solution, with S and Y exchanging roles.
80: P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
81: the matrix is updated with a new (S[i], Y[i]) pair. This allows
82: repeated calls of MatMult inside KSP solvers without unnecessarily
83: recomputing P[i] terms in expensive nested-loops.
85: Z <- J0 * X
86: for i = 0,1,2,...,k
87: # P[i] = Y[i] - (B_i)*S[i]
88: zeta = (P[i]^T X) / (P[i]^T S[i])
89: Z <- Z + (zeta * P[i])
90: end
91: */
92: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
93: {
94: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
95: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
96: PetscInt i, j;
97: PetscScalar pjtsi, ptx, stp;
99: PetscFunctionBegin;
100: VecCheckSameSize(X, 2, Z, 3);
101: VecCheckMatCompatible(B, X, 2, Z, 3);
103: if (lsr1->needP) {
104: /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
105: for (i = 0; i <= lmvm->k; ++i) {
106: PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]));
107: PetscCall(VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]));
108: for (j = 0; j <= i - 1; ++j) {
109: PetscCall(VecDot(lsr1->P[j], lmvm->S[i], &pjtsi));
110: PetscCall(VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]));
111: }
112: PetscCall(VecDot(lmvm->S[i], lsr1->P[i], &stp));
113: lsr1->stp[i] = PetscRealPart(stp);
114: }
115: lsr1->needP = PETSC_FALSE;
116: }
118: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
119: for (i = 0; i <= lmvm->k; ++i) {
120: PetscCall(VecDot(lsr1->P[i], X, &ptx));
121: PetscCall(VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]));
122: }
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: /*------------------------------------------------------------*/
128: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
129: {
130: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
131: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
132: PetscReal snorm, pnorm;
133: PetscScalar sktw;
135: PetscFunctionBegin;
136: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
137: if (lmvm->prev_set) {
138: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
139: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
140: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
141: /* See if the updates can be accepted
142: NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
143: PetscCall(MatMult(B, lmvm->Xprev, lsr1->work));
144: PetscCall(VecAYPX(lsr1->work, -1.0, lmvm->Fprev));
145: PetscCall(VecDot(lmvm->Xprev, lsr1->work, &sktw));
146: PetscCall(VecNorm(lmvm->Xprev, NORM_2, &snorm));
147: PetscCall(VecNorm(lsr1->work, NORM_2, &pnorm));
148: if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
149: /* Update is good, accept it */
150: lsr1->needP = lsr1->needQ = PETSC_TRUE;
151: PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
152: } else {
153: /* Update is bad, skip it */
154: ++lmvm->nrejects;
155: }
156: }
157: /* Save the solution and function to be used in the next update */
158: PetscCall(VecCopy(X, lmvm->Xprev));
159: PetscCall(VecCopy(F, lmvm->Fprev));
160: lmvm->prev_set = PETSC_TRUE;
161: PetscFunctionReturn(PETSC_SUCCESS);
162: }
164: /*------------------------------------------------------------*/
166: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
167: {
168: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
169: Mat_LSR1 *bctx = (Mat_LSR1 *)bdata->ctx;
170: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
171: Mat_LSR1 *mctx = (Mat_LSR1 *)mdata->ctx;
172: PetscInt i;
174: PetscFunctionBegin;
175: mctx->needP = bctx->needP;
176: mctx->needQ = bctx->needQ;
177: for (i = 0; i <= bdata->k; ++i) {
178: mctx->stp[i] = bctx->stp[i];
179: mctx->ytq[i] = bctx->ytq[i];
180: PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
181: PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
182: }
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: /*------------------------------------------------------------*/
188: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
189: {
190: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
191: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
193: PetscFunctionBegin;
194: lsr1->needP = lsr1->needQ = PETSC_TRUE;
195: if (destructive && lsr1->allocated) {
196: PetscCall(VecDestroy(&lsr1->work));
197: PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
198: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
199: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
200: lsr1->allocated = PETSC_FALSE;
201: }
202: PetscCall(MatReset_LMVM(B, destructive));
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: /*------------------------------------------------------------*/
208: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
209: {
210: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
211: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
213: PetscFunctionBegin;
214: PetscCall(MatAllocate_LMVM(B, X, F));
215: if (!lsr1->allocated) {
216: PetscCall(VecDuplicate(X, &lsr1->work));
217: PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
218: if (lmvm->m > 0) {
219: PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->P));
220: PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->Q));
221: }
222: lsr1->allocated = PETSC_TRUE;
223: }
224: PetscFunctionReturn(PETSC_SUCCESS);
225: }
227: /*------------------------------------------------------------*/
229: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
230: {
231: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
232: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
234: PetscFunctionBegin;
235: if (lsr1->allocated) {
236: PetscCall(VecDestroy(&lsr1->work));
237: PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
238: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
239: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
240: lsr1->allocated = PETSC_FALSE;
241: }
242: PetscCall(PetscFree(lmvm->ctx));
243: PetscCall(MatDestroy_LMVM(B));
244: PetscFunctionReturn(PETSC_SUCCESS);
245: }
247: /*------------------------------------------------------------*/
249: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
250: {
251: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
252: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
254: PetscFunctionBegin;
255: PetscCall(MatSetUp_LMVM(B));
256: if (!lsr1->allocated && lmvm->m > 0) {
257: PetscCall(VecDuplicate(lmvm->Xprev, &lsr1->work));
258: PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
259: if (lmvm->m > 0) {
260: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P));
261: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q));
262: }
263: lsr1->allocated = PETSC_TRUE;
264: }
265: PetscFunctionReturn(PETSC_SUCCESS);
266: }
268: /*------------------------------------------------------------*/
270: PetscErrorCode MatCreate_LMVMSR1(Mat B)
271: {
272: Mat_LMVM *lmvm;
273: Mat_LSR1 *lsr1;
275: PetscFunctionBegin;
276: PetscCall(MatCreate_LMVM(B));
277: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1));
278: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
279: B->ops->setup = MatSetUp_LMVMSR1;
280: B->ops->destroy = MatDestroy_LMVMSR1;
281: B->ops->solve = MatSolve_LMVMSR1;
283: lmvm = (Mat_LMVM *)B->data;
284: lmvm->square = PETSC_TRUE;
285: lmvm->ops->allocate = MatAllocate_LMVMSR1;
286: lmvm->ops->reset = MatReset_LMVMSR1;
287: lmvm->ops->update = MatUpdate_LMVMSR1;
288: lmvm->ops->mult = MatMult_LMVMSR1;
289: lmvm->ops->copy = MatCopy_LMVMSR1;
291: PetscCall(PetscNew(&lsr1));
292: lmvm->ctx = (void *)lsr1;
293: lsr1->allocated = PETSC_FALSE;
294: lsr1->needP = lsr1->needQ = PETSC_TRUE;
295: PetscFunctionReturn(PETSC_SUCCESS);
296: }
298: /*------------------------------------------------------------*/
300: /*@
301: MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
302: matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
303: guaranteed to be positive-definite.
305: To use the L-SR1 matrix with other vector types, the matrix must be
306: created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
307: This ensures that the internal storage and work vectors are duplicated from the
308: correct type of vector.
310: Collective
312: Input Parameters:
313: + comm - MPI communicator
314: . n - number of local rows for storage vectors
315: - N - global size of the storage vectors
317: Output Parameter:
318: . B - the matrix
320: Level: intermediate
322: Note:
323: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
324: paradigm instead of this routine directly.
326: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
327: `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
328: @*/
329: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
330: {
331: PetscFunctionBegin;
332: PetscCall(MatCreate(comm, B));
333: PetscCall(MatSetSizes(*B, n, n, N, N));
334: PetscCall(MatSetType(*B, MATLMVMSR1));
335: PetscCall(MatSetUp(*B));
336: PetscFunctionReturn(PETSC_SUCCESS);
337: }