Actual source code: brdn.c

  1: #include <../src/ksp/ksp/utils/lmvm/brdn/brdn.h>

  3: /*------------------------------------------------------------*/

  5: /*
  6:   The solution method is the matrix-free implementation of the inverse Hessian
  7:   representation in page 312 of Griewank "Broyden Updating, The Good and The Bad!"
  8:   (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).

 10:   Q[i] = (B_i)^{-1}*S[i] terms are computed ahead of time whenever
 11:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 12:   repeated calls of MatSolve without incurring redundant computation.

 14:   dX <- J0^{-1} * F

 16:   for i=0,1,2,...,k
 17:     # Q[i] = (B_i)^{-1} * Y[i]
 18:     tau = (S[i]^T dX) / (S[i]^T Q[i])
 19:     dX <- dX + (tau * (S[i] - Q[i]))
 20:   end
 21:  */

 23: static PetscErrorCode MatSolve_LMVMBrdn(Mat B, Vec F, Vec dX)
 24: {
 25:   Mat_LMVM   *lmvm  = (Mat_LMVM *)B->data;
 26:   Mat_Brdn   *lbrdn = (Mat_Brdn *)lmvm->ctx;
 27:   PetscInt    i, j;
 28:   PetscScalar sjtqi, stx, stq;

 30:   PetscFunctionBegin;
 31:   VecCheckSameSize(F, 2, dX, 3);
 32:   VecCheckMatCompatible(B, dX, 3, F, 2);

 34:   if (lbrdn->needQ) {
 35:     /* Pre-compute (Q[i] = (B_i)^{-1} * Y[i]) */
 36:     for (i = 0; i <= lmvm->k; ++i) {
 37:       PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lbrdn->Q[i]));
 38:       for (j = 0; j <= i - 1; ++j) {
 39:         PetscCall(VecDot(lmvm->S[j], lbrdn->Q[i], &sjtqi));
 40:         PetscCall(VecAXPBYPCZ(lbrdn->Q[i], PetscRealPart(sjtqi) / lbrdn->stq[j], -PetscRealPart(sjtqi) / lbrdn->stq[j], 1.0, lmvm->S[j], lbrdn->Q[j]));
 41:       }
 42:       PetscCall(VecDot(lmvm->S[i], lbrdn->Q[i], &stq));
 43:       lbrdn->stq[i] = PetscRealPart(stq);
 44:     }
 45:     lbrdn->needQ = PETSC_FALSE;
 46:   }

 48:   PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
 49:   for (i = 0; i <= lmvm->k; ++i) {
 50:     PetscCall(VecDot(lmvm->S[i], dX, &stx));
 51:     PetscCall(VecAXPBYPCZ(dX, PetscRealPart(stx) / lbrdn->stq[i], -PetscRealPart(stx) / lbrdn->stq[i], 1.0, lmvm->S[i], lbrdn->Q[i]));
 52:   }
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: /*------------------------------------------------------------*/

 58: /*
 59:   The forward product is the matrix-free implementation of Equation 2 in
 60:   page 302 of Griewank "Broyden Updating, The Good and The Bad!"
 61:   (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).

 63:   P[i] = (B_i)*S[i] terms are computed ahead of time whenever
 64:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 65:   repeated calls of MatMult inside KSP solvers without unnecessarily
 66:   recomputing P[i] terms in expensive nested-loops.

 68:   Z <- J0 * X

 70:   for i=0,1,2,...,k
 71:     # P[i] = B_i * S[i]
 72:     tau = (S[i]^T X) / (S[i]^T S[i])
 73:     dX <- dX + (tau * (Y[i] - P[i]))
 74:   end
 75:  */

 77: static PetscErrorCode MatMult_LMVMBrdn(Mat B, Vec X, Vec Z)
 78: {
 79:   Mat_LMVM   *lmvm  = (Mat_LMVM *)B->data;
 80:   Mat_Brdn   *lbrdn = (Mat_Brdn *)lmvm->ctx;
 81:   PetscInt    i, j;
 82:   PetscScalar sjtsi, stx;

 84:   PetscFunctionBegin;
 85:   VecCheckSameSize(X, 2, Z, 3);
 86:   VecCheckMatCompatible(B, X, 2, Z, 3);

 88:   if (lbrdn->needP) {
 89:     /* Pre-compute (P[i] = (B_i) * S[i]) */
 90:     for (i = 0; i <= lmvm->k; ++i) {
 91:       PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lbrdn->P[i]));
 92:       for (j = 0; j <= i - 1; ++j) {
 93:         PetscCall(VecDot(lmvm->S[j], lmvm->S[i], &sjtsi));
 94:         PetscCall(VecAXPBYPCZ(lbrdn->P[i], PetscRealPart(sjtsi) / lbrdn->sts[j], -PetscRealPart(sjtsi) / lbrdn->sts[j], 1.0, lmvm->Y[j], lbrdn->P[j]));
 95:       }
 96:     }
 97:     lbrdn->needP = PETSC_FALSE;
 98:   }

100:   PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
101:   for (i = 0; i <= lmvm->k; ++i) {
102:     PetscCall(VecDot(lmvm->S[i], X, &stx));
103:     PetscCall(VecAXPBYPCZ(Z, PetscRealPart(stx) / lbrdn->sts[i], -PetscRealPart(stx) / lbrdn->sts[i], 1.0, lmvm->Y[i], lbrdn->P[i]));
104:   }
105:   PetscFunctionReturn(PETSC_SUCCESS);
106: }

108: /*------------------------------------------------------------*/

110: static PetscErrorCode MatUpdate_LMVMBrdn(Mat B, Vec X, Vec F)
111: {
112:   Mat_LMVM   *lmvm  = (Mat_LMVM *)B->data;
113:   Mat_Brdn   *lbrdn = (Mat_Brdn *)lmvm->ctx;
114:   PetscInt    old_k, i;
115:   PetscScalar sts;

117:   PetscFunctionBegin;
118:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
119:   if (lmvm->prev_set) {
120:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
121:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
122:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
123:     /* Accept the update */
124:     lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
125:     old_k                       = lmvm->k;
126:     PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
127:     /* If we hit the memory limit, shift the sts array */
128:     if (old_k == lmvm->k) {
129:       for (i = 0; i <= lmvm->k - 1; ++i) lbrdn->sts[i] = lbrdn->sts[i + 1];
130:     }
131:     PetscCall(VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &sts));
132:     lbrdn->sts[lmvm->k] = PetscRealPart(sts);
133:   }
134:   /* Save the solution and function to be used in the next update */
135:   PetscCall(VecCopy(X, lmvm->Xprev));
136:   PetscCall(VecCopy(F, lmvm->Fprev));
137:   lmvm->prev_set = PETSC_TRUE;
138:   PetscFunctionReturn(PETSC_SUCCESS);
139: }

141: /*------------------------------------------------------------*/

143: static PetscErrorCode MatCopy_LMVMBrdn(Mat B, Mat M, MatStructure str)
144: {
145:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
146:   Mat_Brdn *bctx  = (Mat_Brdn *)bdata->ctx;
147:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
148:   Mat_Brdn *mctx  = (Mat_Brdn *)mdata->ctx;
149:   PetscInt  i;

151:   PetscFunctionBegin;
152:   mctx->needP = bctx->needP;
153:   mctx->needQ = bctx->needQ;
154:   for (i = 0; i <= bdata->k; ++i) {
155:     mctx->sts[i] = bctx->sts[i];
156:     mctx->stq[i] = bctx->stq[i];
157:     PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
158:     PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
159:   }
160:   PetscFunctionReturn(PETSC_SUCCESS);
161: }

163: /*------------------------------------------------------------*/

165: static PetscErrorCode MatReset_LMVMBrdn(Mat B, PetscBool destructive)
166: {
167:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
168:   Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;

170:   PetscFunctionBegin;
171:   lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
172:   if (destructive && lbrdn->allocated) {
173:     PetscCall(PetscFree2(lbrdn->sts, lbrdn->stq));
174:     PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->P));
175:     PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->Q));
176:     lbrdn->allocated = PETSC_FALSE;
177:   }
178:   PetscCall(MatReset_LMVM(B, destructive));
179:   PetscFunctionReturn(PETSC_SUCCESS);
180: }

182: /*------------------------------------------------------------*/

184: static PetscErrorCode MatAllocate_LMVMBrdn(Mat B, Vec X, Vec F)
185: {
186:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
187:   Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;

189:   PetscFunctionBegin;
190:   PetscCall(MatAllocate_LMVM(B, X, F));
191:   if (!lbrdn->allocated) {
192:     PetscCall(PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq));
193:     if (lmvm->m > 0) {
194:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lbrdn->P));
195:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lbrdn->Q));
196:     }
197:     lbrdn->allocated = PETSC_TRUE;
198:   }
199:   PetscFunctionReturn(PETSC_SUCCESS);
200: }

202: /*------------------------------------------------------------*/

204: static PetscErrorCode MatDestroy_LMVMBrdn(Mat B)
205: {
206:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
207:   Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;

209:   PetscFunctionBegin;
210:   if (lbrdn->allocated) {
211:     PetscCall(PetscFree2(lbrdn->sts, lbrdn->stq));
212:     PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->P));
213:     PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->Q));
214:     lbrdn->allocated = PETSC_FALSE;
215:   }
216:   PetscCall(PetscFree(lmvm->ctx));
217:   PetscCall(MatDestroy_LMVM(B));
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }

221: /*------------------------------------------------------------*/

223: static PetscErrorCode MatSetUp_LMVMBrdn(Mat B)
224: {
225:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
226:   Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;

228:   PetscFunctionBegin;
229:   PetscCall(MatSetUp_LMVM(B));
230:   if (!lbrdn->allocated) {
231:     PetscCall(PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq));
232:     if (lmvm->m > 0) {
233:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->P));
234:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->Q));
235:     }
236:     lbrdn->allocated = PETSC_TRUE;
237:   }
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: /*------------------------------------------------------------*/

243: PetscErrorCode MatCreate_LMVMBrdn(Mat B)
244: {
245:   Mat_LMVM *lmvm;
246:   Mat_Brdn *lbrdn;

248:   PetscFunctionBegin;
249:   PetscCall(MatCreate_LMVM(B));
250:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMBROYDEN));
251:   B->ops->setup   = MatSetUp_LMVMBrdn;
252:   B->ops->destroy = MatDestroy_LMVMBrdn;
253:   B->ops->solve   = MatSolve_LMVMBrdn;

255:   lmvm                = (Mat_LMVM *)B->data;
256:   lmvm->square        = PETSC_TRUE;
257:   lmvm->ops->allocate = MatAllocate_LMVMBrdn;
258:   lmvm->ops->reset    = MatReset_LMVMBrdn;
259:   lmvm->ops->mult     = MatMult_LMVMBrdn;
260:   lmvm->ops->update   = MatUpdate_LMVMBrdn;
261:   lmvm->ops->copy     = MatCopy_LMVMBrdn;

263:   PetscCall(PetscNew(&lbrdn));
264:   lmvm->ctx        = (void *)lbrdn;
265:   lbrdn->allocated = PETSC_FALSE;
266:   lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
267:   PetscFunctionReturn(PETSC_SUCCESS);
268: }

270: /*------------------------------------------------------------*/

272: /*@
273:    MatCreateLMVMBroyden - Creates a limited-memory "good" Broyden-type approximation
274:    matrix used for a Jacobian. L-Brdn is not guaranteed to be symmetric or
275:    positive-definite.

277:    To use the L-Brdn matrix with other vector types, the matrix must be
278:    created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
279:    This ensures that the internal storage and work vectors are duplicated from the
280:    correct type of vector.

282:    Collective

284:    Input Parameters:
285: +  comm - MPI communicator
286: .  n - number of local rows for storage vectors
287: -  N - global size of the storage vectors

289:    Output Parameter:
290: .  B - the matrix

292:    Level: intermediate

294:    Note:
295:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
296:    paradigm instead of this routine directly.

298: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMBRDN`, `MatCreateLMVMDFP()`, `MatCreateLMVMSR1()`,
299:           `MatCreateLMVMBFGS()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
300: @*/
301: PetscErrorCode MatCreateLMVMBroyden(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
302: {
303:   PetscFunctionBegin;
304:   PetscCall(MatCreate(comm, B));
305:   PetscCall(MatSetSizes(*B, n, n, N, N));
306:   PetscCall(MatSetType(*B, MATLMVMBROYDEN));
307:   PetscCall(MatSetUp(*B));
308:   PetscFunctionReturn(PETSC_SUCCESS);
309: }