Actual source code: ex9.c


  2: static char help[] = "Solves 2 different linear systems with different linear solvers.\n\
  3: Also illustrates the repeated solution of linear systems, while reusing\n\
  4: matrix, vector, and solver data structures throughout the process.\n\
  5: Note the various stages of event logging.\n\n";

  7: /*
  8:   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  9:   automatically includes:
 10:      petscsys.h    - base PETSc routines     petscvec.h - vectors
 11:      petscmat.h    - matrices
 12:      petscis.h     - index sets              petscksp.h - Krylov subspace methods
 13:      petscviewer.h - viewers                 petscpc.h  - preconditioners
 14: */
 15: #include <petscksp.h>

 17: /*
 18:    Declare user-defined routines
 19: */
 20: extern PetscErrorCode CheckError(Vec, Vec, Vec, PetscInt, PetscReal, PetscLogEvent);
 21: extern PetscErrorCode MyKSPMonitor(KSP, PetscInt, PetscReal, void *);

 23: int main(int argc, char **args)
 24: {
 25:   Vec           x1, b1, x2, b2; /* solution and RHS vectors for systems #1 and #2 */
 26:   Vec           u;              /* exact solution vector */
 27:   Mat           C1, C2;         /* matrices for systems #1 and #2 */
 28:   KSP           ksp1, ksp2;     /* KSP contexts for systems #1 and #2 */
 29:   PetscInt      ntimes = 3;     /* number of times to solve the linear systems */
 30:   PetscLogEvent CHECK_ERROR;    /* event number for error checking */
 31:   PetscInt      ldim, low, high, iglobal, Istart, Iend, Istart2, Iend2;
 32:   PetscInt      Ii, J, i, j, m = 3, n = 2, its, t;
 33:   PetscBool     flg = PETSC_FALSE, unsym = PETSC_TRUE;
 34:   PetscScalar   v;
 35:   PetscMPIInt   rank, size;
 36: #if defined(PETSC_USE_LOG)
 37:   PetscLogStage stages[3];
 38: #endif

 40:   PetscFunctionBeginUser;
 41:   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
 42:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 43:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-t", &ntimes, NULL));
 44:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-unsym", &unsym, NULL));
 45:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
 46:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
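  /*
     Tie n to the number of MPI processes so that, in the assembly loop for the
     second linear system below, each process sets matrix entries for exactly
     two grid columns (j = 2*rank and j = 2*rank+1).
  */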
 47:   n = 2 * size;

 49:   /*
 50:      Register various stages for profiling
 51:   */
 52:   PetscCall(PetscLogStageRegister("Prelim setup", &stages[0]));
 53:   PetscCall(PetscLogStageRegister("Linear System 1", &stages[1]));
 54:   PetscCall(PetscLogStageRegister("Linear System 2", &stages[2]));

 56:   /*
 57:      Register a user-defined event for profiling (error checking).
 58:   */
 59:   CHECK_ERROR = 0;
 60:   PetscCall(PetscLogEventRegister("Check Error", KSP_CLASSID, &CHECK_ERROR));
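  /*
     When the program is run with the option -log_view, the three stages registered
     above and the "Check Error" event appear as separate entries in the printed
     performance summary, so the cost of each phase can be examined independently.
  */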

 62:   /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - -
 63:                         Preliminary Setup
 64:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

 66:   PetscCall(PetscLogStagePush(stages[0]));

 68:   /*
 69:      Create data structures for first linear system.
 70:       - Create parallel matrix, specifying only its global dimensions.
 71:         When using MatCreate(), the matrix format can be specified at
 72:         runtime. Also, the parallel partitioning of the matrix is
 73:         determined by PETSc at runtime.
 74:       - Create parallel vectors.
 75:         - When using VecSetSizes(), we specify only the vector's global
 76:           dimension; the parallel partitioning is determined at runtime.
 77:         - Note: We form 1 vector from scratch and then duplicate as needed.
 78:   */
 79:   PetscCall(MatCreate(PETSC_COMM_WORLD, &C1));
 80:   PetscCall(MatSetSizes(C1, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
 81:   PetscCall(MatSetFromOptions(C1));
 82:   PetscCall(MatSetUp(C1));
 83:   PetscCall(MatGetOwnershipRange(C1, &Istart, &Iend));
 84:   PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
 85:   PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
 86:   PetscCall(VecSetFromOptions(u));
 87:   PetscCall(VecDuplicate(u, &b1));
 88:   PetscCall(VecDuplicate(u, &x1));

 90:   /*
 91:      Create first linear solver context.
 92:      Set runtime options (e.g., -pc_type <type>).
 93:      Note that the first linear system uses the default option
 94:      names, while the second linear system uses a different
 95:      options prefix.
 96:   */
 97:   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp1));
 98:   PetscCall(KSPSetFromOptions(ksp1));

100:   /*
101:      Set user-defined monitoring routine for first linear system.
102:   */
103:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-my_ksp_monitor", &flg, NULL));
104:   if (flg) PetscCall(KSPMonitorSet(ksp1, MyKSPMonitor, NULL, NULL));
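  /*
     The third argument of KSPMonitorSet() is an optional user-defined context that
     is passed to MyKSPMonitor() as its final parameter, and the fourth argument may
     supply a routine that destroys that context; both are unused here.
  */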

106:   /*
107:      Create data structures for second linear system.
108:   */
109:   PetscCall(MatCreate(PETSC_COMM_WORLD, &C2));
110:   PetscCall(MatSetSizes(C2, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
111:   PetscCall(MatSetFromOptions(C2));
112:   PetscCall(MatSetUp(C2));
113:   PetscCall(MatGetOwnershipRange(C2, &Istart2, &Iend2));
114:   PetscCall(VecDuplicate(u, &b2));
115:   PetscCall(VecDuplicate(u, &x2));

117:   /*
118:      Create second linear solver context
119:   */
120:   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp2));

122:   /*
123:      Set different options prefix for second linear system.
124:      Set runtime options (e.g., -s2_pc_type <type>)
125:   */
126:   PetscCall(KSPAppendOptionsPrefix(ksp2, "s2_"));
127:   PetscCall(KSPSetFromOptions(ksp2));
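  /*
     A small optional check, not part of the original ex9.c: confirm the prefix
     that ksp2 prepends when it reads runtime options such as -s2_ksp_type.
  */
  {
    const char *prefix;
    PetscCall(KSPGetOptionsPrefix(ksp2, &prefix));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Options prefix for second solver: %s\n", prefix));
  }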

129:   /*
130:      Assemble exact solution vector in parallel.  Note that each
131:      processor needs to set only its local part of the vector.
132:   */
133:   PetscCall(VecGetLocalSize(u, &ldim));
134:   PetscCall(VecGetOwnershipRange(u, &low, &high));
135:   for (i = 0; i < ldim; i++) {
136:     iglobal = i + low;
137:     v       = (PetscScalar)(i + 100 * rank);
138:     PetscCall(VecSetValues(u, 1, &iglobal, &v, ADD_VALUES));
139:   }
140:   PetscCall(VecAssemblyBegin(u));
141:   PetscCall(VecAssemblyEnd(u));

143:   /*
144:      Log the number of flops for computing vector entries
145:   */
146:   PetscCall(PetscLogFlops(2.0 * ldim));

148:   /*
149:      End current profiling stage
150:   */
151:   PetscCall(PetscLogStagePop());

153:   /* --------------------------------------------------------------
154:                         Linear solver loop:
155:       Solve 2 different linear systems several times in succession
156:      -------------------------------------------------------------- */

158:   for (t = 0; t < ntimes; t++) {
159:     /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - -
160:                  Assemble and solve first linear system
161:        - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

163:     /*
164:        Begin profiling stage #1
165:     */
166:     PetscCall(PetscLogStagePush(stages[1]));

168:     /*
169:        Initialize all matrix entries to zero.  MatZeroEntries() retains
170:        the nonzero structure of the matrix for sparse formats.
171:     */
172:     if (t > 0) PetscCall(MatZeroEntries(C1));

174:     /*
175:        Set matrix entries in parallel.  Also, log the number of flops
176:        for computing matrix entries.
177:         - Each processor needs to insert only elements that it owns
178:           locally (but any non-local elements will be sent to the
179:           appropriate processor during matrix assembly).
180:         - Always specify global row and column indices of matrix entries.
181:     */
182:     for (Ii = Istart; Ii < Iend; Ii++) {
183:       v = -1.0;
184:       i = Ii / n;
185:       j = Ii - i * n;
186:       if (i > 0) {
187:         J = Ii - n;
188:         PetscCall(MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES));
189:       }
190:       if (i < m - 1) {
191:         J = Ii + n;
192:         PetscCall(MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES));
193:       }
194:       if (j > 0) {
195:         J = Ii - 1;
196:         PetscCall(MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES));
197:       }
198:       if (j < n - 1) {
199:         J = Ii + 1;
200:         PetscCall(MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES));
201:       }
202:       v = 4.0;
203:       PetscCall(MatSetValues(C1, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
204:     }
205:     if (unsym) {
206:       for (Ii = Istart; Ii < Iend; Ii++) { /* Make matrix nonsymmetric */
207:         v = -1.0 * (t + 0.5);
208:         i = Ii / n;
209:         if (i > 0) {
210:           J = Ii - n;
211:           PetscCall(MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES));
212:         }
213:       }
214:       PetscCall(PetscLogFlops(2.0 * (Iend - Istart)));
215:     }

217:     /*
218:        Assemble matrix, using the 2-step process:
219:          MatAssemblyBegin(), MatAssemblyEnd()
220:        Computations can be done while messages are in transit
221:        by placing code between these two statements.
222:     */
223:     PetscCall(MatAssemblyBegin(C1, MAT_FINAL_ASSEMBLY));
224:     PetscCall(MatAssemblyEnd(C1, MAT_FINAL_ASSEMBLY));
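    /*
       Nothing is placed between the two assembly calls in this example, but any
       local computation that does not modify C1 could be inserted there to overlap
       with the communication of off-process matrix entries.
    */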

226:     /*
227:        Allow new nonzero locations in later assemblies of this matrix (PETSC_TRUE is the default)
228:     */
229:     PetscCall(MatSetOption(C1, MAT_NEW_NONZERO_LOCATIONS, PETSC_TRUE));

231:     /*
232:        Compute right-hand-side vector
233:     */
234:     PetscCall(MatMult(C1, u, b1));

236:     /*
237:        Set operators. Here the matrix that defines the linear system
238:        also serves as the preconditioning matrix.
239:     */
240:     PetscCall(KSPSetOperators(ksp1, C1, C1));

242:     /*
243:        Use the previous solution of linear system #1 as the initial
244:        guess for the next solve of linear system #1.  The user MUST
245:        call KSPSetInitialGuessNonzero() to indicate use of an initial
246:        guess vector; otherwise, an initial guess of zero is used.
247:     */
248:     if (t > 0) PetscCall(KSPSetInitialGuessNonzero(ksp1, PETSC_TRUE));

250:     /*
251:        Solve the first linear system.  Here we explicitly call
252:        KSPSetUp() for more detailed performance monitoring of
253:        certain preconditioners, such as ICC and ILU.  This call
254:        is optional, as KSPSetUp() will automatically be called
255:        within KSPSolve() if it hasn't been called already.
256:     */
257:     PetscCall(KSPSetUp(ksp1));
258:     PetscCall(KSPSolve(ksp1, b1, x1));
259:     PetscCall(KSPGetIterationNumber(ksp1, &its));
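    /*
       Optional diagnostic, not part of the original ex9.c: query why the iteration
       stopped.  A negative KSPConvergedReason value indicates divergence or a
       failure such as exceeding the maximum number of iterations.
    */
    {
      KSPConvergedReason reason;
      PetscCall(KSPGetConvergedReason(ksp1, &reason));
      if (reason < 0) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "System 1 solve did not converge (reason %d)\n", (int)reason));
    }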

261:     /*
262:        Check error of solution to first linear system
263:     */
264:     PetscCall(CheckError(u, x1, b1, its, 1.e-4, CHECK_ERROR));

266:     /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - -
267:                  Assemble and solve second linear system
268:        - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

270:     /*
271:        Conclude profiling stage #1; begin profiling stage #2
272:     */
273:     PetscCall(PetscLogStagePop());
274:     PetscCall(PetscLogStagePush(stages[2]));

276:     /*
277:        Initialize all matrix entries to zero
278:     */
279:     if (t > 0) PetscCall(MatZeroEntries(C2));

281:     /*
282:        Assemble matrix in parallel. Also, log the number of flops
283:        for computing matrix entries.
284:         - To illustrate the features of parallel matrix assembly, we
285:           intentionally set some values from processes that do not own
286:           the corresponding rows of the distributed matrix.  Each
287:           entry that is not owned locally will be sent to the appropriate
288:           processor during MatAssemblyBegin() and MatAssemblyEnd().
289:         - For best efficiency the user should strive to set as many
290:           entries locally as possible.
291:      */
292:     for (i = 0; i < m; i++) {
293:       for (j = 2 * rank; j < 2 * rank + 2; j++) {
294:         v  = -1.0;
295:         Ii = j + n * i;
296:         if (i > 0) {
297:           J = Ii - n;
298:           PetscCall(MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES));
299:         }
300:         if (i < m - 1) {
301:           J = Ii + n;
302:           PetscCall(MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES));
303:         }
304:         if (j > 0) {
305:           J = Ii - 1;
306:           PetscCall(MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES));
307:         }
308:         if (j < n - 1) {
309:           J = Ii + 1;
310:           PetscCall(MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES));
311:         }
312:         v = 6.0 + t * 0.5;
313:         PetscCall(MatSetValues(C2, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
314:       }
315:     }
316:     if (unsym) {
317:       for (Ii = Istart2; Ii < Iend2; Ii++) { /* Make matrix nonsymmetric */
318:         v = -1.0 * (t + 0.5);
319:         i = Ii / n;
320:         if (i > 0) {
321:           J = Ii - n;
322:           PetscCall(MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES));
323:         }
324:       }
325:     }
326:     PetscCall(MatAssemblyBegin(C2, MAT_FINAL_ASSEMBLY));
327:     PetscCall(MatAssemblyEnd(C2, MAT_FINAL_ASSEMBLY));
328:     PetscCall(PetscLogFlops(2.0 * (Iend2 - Istart2)));

330:     /*
331:        Indicate same nonzero structure of successive linear system matrices
332:     */
333:     PetscCall(MatSetOption(C2, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));

335:     /*
336:        Compute right-hand-side vector
337:     */
338:     PetscCall(MatMult(C2, u, b2));

340:     /*
341:        Set operators. Here the matrix that defines the linear system
342:        also serves as the preconditioning matrix.  The unchanged nonzero
343:        structure of successive matrices was already indicated above by
344:        setting MAT_NEW_NONZERO_LOCATIONS to PETSC_FALSE.
345:     */
346:     PetscCall(KSPSetOperators(ksp2, C2, C2));

348:     /*
349:        Solve the second linear system
350:     */
351:     PetscCall(KSPSetUp(ksp2));
352:     PetscCall(KSPSolve(ksp2, b2, x2));
353:     PetscCall(KSPGetIterationNumber(ksp2, &its));

355:     /*
356:        Check error of solution to second linear system
357:     */
358:     PetscCall(CheckError(u, x2, b2, its, 1.e-4, CHECK_ERROR));

360:     /*
361:        Conclude profiling stage #2
362:     */
363:     PetscCall(PetscLogStagePop());
364:   }
365:   /* --------------------------------------------------------------
366:                        End of linear solver loop
367:      -------------------------------------------------------------- */

369:   /*
370:      Free work space.  All PETSc objects should be destroyed when they
371:      are no longer needed.
372:   */
373:   PetscCall(KSPDestroy(&ksp1));
374:   PetscCall(KSPDestroy(&ksp2));
375:   PetscCall(VecDestroy(&x1));
376:   PetscCall(VecDestroy(&x2));
377:   PetscCall(VecDestroy(&b1));
378:   PetscCall(VecDestroy(&b2));
379:   PetscCall(MatDestroy(&C1));
380:   PetscCall(MatDestroy(&C2));
381:   PetscCall(VecDestroy(&u));

383:   PetscCall(PetscFinalize());
384:   return 0;
385: }
386: /* ------------------------------------------------------------- */
387: /*
388:     CheckError - Checks the error of the solution.

390:     Input Parameters:
391:     u - exact solution
392:     x - approximate solution
393:     b - work vector
394:     its - number of iterations for convergence
395:     tol - tolerance
396:     CHECK_ERROR - the event number for error checking
397:                   (for use with profiling)

399:     Notes:
400:     In order to profile this section of code separately from the
401:     rest of the program, we register it as an "event" with
402:     PetscLogEventRegister() in the main program.  Then, we indicate
403:     the start and end of this event by respectively calling
404:         PetscLogEventBegin(CHECK_ERROR,u,x,b,0);
405:         PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
406:     Here, we specify the objects most closely associated with
407:     the event (the vectors u,x,b).  Such information is optional;
408:     we could instead use 0 for all objects.
409: */
410: PetscErrorCode CheckError(Vec u, Vec x, Vec b, PetscInt its, PetscReal tol, PetscLogEvent CHECK_ERROR)
411: {
412:   PetscScalar none = -1.0;
413:   PetscReal   norm;

415:   PetscFunctionBeginUser;
416:   PetscCall(PetscLogEventBegin(CHECK_ERROR, u, x, b, 0));

418:   /*
419:      Compute error of the solution, using b as a work vector.
420:   */
421:   PetscCall(VecCopy(x, b));
422:   PetscCall(VecAXPY(b, none, u));
423:   PetscCall(VecNorm(b, NORM_2, &norm));
424:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %" PetscInt_FMT "\n", (double)norm, its));
425:   PetscCall(PetscLogEventEnd(CHECK_ERROR, u, x, b, 0));
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }
428: /* ------------------------------------------------------------- */
429: /*
430:    MyKSPMonitor - This is a user-defined routine for monitoring
431:    the KSP iterative solvers.

433:    Input Parameters:
434:      ksp   - iterative context
435:      n     - iteration number
436:      rnorm - 2-norm (preconditioned) residual value (may be estimated)
437:      dummy - optional user-defined monitor context (unused here)
438: */
439: PetscErrorCode MyKSPMonitor(KSP ksp, PetscInt n, PetscReal rnorm, void *dummy)
440: {
441:   Vec x;

443:   PetscFunctionBeginUser;
444:   /*
445:      Build the solution vector
446:   */
447:   PetscCall(KSPBuildSolution(ksp, NULL, &x));

449:   /*
450:      Write the solution vector and residual norm to stdout.
451:       - PetscPrintf() handles output for multiprocessor jobs
452:         by printing from only one processor in the communicator.
453:       - The parallel viewer PETSC_VIEWER_STDOUT_WORLD handles
454:         data from multiple processors so that the output
455:         is not jumbled.
456:   */
457:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "iteration %" PetscInt_FMT " solution vector:\n", n));
458:   PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
459:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "iteration %" PetscInt_FMT " KSP Residual norm %14.12e \n", n, (double)rnorm));
460:   PetscFunctionReturn(PETSC_SUCCESS);
461: }
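/*
   Example invocation, one possibility that mirrors the first test below:
       mpiexec -n 2 ./ex9 -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres \
               -ksp_gmres_cgs_refinement_type refine_always \
               -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short
   Options carrying the s2_ prefix are applied only to the second linear solver.
*/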

463: /*TEST

465:    test:
466:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -ksp_gmres_cgs_refinement_type refine_always -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short

468:    test:
469:       requires: hpddm
470:       suffix: hpddm
471:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type {{gmres hpddm}} -s2_ksp_type {{gmres hpddm}} -s2_pc_type jacobi -s2_ksp_monitor_short

473:    test:
474:       requires: hpddm
475:       suffix: hpddm_2
476:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -s2_ksp_type hpddm -s2_ksp_hpddm_type gcrodr -s2_ksp_hpddm_recycle 10 -s2_pc_type jacobi -s2_ksp_monitor_short

478:    testset:
479:       requires: hpddm
480:       output_file: output/ex9_hpddm_cg.out
481:       args: -unsym 0 -t 2 -pc_type jacobi -ksp_monitor_short -s2_pc_type jacobi -s2_ksp_monitor_short -ksp_rtol 1.e-2 -s2_ksp_rtol 1.e-2
482:       test:
483:          suffix: hpddm_cg_p_p
484:          args: -ksp_type cg -s2_ksp_type cg
485:       test:
486:          suffix: hpddm_cg_p_h
487:          args: -ksp_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}
488:       test:
489:          suffix: hpddm_cg_h_h
490:          args: -ksp_type hpddm -ksp_hpddm_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}

492: TEST*/