Actual source code: ex19.c
static char help[] = "Solve a 2D 5-point stencil in parallel with Kokkos batched KSP and ASM solvers.\n\
Input parameters include:\n\
  -n                : number of mesh points in x direction\n\
  -m                : number of mesh points in y direction\n\
  -num_local_blocks : number of local subdomains for block Jacobi\n\n";
/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
#include <petscmat.h>
int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A, Pmat; /* linear system matrix and preconditioning matrix */
  KSP         ksp;     /* linear solver context */
  PetscReal   norm;    /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
  PetscBool   flg;
  PetscScalar v;
  PetscMPIInt size, rank;
  IS         *is_loc = NULL;
  PC          pc;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Compute the matrix and right-hand-side vector that define
        the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
  /* Build Pmat from scratch: MatDuplicate() would keep A's full nonzero pattern,
     including the inter-block couplings that are dropped below */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
  PetscCall(MatSetSizes(Pmat, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(Pmat));
  PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
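  /* Preallocation note: a 5-point stencil has at most 5 nonzeros per row, so 5 slots
     are reserved per row of the local diagonal block and a few for the off-diagonal
     block coupling neighboring ranks; over-estimating is cheap, under-estimating
     forces costly reallocation during assembly. */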
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  /*
     Set matrix elements for the 2-D, five-point stencil.
  */
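  /* Row Ii corresponds to grid point (i, j) with i = Ii / n and j = Ii % n.
     Each row receives the stencil
            -1
        -1   4  -1
            -1
     and the guards below simply skip neighbors that fall outside the grid. */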
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(MatCreateVecs(A, &x, NULL));
  /*
     Set the exact solution, then compute the right-hand-side vector.
     By default the exact solution is a vector with all elements equal to 1.0.
  */
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));
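  /* Manufactured right-hand side: b = A*u with u known, so the error norm
     ||x - u|| computed after the solve measures the solver accuracy directly. */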
  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Set up the ASM subdomains and the batched KSP solver data
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(PCASMCreateSubdomains(A, nblocks, &is_loc));
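  /* PCASMCreateSubdomains() returns nblocks index sets that partition the rows
     owned by this rank; the same index sets are used both to assemble the
     block-diagonal Pmat below and, later, as the ASM subdomains. */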
  {
    MatScalar *AA;
    PetscInt  *AJ, maxcols = 0, ncols;
    for (PetscInt row = Istart; row < Iend; row++) {
      PetscCall(MatGetRow(A, row, &ncols, NULL, NULL));
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(A, row, &ncols, NULL, NULL));
    }
    PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
    /* make explicit block matrix for batch solver */
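    /* For every subdomain, copy into Pmat only those entries of A whose column
       index also belongs to the same subdomain. The result is a matrix that is
       block diagonal with respect to the subdomains, which is what the batched
       (bjkokkos) and ASM solvers invert block by block. */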
    for (PetscInt bid = 0; bid < nblocks; bid++) {
      IS              blk_is = is_loc[bid];
      const PetscInt *subdom, *cols;
      PetscInt        nrows, ncol_row, jj; /* nrows: number of rows in this subdomain (avoids shadowing the mesh size n) */
      PetscCall(ISGetIndices(blk_is, &subdom));
      PetscCall(ISGetSize(blk_is, &nrows));
      for (PetscInt ii = 0; ii < nrows; ii++) {
        const MatScalar *vals;
        PetscInt         rowB = subdom[ii]; // global row index
        PetscCall(MatGetRow(A, rowB, &ncols, &cols, &vals));
        for (jj = ncol_row = 0; jj < ncols; jj++) {
          PetscInt idx, colj = cols[jj];
          PetscCall(ISLocate(blk_is, colj, &idx));
          if (idx >= 0) { /* keep the entry only if its column lies in the same subdomain */
            AJ[ncol_row] = cols[jj];
            AA[ncol_row] = vals[jj];
            ncol_row++;
          }
        }
        PetscCall(MatRestoreRow(A, rowB, &ncols, &cols, &vals));
        PetscCall(MatSetValues(Pmat, 1, &rowB, ncol_row, AJ, AA, INSERT_VALUES));
      }
      PetscCall(ISRestoreIndices(blk_is, &subdom));
    }
    PetscCall(PetscFree2(AA, AJ));
    PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatViewFromOptions(Pmat, NULL, "-view_c"));
  }
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, Pmat));
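  /* A defines the operator used for residuals and matrix-vector products, while
     the block-diagonal Pmat is the matrix the preconditioner is built from, so
     the Krylov method still solves Ax = b even though the preconditioner only
     sees the decoupled blocks. */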
  PetscCall(KSPSetFromOptions(ksp));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Set up the ASM solver (when -pc_type asm is selected)
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
  if (flg && nblocks > 0) { PetscCall(PCASMSetLocalSubdomains(pc, nblocks, is_loc, NULL)); }
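  /* Hand the ASM preconditioner the same subdomains used to assemble Pmat, so
     each ASM block coincides with one diagonal block of Pmat. */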
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  /*
     Print convergence information.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));
  /*
     Clean up: PCASMDestroySubdomains() frees both the index sets and the array
     that holds them.
  */
  PetscCall(KSPDestroy(&ksp));
  if (is_loc) PetscCall(PCASMDestroySubdomains(nblocks, is_loc, NULL));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&Pmat));
  PetscCall(PetscFinalize());
  return 0;
}
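/*
   Illustrative run (the executable name, MPI launcher, and GPU back end depend on
   your PETSc build; the options are taken from the test cases below):

     mpiexec -n 4 ./ex19 -m 37 -n 23 -num_local_blocks 4 \
       -ksp_type cg -ksp_rtol 1e-4 -ksp_converged_reason \
       -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
*/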
/*TEST

  build:
    requires: parmetis kokkos_kernels

  testset:
    args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
    nsize: 4
    output_file: output/ex19_0.out
    test:
      suffix: batch
      args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
    test:
      suffix: asm
      args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3

TEST*/