Actual source code: ex96.c


static char help[] = "Tests sequential and parallel DMCreateMatrix(), MatMatMult() and MatPtAP()\n\
  -Mx <xg>, where <xg> = number of coarse grid points in the x-direction\n\
  -My <yg>, where <yg> = number of coarse grid points in the y-direction\n\
  -Mz <zg>, where <zg> = number of coarse grid points in the z-direction\n\
  -Npx <npx>, where <npx> = number of processors in the x-direction\n\
  -Npy <npy>, where <npy> = number of processors in the y-direction\n\
  -Npz <npz>, where <npz> = number of processors in the z-direction\n\n";

/*
    This test is modified from ~src/ksp/tests/ex19.c.
    Example of usage: mpiexec -n 3 ./ex96 -Mx 10 -My 10 -Mz 10
*/
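/*
    Another possible run, fixing the processor grid explicitly (assuming four MPI
    ranks; when Npx, Npy and Npz are all given, their product must match the
    number of ranks):
        mpiexec -n 4 ./ex96 -Mx 8 -My 8 -Mz 8 -Npx 2 -Npy 2 -Npz 1
*/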

#include <petscdm.h>
#include <petscdmda.h>

/* User-defined application contexts */
typedef struct {
  PetscInt mx, my, mz;     /* number of grid points in the x, y and z directions */
  Vec      localX, localF; /* local vectors with ghost region */
  DM       da;
  Vec      x, b, r; /* global vectors */
  Mat      J;       /* Jacobian on grid */
} GridCtx;
typedef struct {
  GridCtx  fine;
  GridCtx  coarse;
  PetscInt ratio;
  Mat      Ii; /* interpolation from coarse to fine */
} AppCtx;
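/* Only the coarse grid dimensions, the refinement ratio and the two DMs are
   exercised below; the remaining GridCtx/AppCtx members are carried over from
   ex19.c, from which this example was adapted. */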

#define COARSE_LEVEL 0
#define FINE_LEVEL   1

/*
      Mm_ratio - ratio of grid lines between the fine and coarse grids.
*/
int main(int argc, char **argv)
{
  AppCtx          user;
  PetscInt        Npx = PETSC_DECIDE, Npy = PETSC_DECIDE, Npz = PETSC_DECIDE;
  PetscMPIInt     size, rank;
  PetscInt        m, n, M, N, i, nrows;
  PetscScalar     one  = 1.0;
  PetscReal       fill = 2.0;
  Mat             A, A_tmp, P, C, C1, C2;
  PetscScalar    *array, none = -1.0, alpha;
  Vec             x, v1, v2, v3, v4;
  PetscReal       norm, norm_tmp, norm_tmp1, tol = 100. * PETSC_MACHINE_EPSILON;
  PetscRandom     rdm;
  PetscBool       Test_MatMatMult = PETSC_TRUE, Test_MatPtAP = PETSC_TRUE, Test_3D = PETSC_TRUE, flg;
  const PetscInt *ia, *ja;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));

  user.ratio     = 2;
  user.coarse.mx = 20;
  user.coarse.my = 20;
  user.coarse.mz = 20;

  PetscCall(PetscOptionsGetInt(NULL, NULL, "-Mx", &user.coarse.mx, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-My", &user.coarse.my, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-Mz", &user.coarse.mz, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-ratio", &user.ratio, NULL));

  if (user.coarse.mz) Test_3D = PETSC_TRUE;

  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-Npx", &Npx, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-Npy", &Npy, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-Npz", &Npz, NULL));

  /* Set up distributed array for the coarse grid */
  if (!Test_3D) {
    PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, user.coarse.mx, user.coarse.my, Npx, Npy, 1, 1, NULL, NULL, &user.coarse.da));
  } else {
    PetscCall(DMDACreate3d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, user.coarse.mx, user.coarse.my, user.coarse.mz, Npx, Npy, Npz, 1, 1, NULL, NULL, NULL, &user.coarse.da));
  }
  PetscCall(DMSetFromOptions(user.coarse.da));
  PetscCall(DMSetUp(user.coarse.da));

  /* Refining the coarse DMDA ensures that the fine DMDA gets the same parallel partitioning as the coarse DMDA */
  PetscCall(DMRefine(user.coarse.da, PetscObjectComm((PetscObject)user.coarse.da), &user.fine.da));
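  /* The default DMDA refinement factor is 2, so for non-periodic boundaries an
     Mx-point coarse grid becomes a (2*Mx - 1)-point fine grid in each direction */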

  /* Test DMCreateMatrix()                                      */
  /*------------------------------------------------------------*/
  PetscCall(DMSetMatType(user.fine.da, MATAIJ));
  PetscCall(DMCreateMatrix(user.fine.da, &A));
  PetscCall(DMSetMatType(user.fine.da, MATBAIJ));
  PetscCall(DMCreateMatrix(user.fine.da, &C));
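  /* The DMDA has one degree of freedom per grid point, so the AIJ matrix A and the
     BAIJ matrix C preallocated by the DM should carry the same nonzero structure and
     values; this is checked below by converting C to AIJ and comparing with MatEqual() */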

  PetscCall(MatConvert(C, MATAIJ, MAT_INITIAL_MATRIX, &A_tmp)); /* this conversion does not work for MPISBAIJ matrices */
  PetscCall(MatEqual(A, A_tmp, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "A != C");
  PetscCall(MatDestroy(&C));
  PetscCall(MatDestroy(&A_tmp));

  /*------------------------------------------------------------*/

  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatGetSize(A, &M, &N));
  /* if (rank == 0) printf("A %d, %d\n",M,N); */

  /* Set all stored values of A to one */
  if (size == 1) {
    PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
    if (flg) {
      PetscCall(MatSeqAIJGetArray(A, &array));
      for (i = 0; i < ia[nrows]; i++) array[i] = one;
      PetscCall(MatSeqAIJRestoreArray(A, &array));
    }
    PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
  } else {
    Mat AA, AB; /* diagonal and off-diagonal blocks of the local rows of the MPIAIJ matrix */
    PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, NULL));
    PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
    if (flg) {
      PetscCall(MatSeqAIJGetArray(AA, &array));
      for (i = 0; i < ia[nrows]; i++) array[i] = one;
      PetscCall(MatSeqAIJRestoreArray(AA, &array));
    }
    PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
    PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
    if (flg) {
      PetscCall(MatSeqAIJGetArray(AB, &array));
      for (i = 0; i < ia[nrows]; i++) array[i] = one;
      PetscCall(MatSeqAIJRestoreArray(AB, &array));
    }
    PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nrows, &ia, &ja, &flg));
  }
  /* PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); */

  /* Create interpolation between the fine and coarse grids */
  PetscCall(DMCreateInterpolation(user.coarse.da, user.fine.da, &P, NULL));
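  /* P interpolates coarse-grid vectors to fine-grid vectors: it has one row per
     fine-grid point and one column per coarse-grid point */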
  PetscCall(MatGetLocalSize(P, &m, &n));
  PetscCall(MatGetSize(P, &M, &N));
  /* if (rank == 0) printf("P %d, %d\n",M,N); */

  /* Create vectors v1 and v2 that are compatible with A */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &v1));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCall(VecSetSizes(v1, m, PETSC_DECIDE));
  PetscCall(VecSetFromOptions(v1));
  PetscCall(VecDuplicate(v1, &v2));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm));
  PetscCall(PetscRandomSetFromOptions(rdm));

  /* Test MatMatMult(): C = A*P */
  /*----------------------------*/
  if (Test_MatMatMult) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A_tmp));
    PetscCall(MatMatMult(A_tmp, P, MAT_INITIAL_MATRIX, fill, &C));
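    /* fill is the predicted ratio nnz(C)/(nnz(A)+nnz(P)) used to preallocate the
       product; fill = 2.0 is only a rough estimate for these stencil matrices */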

    /* Test MAT_REUSE_MATRIX - reuse symbolic C */
    alpha = 1.0;
    for (i = 0; i < 2; i++) {
      alpha -= 0.1;
      PetscCall(MatScale(A_tmp, alpha));
      PetscCall(MatMatMult(A_tmp, P, MAT_REUSE_MATRIX, fill, &C));
    }
    /* Free intermediate data structures created for reuse of C = A*P */
    PetscCall(MatProductClear(C));

    /* Test MatDuplicate()        */
    /*----------------------------*/
    PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &C1));
    PetscCall(MatDuplicate(C1, MAT_COPY_VALUES, &C2));
    PetscCall(MatDestroy(&C1));
    PetscCall(MatDestroy(&C2));

    /* Create vector x that is compatible with P */
    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(MatGetLocalSize(P, NULL, &n));
    PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
    PetscCall(VecSetFromOptions(x));
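    /* Check C = A*P by comparing C*x against A*(P*x) for several random vectors x
       and tracking the largest relative difference */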

    norm = 0.0;
    for (i = 0; i < 10; i++) {
      PetscCall(VecSetRandom(x, rdm));
      PetscCall(MatMult(P, x, v1));
      PetscCall(MatMult(A_tmp, v1, v2)); /* v2 = A*P*x */
      PetscCall(MatMult(C, x, v1));      /* v1 = C*x   */
      PetscCall(VecAXPY(v1, none, v2));
      PetscCall(VecNorm(v1, NORM_1, &norm_tmp));
      PetscCall(VecNorm(v2, NORM_1, &norm_tmp1));
      norm_tmp /= norm_tmp1;
      if (norm_tmp > norm) norm = norm_tmp;
    }
    if (norm >= tol && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Error: MatMatMult(), |v1 - v2|/|v2|: %g\n", (double)norm));

    PetscCall(VecDestroy(&x));
    PetscCall(MatDestroy(&C));
    PetscCall(MatDestroy(&A_tmp));
  }

  /* Test P^T * A * P - MatPtAP() */
  /*------------------------------*/
  if (Test_MatPtAP) {
    PetscCall(MatPtAP(A, P, MAT_INITIAL_MATRIX, fill, &C));
    PetscCall(MatGetLocalSize(C, &m, &n));
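    /* MatPtAP() forms the Galerkin coarse-grid operator C = P^T*A*P in a single call;
       C is square with the coarse-grid dimension */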

    /* Test MAT_REUSE_MATRIX - reuse symbolic C */
    alpha = 1.0;
    for (i = 0; i < 1; i++) {
      alpha -= 0.1;
      PetscCall(MatScale(A, alpha));
      PetscCall(MatPtAP(A, P, MAT_REUSE_MATRIX, fill, &C));
    }

    /* Free intermediate data structures created for reuse of C = Pt*A*P */
    PetscCall(MatProductClear(C));

    /* Test MatDuplicate()        */
    /*----------------------------*/
    PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &C1));
    PetscCall(MatDuplicate(C1, MAT_COPY_VALUES, &C2));
    PetscCall(MatDestroy(&C1));
    PetscCall(MatDestroy(&C2));

    /* Create vector x that is compatible with P */
    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(MatGetLocalSize(P, &m, &n));
    PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
    PetscCall(VecSetFromOptions(x));

    PetscCall(VecCreate(PETSC_COMM_WORLD, &v3));
    PetscCall(VecSetSizes(v3, n, PETSC_DECIDE));
    PetscCall(VecSetFromOptions(v3));
    PetscCall(VecDuplicate(v3, &v4));
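    /* Check C = P^T*A*P by comparing C*x against P^T*(A*(P*x)) for several random
       vectors x, using the coarse-grid work vectors v3 and v4 */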

    norm = 0.0;
    for (i = 0; i < 10; i++) {
      PetscCall(VecSetRandom(x, rdm));
      PetscCall(MatMult(P, x, v1));
      PetscCall(MatMult(A, v1, v2)); /* v2 = A*P*x */

      PetscCall(MatMultTranspose(P, v2, v3)); /* v3 = Pt*A*P*x */
      PetscCall(MatMult(C, x, v4));           /* v4 = C*x      */
      PetscCall(VecAXPY(v4, none, v3));
      PetscCall(VecNorm(v4, NORM_1, &norm_tmp));
      PetscCall(VecNorm(v3, NORM_1, &norm_tmp1));

      norm_tmp /= norm_tmp1;
      if (norm_tmp > norm) norm = norm_tmp;
    }
    if (norm >= tol && rank == 0) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Error: MatPtAP(), |v3 - v4|/|v3|: %g\n", (double)norm));
    PetscCall(MatDestroy(&C));
    PetscCall(VecDestroy(&v3));
    PetscCall(VecDestroy(&v4));
    PetscCall(VecDestroy(&x));
  }

  /* Clean up */
  PetscCall(MatDestroy(&A));
  PetscCall(PetscRandomDestroy(&rdm));
  PetscCall(VecDestroy(&v1));
  PetscCall(VecDestroy(&v2));
  PetscCall(DMDestroy(&user.fine.da));
  PetscCall(DMDestroy(&user.coarse.da));
  PetscCall(MatDestroy(&P));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      args: -Mx 10 -My 5 -Mz 10
      output_file: output/ex96_1.out

   test:
      suffix: nonscalable
      nsize: 3
      args: -Mx 10 -My 5 -Mz 10
      output_file: output/ex96_1.out

   test:
      suffix: scalable
      nsize: 3
      args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable
      output_file: output/ex96_1.out

   test:
     suffix: seq_scalable
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm scalable -inner_offdiag_mat_product_algorithm scalable
     output_file: output/ex96_1.out

   test:
     suffix: seq_sorted
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm sorted -inner_offdiag_mat_product_algorithm sorted
     output_file: output/ex96_1.out

   test:
     suffix: seq_scalable_fast
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm scalable_fast -inner_offdiag_mat_product_algorithm scalable_fast
     output_file: output/ex96_1.out

   test:
     suffix: seq_heap
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm heap -inner_offdiag_mat_product_algorithm heap
     output_file: output/ex96_1.out

   test:
     suffix: seq_btheap
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm btheap -inner_offdiag_mat_product_algorithm btheap
     output_file: output/ex96_1.out

   test:
     suffix: seq_llcondensed
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm llcondensed -inner_offdiag_mat_product_algorithm llcondensed
     output_file: output/ex96_1.out

   test:
     suffix: seq_rowmerge
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via scalable -inner_diag_mat_product_algorithm rowmerge -inner_offdiag_mat_product_algorithm rowmerge
     output_file: output/ex96_1.out

   test:
     suffix: allatonce
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via allatonce
     output_file: output/ex96_1.out

   test:
     suffix: allatonce_merged
     nsize: 3
     args: -Mx 10 -My 5 -Mz 10 -matmatmult_via scalable -matptap_via allatonce_merged
     output_file: output/ex96_1.out

TEST*/