Actual source code: ex236.c
1: static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n";
3: #include <petscmat.h>
5: int main(int argc, char **argv)
6: {
7: PetscMPIInt rank, size;
8: Mat A;
9: PetscInt i, j, k, n = 3, vstart, rstart, rend, margin;
10: Vec x, y;
12: PetscFunctionBeginUser;
13: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
14: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
15: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
17: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
18: PetscCall(MatSetSizes(A, n, n, PETSC_DECIDE, PETSC_DECIDE));
19: PetscCall(MatSetFromOptions(A));
21: PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, 0, NULL));
22: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
23: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
24: PetscCall(MatCreateVecs(A, &x, &y));
25: PetscCall(VecSet(x, 1.0));
27: /*
28: Matrix A only has nonzeros in the diagonal block, which is of size 3x3.
29: We do three successive assemblies on A. The first two have the same non-zero
30: pattern but different values, and the third breaks the non-zero pattern. The
31: first two assemblies have enough zero-rows that triggers compressed-row storage
32: in MATAIJ and MATAIJCUSPARSE.
34: These settings are used to test memory management and correctness in MatMult
35: and MatMultTransposeAdd.
36: */
38: for (k = 0; k < 3; k++) { /* Three assemblies */
39: vstart = (size * k + rank) * n * n + 1;
40: margin = (k == 2) ? 0 : 2; /* Create two zero-rows in the first two assemblies */
41: for (i = rstart; i < rend - margin; i++) {
42: for (j = rstart; j < rend; j++) {
43: PetscCall(MatSetValue(A, i, j, (PetscScalar)vstart, INSERT_VALUES));
44: vstart++;
45: }
46: }
47: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
48: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
49: PetscCall(MatMult(A, x, y));
50: PetscCall(MatMultTransposeAdd(A, x, y, y)); /* y[i] = sum of row i and column i of A */
51: PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
52: }
54: PetscCall(MatDestroy(&A));
55: PetscCall(VecDestroy(&x));
56: PetscCall(VecDestroy(&y));
57: PetscCall(PetscFinalize());
59: /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */
60: /*cudaDeviceReset();*/
61: return 0;
62: }
64: /*TEST
66: testset:
67: nsize: 2
68: output_file: output/ex236_1.out
69: filter: grep -v type
71: test:
72: args: -mat_type aij
74: test:
75: requires: cuda
76: suffix: cuda
77: args: -mat_type aijcusparse
78: TEST*/