Actual source code: ex5k.kokkos.cxx
1: static char help[] = "Test of Kokkos matrix assemble with 1D Laplacian. Kokkos version of ex5cu \n\n";
3: #include <petscconf.h>
4: #include <petscmat.h>
6: /*
7: Include Kokkos files
8: */
9: #include <Kokkos_Core.hpp>
10: #include <Kokkos_OffsetView.hpp>
12: #include <petscaijdevice.h>
14: int main(int argc, char **argv)
15: {
16: Mat A;
17: PetscInt N = 11, nz = 3, Istart, Iend, num_threads = 128;
18: PetscSplitCSRDataStructure d_mat;
19: PetscLogEvent event;
20: Vec x, y;
21: PetscMPIInt rank;
23: PetscFunctionBeginUser;
24: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
25: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
26: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
27: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nz_row", &nz, NULL)); // for debugging, will be wrong if nz<3
28: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
29: PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_threads", &num_threads, NULL));
30: if (nz > N + 1) {
31: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "warning decreasing nz\n"));
32: nz = N + 1;
33: }
34: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
36: PetscCall(PetscLogEventRegister("GPU operator", MAT_CLASSID, &event));
37: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
38: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
39: PetscCall(MatSetType(A, MATAIJKOKKOS));
40: PetscCall(MatSeqAIJSetPreallocation(A, nz, NULL));
41: PetscCall(MatMPIAIJSetPreallocation(A, nz, NULL, nz - 1, NULL));
42: PetscCall(MatSetFromOptions(A));
43: PetscCall(MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE));
44: PetscCall(MatCreateVecs(A, &x, &y));
45: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
47: // assemble end on CPU. We are not assembling redundant here, and ignoring off proc entries, but we could
48: for (int i = Istart; i < Iend + 1; i++) {
49: PetscScalar values[] = {1, 1, 1, 1};
50: PetscInt js[] = {i - 1, i}, nn = (i == N) ? 1 : 2; // negative indices are ignored but >= N are not, so clip end
51: PetscCall(MatSetValues(A, nn, js, nn, js, values, ADD_VALUES));
52: }
53: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
54: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
56: // test Kokkos
57: PetscCall(VecSet(x, 1.0));
58: PetscCall(MatMult(A, x, y));
59: PetscCall(VecViewFromOptions(y, NULL, "-ex5_vec_view"));
61: // assemble on GPU
62: if (Iend < N) Iend++; // elements, ignore off processor entries so do redundant
63: PetscCall(PetscLogEventBegin(event, 0, 0, 0, 0));
64: PetscCall(MatKokkosGetDeviceMatWrite(A, &d_mat));
65: Kokkos::fence();
66: Kokkos::parallel_for(
67: Kokkos::RangePolicy<>(Istart, Iend + 1), KOKKOS_LAMBDA(int i) {
68: PetscScalar values[] = {1, 1, 1, 1};
69: PetscInt js[] = {i - 1, i}, nn = (i == N) ? 1 : 2;
70: static_cast<void>(MatSetValuesDevice(d_mat, nn, js, nn, js, values, ADD_VALUES));
71: });
72: Kokkos::fence();
73: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
74: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
76: PetscCall(VecSet(x, 1.0));
77: PetscCall(MatMult(A, x, y));
78: PetscCall(VecViewFromOptions(y, NULL, "-ex5_vec_view"));
79: PetscCall(PetscLogEventEnd(event, 0, 0, 0, 0));
81: PetscCall(MatDestroy(&A));
82: PetscCall(VecDestroy(&x));
83: PetscCall(VecDestroy(&y));
84: #else
85: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_COR, "Kokkos kernels required");
86: #endif
87: PetscCall(PetscFinalize());
88: return 0;
89: }
91: /*TEST
93: build:
94: requires: kokkos_kernels !defined(PETSC_HAVE_CUDA_CLANG)
96: test:
97: suffix: 0
98: requires: kokkos_kernels double !complex !single
99: args: -n 11 -ex5_vec_view
100: nsize: 2
102: TEST*/