Actual source code: performance.c
1: static char help[] = "Time vector operations on GPU\n";
2: /* This program produces the results for Argonne Technical Report ANL-19/41.
3: The technical report and resources for generating data can be found in the
4: repository: https://gitlab.com/hannah_mairs/summit-performance */
6: #include <petscvec.h>
8: int main(int argc, char **argv)
9: {
10: Vec v, w, x;
11: PetscInt n = 15;
12: PetscScalar val;
13: PetscReal norm1, norm2;
14: PetscRandom rctx;
15: #if defined(PETSC_USE_LOG)
16: PetscLogStage stage;
17: #endif
19: PetscFunctionBeginUser;
20: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
21: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
22: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
23: PetscCall(PetscRandomSetFromOptions(rctx));
24: PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
25: PetscCall(VecSetSizes(v, PETSC_DECIDE, n));
26: PetscCall(VecSetFromOptions(v));
27: PetscCall(VecDuplicate(v, &w));
28: PetscCall(VecSetRandom(v, rctx));
29: PetscCall(VecSetRandom(w, rctx));
31: /* create dummy vector to clear cache */
32: PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
33: PetscCall(VecSetSizes(x, PETSC_DECIDE, 10000000));
34: PetscCall(VecSetFromOptions(x));
35: PetscCall(VecSetRandom(x, rctx));
37: /* send v to GPU */
38: PetscCall(PetscBarrier(NULL));
39: PetscCall(VecNorm(v, NORM_1, &norm1));
41: /* register a stage work on GPU */
42: PetscCall(PetscLogStageRegister("Work on GPU", &stage));
43: PetscCall(PetscLogStagePush(stage));
44: PetscCall(VecNorm(w, NORM_1, &norm1)); /* send w to GPU */
45: PetscCall(VecNorm(x, NORM_1, &norm1)); /* clear cache */
46: PetscCall(PetscBarrier(NULL));
47: PetscCall(VecAXPY(w, 1.0, v));
48: PetscCall(VecNorm(x, NORM_INFINITY, &norm1));
49: PetscCall(PetscBarrier(NULL));
50: PetscCall(VecDot(w, v, &val));
51: PetscCall(VecNorm(x, NORM_1, &norm1));
52: PetscCall(PetscBarrier(NULL));
53: PetscCall(VecSet(v, 0.0));
54: PetscCall(VecNorm(x, NORM_2, &norm2));
55: PetscCall(PetscBarrier(NULL));
56: PetscCall(VecCopy(v, w));
57: PetscCall(PetscLogStagePop());
59: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test completed successfully!\n"));
60: PetscCall(VecDestroy(&v));
61: PetscCall(VecDestroy(&w));
62: PetscCall(VecDestroy(&x));
63: PetscCall(PetscRandomDestroy(&rctx));
64: PetscCall(PetscFinalize());
65: return 0;
66: }
68: /*TEST
70: testset:
71: nsize: 2
72: output_file: output/performance_cuda.out
74: test:
75: suffix: cuda
76: args: -vec_type mpicuda
77: requires: cuda
79: test:
80: suffix: hip
81: args: -vec_type mpihip
82: requires: hip
84: TEST*/