Actual source code: performance.c

  1: static char help[] = "Time vector operations on GPU\n";
  2: /* This program produces the results for Argonne Technical Report ANL-19/41.
  3:    The technical report and resources for generating data can be found in the
  4:    repository:  https://gitlab.com/hannah_mairs/summit-performance */

  6: #include <petscvec.h>

  8: int main(int argc, char **argv)
  9: {
 10:   Vec         v, w, x;
 11:   PetscInt    n = 15;
 12:   PetscScalar val;
 13:   PetscReal   norm1, norm2;
 14:   PetscRandom rctx;
 15: #if defined(PETSC_USE_LOG)
 16:   PetscLogStage stage;
 17: #endif

 19:   PetscFunctionBeginUser;
 20:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 21:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 22:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
 23:   PetscCall(PetscRandomSetFromOptions(rctx));
 24:   PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
 25:   PetscCall(VecSetSizes(v, PETSC_DECIDE, n));
 26:   PetscCall(VecSetFromOptions(v));
 27:   PetscCall(VecDuplicate(v, &w));
 28:   PetscCall(VecSetRandom(v, rctx));
 29:   PetscCall(VecSetRandom(w, rctx));

 31:   /* create dummy vector to clear cache */
 32:   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
 33:   PetscCall(VecSetSizes(x, PETSC_DECIDE, 10000000));
 34:   PetscCall(VecSetFromOptions(x));
 35:   PetscCall(VecSetRandom(x, rctx));

 37:   /* send v to GPU */
 38:   PetscCall(PetscBarrier(NULL));
 39:   PetscCall(VecNorm(v, NORM_1, &norm1));

 41:   /* register a stage work on GPU */
 42:   PetscCall(PetscLogStageRegister("Work on GPU", &stage));
 43:   PetscCall(PetscLogStagePush(stage));
 44:   PetscCall(VecNorm(w, NORM_1, &norm1)); /* send w to GPU */
 45:   PetscCall(VecNorm(x, NORM_1, &norm1)); /* clear cache */
 46:   PetscCall(PetscBarrier(NULL));
 47:   PetscCall(VecAXPY(w, 1.0, v));
 48:   PetscCall(VecNorm(x, NORM_INFINITY, &norm1));
 49:   PetscCall(PetscBarrier(NULL));
 50:   PetscCall(VecDot(w, v, &val));
 51:   PetscCall(VecNorm(x, NORM_1, &norm1));
 52:   PetscCall(PetscBarrier(NULL));
 53:   PetscCall(VecSet(v, 0.0));
 54:   PetscCall(VecNorm(x, NORM_2, &norm2));
 55:   PetscCall(PetscBarrier(NULL));
 56:   PetscCall(VecCopy(v, w));
 57:   PetscCall(PetscLogStagePop());

 59:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test completed successfully!\n"));
 60:   PetscCall(VecDestroy(&v));
 61:   PetscCall(VecDestroy(&w));
 62:   PetscCall(VecDestroy(&x));
 63:   PetscCall(PetscRandomDestroy(&rctx));
 64:   PetscCall(PetscFinalize());
 65:   return 0;
 66: }

 68: /*TEST

 70:    testset:
 71:       nsize: 2
 72:       output_file: output/performance_cuda.out

 74:       test:
 75:         suffix: cuda
 76:         args: -vec_type mpicuda
 77:         requires: cuda

 79:       test:
 80:         suffix: hip
 81:         args: -vec_type mpihip
 82:         requires: hip

 84: TEST*/