Actual source code: ex1hip.hip.cpp

  1: static char help[] = "Benchmarking HIP kernel launch time\n"; /* program description; handed to PetscInitialize() in main() */
  2: /*
  3:   Running example on Crusher at OLCF:
  4:   # run with 1 MPI rank (-n1), 32 CPUs (-c32), and map the process to CPU 0 and GPU 0
  5:   $ srun -n1 -c32 --cpu-bind=map_cpu:0 --gpus-per-node=8 --gpu-bind=map_gpu:0 ./ex1hip
  6:   Average asynchronous HIP kernel launch time = 1.34 microseconds
  7:   Average synchronous  HIP kernel launch time = 6.66 microseconds
  8: */
  9: #include <petscsys.h>
 10: #include <petscdevice_hip.h>

 12: __global__ void NullKernel() { }

 14: int main(int argc, char **argv)
 15: {
 16:   PetscInt       i, n = 100000;
 17:   PetscLogDouble tstart, tend, time;

 19:   PetscFunctionBeginUser;
 20:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 21:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 22:   PetscCallHIP(hipStreamSynchronize(NULL)); /* Initialize HIP runtime to get more accurate timing below */

 24:   /* Launch a sequence of kernels asynchronously. Previous launched kernels do not need to be completed before launching a new one */
 25:   PetscCall(PetscTime(&tstart));
 26:   for (i = 0; i < n; i++) hipLaunchKernelGGL(NullKernel, dim3(1), dim3(1), 0, NULL);
 27:   PetscCall(PetscTime(&tend));
 28:   PetscCallHIP(hipStreamSynchronize(NULL)); /* Sync after tend since we don't want to count kernel execution time */
 29:   time = (tend - tstart) * 1e6 / n;
 30:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average asynchronous HIP kernel launch time = %.2f microseconds\n", time));

 32:   /* Launch a sequence of kernels synchronously. Only launch a new kernel after the one before it has been completed */
 33:   PetscCall(PetscTime(&tstart));
 34:   for (i = 0; i < n; i++) {
 35:     hipLaunchKernelGGL(NullKernel, dim3(1), dim3(1), 0, NULL);
 36:     PetscCallHIP(hipStreamSynchronize(NULL));
 37:   }
 38:   PetscCall(PetscTime(&tend));
 39:   time = (tend - tstart) * 1e6 / n;
 40:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average synchronous  HIP kernel launch time = %.2f microseconds\n", time));

 42:   PetscCall(PetscFinalize());
 43:   return 0;
 44: }

 46: /*TEST
 47:   build:
 48:     requires: hip

 50:   test:
 51:     requires: hip
 52:     args: -n 2
 53:     output_file: output/empty.out
 54:     filter: grep "DOES_NOT_EXIST"

 56: TEST*/