Actual source code: ex1hip.hip.cpp
1: static char help[] = "Benchmarking HIP kernel launch time\n";
2: /*
3: Running example on Crusher at OLCF:
4: # run with 1 mpi rank (-n1), 32 CPUs (-c32), and map the process to CPU 0 and GPU 0
5: $ srun -n1 -c32 --cpu-bind=map_cpu:0 --gpus-per-node=8 --gpu-bind=map_gpu:0 ./ex1hip
6: Average asynchronous HIP kernel launch time = 1.34 microseconds
7: Average synchronous HIP kernel launch time = 6.66 microseconds
8: */
9: #include <petscsys.h>
10: #include <petscdevice_hip.h>
12: __global__ void NullKernel() { }
14: int main(int argc, char **argv)
15: {
16: PetscInt i, n = 100000;
17: PetscLogDouble tstart, tend, time;
19: PetscFunctionBeginUser;
20: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
21: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
22: PetscCallHIP(hipStreamSynchronize(NULL)); /* Initialize HIP runtime to get more accurate timing below */
24: /* Launch a sequence of kernels asynchronously. Previous launched kernels do not need to be completed before launching a new one */
25: PetscCall(PetscTime(&tstart));
26: for (i = 0; i < n; i++) hipLaunchKernelGGL(NullKernel, dim3(1), dim3(1), 0, NULL);
27: PetscCall(PetscTime(&tend));
28: PetscCallHIP(hipStreamSynchronize(NULL)); /* Sync after tend since we don't want to count kernel execution time */
29: time = (tend - tstart) * 1e6 / n;
30: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average asynchronous HIP kernel launch time = %.2f microseconds\n", time));
32: /* Launch a sequence of kernels synchronously. Only launch a new kernel after the one before it has been completed */
33: PetscCall(PetscTime(&tstart));
34: for (i = 0; i < n; i++) {
35: hipLaunchKernelGGL(NullKernel, dim3(1), dim3(1), 0, NULL);
36: PetscCallHIP(hipStreamSynchronize(NULL));
37: }
38: PetscCall(PetscTime(&tend));
39: time = (tend - tstart) * 1e6 / n;
40: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average synchronous HIP kernel launch time = %.2f microseconds\n", time));
42: PetscCall(PetscFinalize());
43: return 0;
44: }
46: /*TEST
47: build:
48: requires: hip
50: test:
51: requires: hip
52: args: -n 2
53: output_file: output/empty.out
54: filter: grep "DOES_NOT_EXIST"
56: TEST*/