Actual source code: petscdevice_hip.h
1: #ifndef PETSCDEVICE_HIP_H
2: #define PETSCDEVICE_HIP_H
4: #include <petscdevice.h>
5: #include <petscpkg_version.h>
7: #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
8: #define PETSC_USING_HCC 1
9: #endif
11: #if PetscDefined(HAVE_HIP)
12: #include <hip/hip_runtime.h>
14: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
15: #include <hipblas/hipblas.h>
16: #include <hipsparse/hipsparse.h>
17: #else
18: #include <hipblas.h>
19: #include <hipsparse.h>
20: #endif
22: #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
23: #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
24: #endif
26: #if defined(__HIP_PLATFORM_NVCC__)
27: #include <cusolverDn.h>
28: #else // __HIP_PLATFORM_HCC__
29: #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
30: #include <hipsolver/hipsolver.h>
31: #else
32: #include <hipsolver.h>
33: #endif
34: #endif // __HIP_PLATFORM_NVCC__
35: #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
37: // REMOVE ME
38: #define WaitForHIP() hipDeviceSynchronize()
40: /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
41: PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
42: PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
43: PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
45: #define PetscCallHIP(...) \
46: do { \
47: const hipError_t _p_hip_err__ = __VA_ARGS__; \
48: if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
49: const char *name = hipGetErrorName(_p_hip_err__); \
50: const char *descr = hipGetErrorString(_p_hip_err__); \
51: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
52: } \
53: } while (0)
54: #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
56: #define PetscHIPCheckLaunch \
57: do { \
58: /* Check synchronous errors, i.e. pre-launch */ \
59: PetscCallHIP(hipGetLastError()); \
60: /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
61: PetscCallHIP(hipDeviceSynchronize()); \
62: } while (0)
64: #define PetscCallHIPBLAS(...) \
65: do { \
66: const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
67: if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
68: const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
69: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
70: } \
71: } while (0)
72: #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
74: #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
75: /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
76: #define PetscCallHIPSPARSE(...) \
77: do { \
78: const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
79: if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
80: const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
81: PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
82: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
83: } \
84: } while (0)
85: #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
87: #define PetscCallHIPSOLVER(...) \
88: do { \
89: const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
90: if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
91: const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
92: if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
93: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
94: "hipSolver error %d (%s). " \
95: "This indicates the GPU may have run out resources", \
96: (PetscErrorCode)_p_hipsolver_stat__, name); \
97: } else { \
98: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
99: } \
100: } \
101: } while (0)
102: #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
104: #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
105: /* hipSolver does not exist yet so we work around it
106: rocSOLVER users rocBLAS for the handle
107: * */
108: #if defined(__HIP_PLATFORM_NVCC__)
109: #include <cusolverDn.h>
110: typedef cusolverDnHandle_t hipsolverHandle_t;
111: typedef cusolverStatus_t hipsolverStatus_t;
113: /* Alias hipsolverDestroy to cusolverDnDestroy */
114: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
115: {
116: return cusolverDnDestroy(hipsolverhandle);
117: }
119: /* Alias hipsolverCreate to cusolverDnCreate */
120: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
121: {
122: return cusolverDnCreate(hipsolverhandle);
123: }
125: /* Alias hipsolverGetStream to cusolverDnGetStream */
126: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
127: {
128: return cusolverDnGetStream(handle, stream);
129: }
131: /* Alias hipsolverSetStream to cusolverDnSetStream */
132: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
133: {
134: return cusolveDnSetStream(handle, stream);
135: }
136: #else /* __HIP_PLATFORM_HCC__ */
137: #include <rocsolver.h>
138: #include <rocblas.h>
139: typedef rocblas_handle hipsolverHandle_t;
140: typedef rocblas_status hipsolverStatus_t;
142: /* Alias hipsolverDestroy to rocblas_destroy_handle */
143: static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
144: {
145: return rocblas_destroy_handle(hipsolverhandle);
146: }
148: /* Alias hipsolverCreate to rocblas_destroy_handle */
149: static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
150: {
151: return rocblas_create_handle(hipsolverhandle);
152: }
154: // Alias hipsolverGetStream to rocblas_get_stream
155: static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
156: {
157: return rocblas_get_stream(handle, stream);
158: }
160: // Alias hipsolverSetStream to rocblas_set_stream
161: static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
162: {
163: return rocblas_set_stream(handle, stream);
164: }
165: #endif // __HIP_PLATFORM_NVCC__
166: #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
167: // REMOVE ME
168: PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc
169: PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
170: PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
172: #endif // PETSC_HAVE_HIP
174: // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
175: // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
176: // would already be defined, but they would be empty since we cannot be using NVCC at the same
177: // time.
178: #if PetscDefined(USING_HCC)
179: #undef PETSC_HOST_DECL
180: #undef PETSC_DEVICE_DECL
181: #undef PETSC_KERNEL_DECL
182: #undef PETSC_SHAREDMEM_DECL
183: #undef PETSC_FORCEINLINE
184: #undef PETSC_CONSTMEM_DECL
186: #define PETSC_HOST_DECL __host__
187: #define PETSC_DEVICE_DECL __device__
188: #define PETSC_KERNEL_DECL __global__
189: #define PETSC_SHAREDMEM_DECL __shared__
190: #define PETSC_FORCEINLINE __forceinline__
191: #define PETSC_CONSTMEM_DECL __constant__
192: #endif
194: #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
195: #define PETSC_HOST_DECL
196: #define PETSC_DEVICE_DECL
197: #define PETSC_KERNEL_DECL
198: #define PETSC_SHAREDMEM_DECL
199: #define PETSC_FORCEINLINE inline
200: #define PETSC_CONSTMEM_DECL
201: #endif
203: #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
204: #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
205: #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
206: #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
207: #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
208: #endif
210: #endif // PETSCDEVICE_HIP_H