Actual source code: cupmallocator.hpp

  1: #ifndef CUPMALLOCATOR_HPP
  2: #define CUPMALLOCATOR_HPP

  4: #if defined(__cplusplus)
  5: #include <petsc/private/cpp/object_pool.hpp>

  7:   #include "../segmentedmempool.hpp"
  8:   #include "cupmthrustutility.hpp"

 10:   #include <thrust/device_ptr.h>
 11:   #include <thrust/fill.h>

 13:   #include <limits> // std::numeric_limits

 15: namespace Petsc
 16: {

 18: namespace device
 19: {

 21: namespace cupm
 22: {

 24: // ==========================================================================================
 25: // CUPM Host Allocator
 26: // ==========================================================================================

 28: template <DeviceType T, typename PetscType = char>
 29: class HostAllocator;

 31: // Allocator class to allocate pinned host memory for use with device
 32: template <DeviceType T, typename PetscType>
 33: class HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
 34: public:
 35:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
 36:   using base_type       = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
 37:   using real_value_type = typename base_type::real_value_type;
 38:   using size_type       = typename base_type::size_type;
 39:   using value_type      = typename base_type::value_type;

 41:   template <typename U>
 42:   static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
 43:   template <typename U>
 44:   static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
 45:   template <typename U>
 46:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
 47: };

 49: template <DeviceType T, typename P>
 50: template <typename U>
 51: inline PetscErrorCode HostAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *) noexcept
 52: {
 53:   PetscFunctionBegin;
 54:   PetscCall(PetscCUPMMallocHost(ptr, n));
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }

 58: template <DeviceType T, typename P>
 59: template <typename U>
 60: inline PetscErrorCode HostAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *) noexcept
 61: {
 62:   PetscFunctionBegin;
 63:   PetscCallCUPM(cupmFreeHost(ptr));
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: template <DeviceType T, typename P>
 68: template <typename U>
 69: inline PetscErrorCode HostAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
 70: {
 71:   PetscFunctionBegin;
 72:   PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true));
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: // ==========================================================================================
 77: // CUPM Device Allocator
 78: // ==========================================================================================

 80: template <DeviceType T, typename PetscType = char>
 81: class DeviceAllocator;

 83: template <DeviceType T, typename PetscType>
 84: class DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
 85: public:
 86:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
 87:   using base_type       = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
 88:   using real_value_type = typename base_type::real_value_type;
 89:   using size_type       = typename base_type::size_type;
 90:   using value_type      = typename base_type::value_type;

 92:   template <typename U>
 93:   static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
 94:   template <typename U>
 95:   static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
 96:   template <typename U>
 97:   static PetscErrorCode zero(value_type *, size_type, const StreamBase<U> *) noexcept;
 98:   template <typename U>
 99:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
100:   template <typename U>
101:   static PetscErrorCode set_canary(value_type *, size_type, const StreamBase<U> *) noexcept;
102: };

104: template <DeviceType T, typename P>
105: template <typename U>
106: inline PetscErrorCode DeviceAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *stream) noexcept
107: {
108:   PetscFunctionBegin;
109:   PetscCall(PetscCUPMMallocAsync(ptr, n, stream->get_stream()));
110:   PetscFunctionReturn(PETSC_SUCCESS);
111: }

113: template <DeviceType T, typename P>
114: template <typename U>
115: inline PetscErrorCode DeviceAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *stream) noexcept
116: {
117:   PetscFunctionBegin;
118:   PetscCallCUPM(cupmFreeAsync(ptr, stream->get_stream()));
119:   PetscFunctionReturn(PETSC_SUCCESS);
120: }

122: template <DeviceType T, typename P>
123: template <typename U>
124: inline PetscErrorCode DeviceAllocator<T, P>::zero(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
125: {
126:   PetscFunctionBegin;
127:   PetscCall(PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true));
128:   PetscFunctionReturn(PETSC_SUCCESS);
129: }

131: template <DeviceType T, typename P>
132: template <typename U>
133: inline PetscErrorCode DeviceAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
134: {
135:   PetscFunctionBegin;
136:   PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true));
137:   PetscFunctionReturn(PETSC_SUCCESS);
138: }

140: template <DeviceType T, typename P>
141: template <typename U>
142: inline PetscErrorCode DeviceAllocator<T, P>::set_canary(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
143: {
144:   using limit_t           = std::numeric_limits<real_value_type>;
145:   const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max();
146:   const auto       xptr   = thrust::device_pointer_cast(ptr);

148:   PetscFunctionBegin;
149:   PetscCallThrust(THRUST_CALL(thrust::fill, stream->get_stream(), xptr, xptr + n, canary));
150:   PetscFunctionReturn(PETSC_SUCCESS);
151: }

153: } // namespace cupm

155: } // namespace device

157: } // namespace Petsc

159: #endif // __cplusplus

161: #endif // CUPMALLOCATOR_HPP