Actual source code: cupmevent.hpp

  1: #ifndef PETSC_CUPMEVENT_HPP
  2: #define PETSC_CUPMEVENT_HPP

  4: #include <petsc/private/cupminterface.hpp>
  5: #include <petsc/private/cpp/memory.hpp>
  6: #include <petsc/private/cpp/object_pool.hpp>

  8: #if defined(__cplusplus)
  9:   #include <stack>
 10: namespace Petsc
 11: {

 13: namespace device
 14: {

 16: namespace cupm
 17: {

 19: // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
 20: // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
 21: // speedup.
 22: template <DeviceType T, unsigned long flags>
 23: class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
 24: public:
 25:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);

 27:   PetscErrorCode allocate(cupmEvent_t *) noexcept;
 28:   PetscErrorCode deallocate(cupmEvent_t *) noexcept;

 30:   PetscErrorCode finalize_() noexcept;

 32: private:
 33:   std::stack<cupmEvent_t> pool_;
 34: };

 36: template <DeviceType T, unsigned long flags>
 37: inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
 38: {
 39:   PetscFunctionBegin;
 40:   while (!pool_.empty()) {
 41:     PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
 42:     PetscCallCXX(pool_.pop());
 43:   }
 44:   PetscFunctionReturn(PETSC_SUCCESS);
 45: }

 47: template <DeviceType T, unsigned long flags>
 48: inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
 49: {
 50:   PetscFunctionBegin;
 52:   if (pool_.empty()) {
 53:     PetscCall(this->register_finalize());
 54:     PetscCallCUPM(cupmEventCreateWithFlags(event, flags));
 55:   } else {
 56:     PetscCallCXX(*event = std::move(pool_.top()));
 57:     PetscCallCXX(pool_.pop());
 58:   }
 59:   PetscFunctionReturn(PETSC_SUCCESS);
 60: }

 62: template <DeviceType T, unsigned long flags>
 63: inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
 64: {
 65:   PetscFunctionBegin;
 67:   if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
 68:     if (this->registered()) {
 69:       PetscCallCXX(pool_.push(std::move(event)));
 70:     } else {
 71:       PetscCallCUPM(cupmEventDestroy(event));
 72:     }
 73:   }
 74:   PetscFunctionReturn(PETSC_SUCCESS);
 75: }

 77: template <DeviceType T, unsigned long flags>
 78: CUPMEventPool<T, flags> &cupm_event_pool() noexcept
 79: {
 80:   static CUPMEventPool<T, flags> pool;
 81:   return pool;
 82: }

 84: // pool of events with timing disabled
 85: template <DeviceType T>
 86: inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
 87: {
 88:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
 89: }

 91: // pool of events with timing enabled
 92: template <DeviceType T>
 93: inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
 94: {
 95:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
 96: }

 98: // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
 99: // event-stream pairing for the async allocator. It is also used as the data member of
100: // PetscEvent.
101: template <DeviceType T>
102: class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated<CUPMEvent<T>> {
103:   using pool_type = memory::PoolAllocated<CUPMEvent<T>>;

105: public:
106:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);

108:   constexpr CUPMEvent() noexcept = default;
109:   ~CUPMEvent() noexcept;

111:   CUPMEvent(CUPMEvent &&) noexcept;
112:   CUPMEvent &operator=(CUPMEvent &&) noexcept;

114:   // event is not copyable
115:   CUPMEvent(const CUPMEvent &)            = delete;
116:   CUPMEvent &operator=(const CUPMEvent &) = delete;

118:   PETSC_NODISCARD cupmEvent_t get() noexcept;
119:   PetscErrorCode              record(cupmStream_t) noexcept;

121:   explicit operator bool() const noexcept;

123: private:
124:   cupmEvent_t event_{};
125: };

127: template <DeviceType T>
128: inline CUPMEvent<T>::~CUPMEvent() noexcept
129: {
130:   PetscFunctionBegin;
131:   PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
132:   PetscFunctionReturnVoid();
133: }

135: template <DeviceType T>
136: inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
137: {
138:   static_assert(std::is_empty<impl::Interface<T>>::value, "");
139: }

141: template <DeviceType T>
142: inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
143: {
144:   PetscFunctionBegin;
145:   if (this != &other) {
146:     pool_type::operator=(std::move(other));
147:     PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
148:     event_ = util::exchange(other.event_, cupmEvent_t{});
149:   }
150:   PetscFunctionReturn(*this);
151: }

153: template <DeviceType T>
154: inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
155: {
156:   PetscFunctionBegin;
157:   if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
158:   PetscFunctionReturn(event_);
159: }

161: template <DeviceType T>
162: inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
163: {
164:   PetscFunctionBegin;
165:   PetscCallCUPM(cupmEventRecord(get(), stream));
166:   PetscFunctionReturn(PETSC_SUCCESS);
167: }

169: template <DeviceType T>
170: inline CUPMEvent<T>::operator bool() const noexcept
171: {
172:   return event_ != cupmEvent_t{};
173: }

175: } // namespace cupm

177: } // namespace device

179: } // namespace Petsc
180: #endif // __cplusplus

182: #endif // PETSC_CUPMEVENT_HPP