Actual source code: cupmevent.hpp
1: #ifndef PETSC_CUPMEVENT_HPP
2: #define PETSC_CUPMEVENT_HPP
4: #include <petsc/private/cupminterface.hpp>
5: #include <petsc/private/cpp/memory.hpp>
6: #include <petsc/private/cpp/object_pool.hpp>
8: #if defined(__cplusplus)
9: #include <stack>
10: namespace Petsc
11: {
13: namespace device
14: {
16: namespace cupm
17: {
19: // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
20: // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
21: // speedup.
22: template <DeviceType T, unsigned long flags>
23: class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
24: public:
25: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
27: PetscErrorCode allocate(cupmEvent_t *) noexcept;
28: PetscErrorCode deallocate(cupmEvent_t *) noexcept;
30: PetscErrorCode finalize_() noexcept;
32: private:
33: std::stack<cupmEvent_t> pool_;
34: };
36: template <DeviceType T, unsigned long flags>
37: inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
38: {
39: PetscFunctionBegin;
40: while (!pool_.empty()) {
41: PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
42: PetscCallCXX(pool_.pop());
43: }
44: PetscFunctionReturn(PETSC_SUCCESS);
45: }
47: template <DeviceType T, unsigned long flags>
48: inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
49: {
50: PetscFunctionBegin;
52: if (pool_.empty()) {
53: PetscCall(this->register_finalize());
54: PetscCallCUPM(cupmEventCreateWithFlags(event, flags));
55: } else {
56: PetscCallCXX(*event = std::move(pool_.top()));
57: PetscCallCXX(pool_.pop());
58: }
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: template <DeviceType T, unsigned long flags>
63: inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
64: {
65: PetscFunctionBegin;
67: if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
68: if (this->registered()) {
69: PetscCallCXX(pool_.push(std::move(event)));
70: } else {
71: PetscCallCUPM(cupmEventDestroy(event));
72: }
73: }
74: PetscFunctionReturn(PETSC_SUCCESS);
75: }
77: template <DeviceType T, unsigned long flags>
78: CUPMEventPool<T, flags> &cupm_event_pool() noexcept
79: {
80: static CUPMEventPool<T, flags> pool;
81: return pool;
82: }
84: // pool of events with timing disabled
85: template <DeviceType T>
86: inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
87: {
88: return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
89: }
91: // pool of events with timing enabled
92: template <DeviceType T>
93: inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
94: {
95: return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
96: }
98: // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
99: // event-stream pairing for the async allocator. It is also used as the data member of
100: // PetscEvent.
101: template <DeviceType T>
102: class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated<CUPMEvent<T>> {
103: using pool_type = memory::PoolAllocated<CUPMEvent<T>>;
105: public:
106: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
108: constexpr CUPMEvent() noexcept = default;
109: ~CUPMEvent() noexcept;
111: CUPMEvent(CUPMEvent &&) noexcept;
112: CUPMEvent &operator=(CUPMEvent &&) noexcept;
114: // event is not copyable
115: CUPMEvent(const CUPMEvent &) = delete;
116: CUPMEvent &operator=(const CUPMEvent &) = delete;
118: PETSC_NODISCARD cupmEvent_t get() noexcept;
119: PetscErrorCode record(cupmStream_t) noexcept;
121: explicit operator bool() const noexcept;
123: private:
124: cupmEvent_t event_{};
125: };
127: template <DeviceType T>
128: inline CUPMEvent<T>::~CUPMEvent() noexcept
129: {
130: PetscFunctionBegin;
131: PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
132: PetscFunctionReturnVoid();
133: }
135: template <DeviceType T>
136: inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
137: {
138: static_assert(std::is_empty<impl::Interface<T>>::value, "");
139: }
141: template <DeviceType T>
142: inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
143: {
144: PetscFunctionBegin;
145: if (this != &other) {
146: pool_type::operator=(std::move(other));
147: PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
148: event_ = util::exchange(other.event_, cupmEvent_t{});
149: }
150: PetscFunctionReturn(*this);
151: }
153: template <DeviceType T>
154: inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
155: {
156: PetscFunctionBegin;
157: if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
158: PetscFunctionReturn(event_);
159: }
161: template <DeviceType T>
162: inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
163: {
164: PetscFunctionBegin;
165: PetscCallCUPM(cupmEventRecord(get(), stream));
166: PetscFunctionReturn(PETSC_SUCCESS);
167: }
169: template <DeviceType T>
170: inline CUPMEvent<T>::operator bool() const noexcept
171: {
172: return event_ != cupmEvent_t{};
173: }
175: } // namespace cupm
177: } // namespace device
179: } // namespace Petsc
180: #endif // __cplusplus
182: #endif // PETSC_CUPMEVENT_HPP