Actual source code: segmentedmempool.hpp

  1: #ifndef PETSC_SEGMENTEDMEMPOOL_HPP
  2: #define PETSC_SEGMENTEDMEMPOOL_HPP

  4: #include <petsc/private/deviceimpl.h>

  6: #include <petsc/private/cpp/macros.hpp>
  7: #include <petsc/private/cpp/type_traits.hpp>
  8: #include <petsc/private/cpp/utility.hpp>
  9: #include <petsc/private/cpp/register_finalize.hpp>
 10: #include <petsc/private/cpp/memory.hpp>

 12: #include <limits>
 13: #include <deque>
 14: #include <vector>

 16: namespace Petsc
 17: {

 19: namespace device
 20: {

 22: template <typename T>
 23: class StreamBase {
 24: public:
 25:   using id_type      = int;
 26:   using derived_type = T;

 28:   static const id_type INVALID_ID;

 30:   // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
 31:   template <typename U = T>
 32:   PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());

 34:   PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }

 36:   template <typename E>
 37:   PetscErrorCode record_event(E &&event) const noexcept
 38:   {
 39:     return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
 40:   }

 42:   template <typename E>
 43:   PetscErrorCode wait_for_event(E &&event) const noexcept
 44:   {
 45:     return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
 46:   }

 48: protected:
 49:   constexpr StreamBase() noexcept = default;

 51:   struct default_event_type { };
 52:   using default_stream_type = std::nullptr_t;

 54:   PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }

 56:   PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }

 58:   template <typename U = T>
 59:   static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
 60:   {
 61:     return PETSC_SUCCESS;
 62:   }

 64:   template <typename U = T>
 65:   static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
 66:   {
 67:     return PETSC_SUCCESS;
 68:   }
 69: };

 71: template <typename T>
 72: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;

 74: struct DefaultStream : StreamBase<DefaultStream> {
 75:   using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
 76:   using id_type     = typename StreamBase<DefaultStream>::id_type;
 77:   using event_type  = typename StreamBase<DefaultStream>::default_event_type;
 78: };

 80: } // namespace device

 82: namespace memory
 83: {

 85: namespace impl
 86: {

 88: // ==========================================================================================
 89: // MemoryChunk
 90: //
 91: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
 92: // MemoryBlock and its size/capacity
 93: // ==========================================================================================

 95: template <typename EventType>
 96: class MemoryChunk {
 97: public:
 98:   using event_type = EventType;
 99:   using size_type  = std::size_t;

101:   MemoryChunk(size_type, size_type) noexcept;
102:   explicit MemoryChunk(size_type) noexcept;

104:   MemoryChunk(MemoryChunk &&) noexcept;
105:   MemoryChunk &operator=(MemoryChunk &&) noexcept;

107:   MemoryChunk(const MemoryChunk &) noexcept            = delete;
108:   MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;

110:   PETSC_NODISCARD size_type start() const noexcept { return start_; }
111:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
112:   // REVIEW ME:
113:   // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
114:   // theory only the last chunk needs to do this
115:   PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
116:   PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }

118:   template <typename U>
119:   PetscErrorCode release(const device::StreamBase<U> *) noexcept;
120:   template <typename U>
121:   PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
122:   template <typename U>
123:   PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
124:   PetscErrorCode       resize(size_type) noexcept;
125:   PETSC_NODISCARD bool contains(size_type) const noexcept;

127: private:
128:   // clang-format off
129:   event_type      event_{};          // event recorded when the chunk was released
130:   bool            open_      = true; // is this chunk open?
131:   // id of the last stream to use the chunk, populated on release
132:   int             stream_id_ = device::DefaultStream::INVALID_ID;
133:   size_type       size_      = 0;    // size of the chunk
134:   const size_type start_     = 0;    // offset from the start of the owning block
135:   // clang-format on

137:   template <typename U>
138:   PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
139: };

141: // ==========================================================================================
142: // MemoryChunk - Private API
143: // ==========================================================================================

145: // asks and answers the question: can this stream claim this chunk without serializing?
146: template <typename E>
147: template <typename U>
148: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
149: {
150:   return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
151: }

153: // ==========================================================================================
154: // MemoryChunk - Public API
155: // ==========================================================================================

157: template <typename E>
158: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
159: {
160: }

162: template <typename E>
163: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
164: {
165: }

167: template <typename E>
168: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
169:   event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
170: {
171: }

173: template <typename E>
174: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
175: {
176:   PetscFunctionBegin;
177:   if (this != &other) {
178:     event_     = std::move(other.event_);
179:     open_      = util::exchange(other.open_, false);
180:     stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
181:     size_      = util::exchange(other.size_, 0);
182:     start_     = std::move(other.start_);
183:   }
184:   PetscFunctionReturn(*this);
185: }

187: /*
188:   MemoryChunk::release - release a chunk on a stream

190:   Input Parameter:
191: . stream - the stream to release the chunk with

193:   Notes:
194:   Inserts a release operation on stream and records the state of stream at the time this
195:   routine was called.

197:   Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
198:   the chunk without serialization.

200:   If another stream attempts to claim the chunk they must wait for the recorded event before
201:   claiming the chunk.
202: */
203: template <typename E>
204: template <typename U>
205: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
206: {
207:   PetscFunctionBegin;
208:   open_      = true;
209:   stream_id_ = stream->get_id();
210:   PetscCall(stream->record_event(event_));
211:   PetscFunctionReturn(PETSC_SUCCESS);
212: }

214: /*
215:   MemoryChunk::claim - attempt to claim a particular chunk

217:   Input Parameters:
218: + stream    - the stream on which to attempt to claim
219: . req_size  - the requested size (in elements) to attempt to claim
220: - serialize - (optional, false) whether the claimant allows serialization

222:   Output Parameter:
223: . success - true if the chunk was claimed, false otherwise
224: */
225: template <typename E>
226: template <typename U>
227: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
228: {
229:   PetscFunctionBegin;
230:   if ((*success = can_claim(stream, req_size, serialize))) {
231:     if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
232:     PetscCall(resize(req_size));
233:     open_ = false;
234:   }
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }

238: /*
239:   MemoryChunk::can_claim - test whether a particular chunk can be claimed

241:   Input Parameters:
242: + stream    - the stream on which to attempt to claim
243: . req_size  - the requested size (in elements) to attempt to claim
244: - serialize - whether the claimant allows serialization

246:   Output:
247: . [return] - true if the chunk is claimable given the configuration, false otherwise
248: */
249: template <typename E>
250: template <typename U>
251: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
252: {
253:   if (open_ && (req_size <= capacity())) {
254:     // fully compatible
255:     if (stream_compat_(stream)) return true;
256:     // stream wasn't compatible, but could claim if we serialized
257:     if (serialize) return true;
258:     // incompatible stream and did not want to serialize
259:   }
260:   return false;
261: }

263: /*
264:   MemoryChunk::resize - grow a chunk to new size

266:   Input Parameter:
267: . newsize - the new size Requested

269:   Notes:
270:   newsize cannot be larger than capacity
271: */
272: template <typename E>
273: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
274: {
275:   PetscFunctionBegin;
276:   PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
277:   size_ = newsize;
278:   PetscFunctionReturn(PETSC_SUCCESS);
279: }

281: /*
282:   MemoryChunk::contains - query whether a memory chunk contains a particular offset

284:   Input Parameters:
285: . offset - The offset from the MemoryBlock start

287:   Notes:
288:   Returns true if the chunk contains the offset, false otherwise
289: */
290: template <typename E>
291: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
292: {
293:   return (offset >= start()) && (offset < total_offset());
294: }

296: // ==========================================================================================
297: // MemoryBlock
298: //
299: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
300: // and restoring a block is thread-safe (so may be used by multiple device streams).
301: // ==========================================================================================

303: template <typename T, typename AllocatorType, typename StreamType>
304: class MemoryBlock {
305: public:
306:   using value_type      = T;
307:   using allocator_type  = AllocatorType;
308:   using stream_type     = StreamType;
309:   using event_type      = typename stream_type::event_type;
310:   using chunk_type      = MemoryChunk<event_type>;
311:   using size_type       = typename chunk_type::size_type;
312:   using chunk_list_type = std::vector<chunk_type>;

314:   template <typename U>
315:   MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;

317:   ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);

319:   MemoryBlock(MemoryBlock &&) noexcept;
320:   MemoryBlock &operator=(MemoryBlock &&) noexcept;

322:   // memory blocks are not copyable
323:   MemoryBlock(const MemoryBlock &)            = delete;
324:   MemoryBlock &operator=(const MemoryBlock &) = delete;

326:   /* --- actual functions --- */
327:   PetscErrorCode       try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
328:   PetscErrorCode       try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
329:   PetscErrorCode       try_find_chunk(const T *, chunk_type **) noexcept;
330:   PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;

332:   PETSC_NODISCARD size_type size() const noexcept { return size_; }
333:   PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
334:   PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }

336: private:
337:   value_type     *mem_{};
338:   allocator_type *allocator_{};
339:   size_type       size_{};
340:   chunk_list_type chunks_{};

342:   PetscErrorCode clear_(const stream_type *) noexcept;
343: };

345: // ==========================================================================================
346: // MemoryBlock - Private API
347: // ==========================================================================================

349: // clear the memory block, called from destructors and move assignment/construction
350: template <typename T, typename A, typename S>
351: PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
352: {
353:   PetscFunctionBegin;
354:   if (PetscLikely(mem_)) {
355:     PetscCall(allocator_->deallocate(mem_, stream));
356:     mem_ = nullptr;
357:   }
358:   size_ = 0;
359:   PetscCallCXX(chunks_.clear());
360:   PetscFunctionReturn(PETSC_SUCCESS);
361: }

363: // ==========================================================================================
364: // MemoryBlock - Public API
365: // ==========================================================================================

367: // default constructor, allocates memory immediately
368: template <typename T, typename A, typename S>
369: template <typename U>
370: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
371: {
372:   PetscFunctionBegin;
373:   PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
374:   PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
375:   PetscFunctionReturnVoid();
376: }

378: template <typename T, typename A, typename S>
379: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
380: {
381:   stream_type stream;

383:   PetscFunctionBegin;
384:   PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
385:   PetscFunctionReturnVoid();
386: }

388: template <typename T, typename A, typename S>
389: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
390: {
391: }

393: template <typename T, typename A, typename S>
394: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
395: {
396:   PetscFunctionBegin;
397:   if (this != &other) {
398:     stream_type stream;

400:     PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
401:     mem_       = util::exchange(other.mem_, nullptr);
402:     allocator_ = other.allocator_;
403:     size_      = util::exchange(other.size_, 0);
404:     chunks_    = std::move(other.chunks_);
405:   }
406:   PetscFunctionReturn(*this);
407: }

409: /*
410:   MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
411: */
412: template <typename T, typename A, typename S>
413: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
414: {
415:   // each pool is linear in memory, so it suffices to check the bounds
416:   return (ptr >= mem_) && (ptr < std::next(mem_, size()));
417: }

419: /*
420:   MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock

422:   Input Parameters:
423: + req_size - the requested size of the allocation (in elements)
424: . ptr      - ptr to fill
425: - stream   - stream to fill the pointer on

427:   Output Parameter:
428: . success  - true if chunk was gotten, false otherwise

430:   Notes:
431:   If the current memory could not satisfy the memory request, ptr is unchanged
432: */
433: template <typename T, typename A, typename S>
434: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
435: {
436:   PetscFunctionBegin;
437:   *success = false;
438:   if (req_size <= size()) {
439:     const auto try_create_chunk = [&]() {
440:       const auto was_empty     = chunks_.empty();
441:       const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();

443:       PetscFunctionBegin;
444:       if (block_alloced + req_size <= size()) {
445:         PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
446:         PetscCall(chunks_.back().claim(stream, req_size, success));
447:         *ptr = mem_ + block_alloced;
448:         if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
449:       }
450:       PetscFunctionReturn(PETSC_SUCCESS);
451:     };
452:     const auto try_find_open_chunk = [&](bool serialize = false) {
453:       PetscFunctionBegin;
454:       for (auto &chunk : chunks_) {
455:         PetscCall(chunk.claim(stream, req_size, success, serialize));
456:         if (*success) {
457:           *ptr = mem_ + chunk.start();
458:           break;
459:         }
460:       }
461:       PetscFunctionReturn(PETSC_SUCCESS);
462:     };
463:     const auto try_steal_other_stream_chunk = [&]() {
464:       PetscFunctionBegin;
465:       PetscCall(try_find_open_chunk(true));
466:       PetscFunctionReturn(PETSC_SUCCESS);
467:     };

469:     // search previously distributed chunks, but only claim one if it is on the same stream
470:     // as us
471:     PetscCall(try_find_open_chunk());

473:     // if we are here we couldn't reuse one of our own chunks so check first if the pool
474:     // has room for a new one
475:     if (!*success) PetscCall(try_create_chunk());

477:     // try pruning dead chunks off the back, note we do this regardless of whether we are
478:     // successful
479:     while (chunks_.back().can_claim(stream, 0, false)) {
480:       PetscCallCXX(chunks_.pop_back());
481:       if (chunks_.empty()) {
482:         // if chunks are empty it implies we have managed to claim (and subsequently destroy)
483:         // our own chunk twice! something has gone wrong
484:         PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
485:         break;
486:       }
487:     }

489:     // if previously unsuccessful see if enough space has opened up due to pruning. note that
490:     // if the chunk list was emptied from the pruning this call must succeed in allocating a
491:     // chunk, otherwise something is wrong
492:     if (!*success) PetscCall(try_create_chunk());

494:     // last resort, iterate over all chunks and see if we can steal one by waiting on the
495:     // current owner to finish using it
496:     if (!*success) PetscCall(try_steal_other_stream_chunk());
497:   }
498:   PetscFunctionReturn(PETSC_SUCCESS);
499: }

501: /*
502:   MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock

504:   Input Parameters:
505: + ptr     - ptr to restore
506: - stream  - stream to restore the pointer on

508:   Output Parameter:
509: . success - true if chunk was restored, false otherwise

511:   Notes:
512:   ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
513:   by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
514:   without synchronization, but other streams may not do so until either serializing or the
515:   stream is idle again.
516: */
517: template <typename T, typename A, typename S>
518: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
519: {
520:   chunk_type *chunk = nullptr;

522:   PetscFunctionBegin;
523:   PetscCall(try_find_chunk(*ptr, &chunk));
524:   if (chunk) {
525:     PetscCall(chunk->release(stream));
526:     *ptr     = nullptr;
527:     *success = true;
528:   } else {
529:     *success = false;
530:   }
531:   PetscFunctionReturn(PETSC_SUCCESS);
532: }

534: /*
535:   MemoryBlock::try_find_chunk - try to find the chunk which owns ptr

537:   Input Parameter:
538: . ptr - the pointer to look for

540:   Output Parameter:
541: . ret_chunk - pointer to the owning chunk or nullptr if not found
542: */
543: template <typename T, typename A, typename S>
544: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
545: {
546:   PetscFunctionBegin;
547:   *ret_chunk = nullptr;
548:   if (owns_pointer(ptr)) {
549:     const auto offset = static_cast<size_type>(ptr - mem_);

551:     for (auto &chunk : chunks_) {
552:       if (chunk.contains(offset)) {
553:         *ret_chunk = &chunk;
554:         break;
555:       }
556:     }

558:     PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
559:   }
560:   PetscFunctionReturn(PETSC_SUCCESS);
561: }

563: namespace detail
564: {

566: template <typename T>
567: struct real_type {
568:   using type = T;
569: };

571: template <>
572: struct real_type<PetscScalar> {
573:   using type = PetscReal;
574: };

576: } // namespace detail

578: template <typename T>
579: struct SegmentedMemoryPoolAllocatorBase {
580:   using value_type      = T;
581:   using size_type       = std::size_t;
582:   using real_value_type = typename detail::real_type<T>::type;

584:   template <typename U>
585:   static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
586:   template <typename U>
587:   static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
588:   template <typename U>
589:   static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
590:   template <typename U>
591:   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
592:   template <typename U>
593:   static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
594: };

596: template <typename T>
597: template <typename U>
598: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
599: {
600:   PetscFunctionBegin;
601:   PetscCall(PetscMalloc1(n, ptr));
602:   PetscFunctionReturn(PETSC_SUCCESS);
603: }

605: template <typename T>
606: template <typename U>
607: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
608: {
609:   PetscFunctionBegin;
610:   PetscCall(PetscFree(ptr));
611:   PetscFunctionReturn(PETSC_SUCCESS);
612: }

614: template <typename T>
615: template <typename U>
616: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
617: {
618:   PetscFunctionBegin;
619:   PetscCall(PetscArrayzero(ptr, n));
620:   PetscFunctionReturn(PETSC_SUCCESS);
621: }

623: template <typename T>
624: template <typename U>
625: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
626: {
627:   PetscFunctionBegin;
628:   PetscCall(PetscArraycpy(dest, src, n));
629:   PetscFunctionReturn(PETSC_SUCCESS);
630: }

632: template <typename T>
633: template <typename U>
634: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
635: {
636:   using limit_type            = std::numeric_limits<real_value_type>;
637:   constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();

639:   PetscFunctionBegin;
640:   for (size_type i = 0; i < n; ++i) ptr[i] = canary;
641:   PetscFunctionReturn(PETSC_SUCCESS);
642: }

644: } // namespace impl

646: // ==========================================================================================
647: // SegmentedMemoryPool
648: //
649: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
650: // allocated buffer. This buffer is further split into memory "chunks" which control
651: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
652: //
653: // 1. Open:
654: //    The chunk is free to be claimed by the next suitable allocation request. If the
655: //    allocation request is made on the same stream as the chunk was deallocated on, no
656: //    serialization needs to occur. If not, the allocating stream must wait for the
657: //    event. Claiming the chunk "closes" the chunk.
658: //
659: // 2. Closed:
660: //    The chunk has been claimed by an allocation request. It cannot be opened again until it
661: //    is deallocated; doing so "opens" the chunk.
662: //
663: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
664: // an allocation request.
665: //
666: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
667: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
668: // request. This region exists _only_ at the end, as there are no gaps between chunks.
669: //
670: //
671: // |-----------------------------------------------------------------------------------------
672: // | SegmentedMemoryPool
673: // |
674: // | ||-------------||
675: // | ||             ||    -------------------------------------------------------------------
676: // | ||             ||    | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
677: // | ||             ||    | |             |      |                   |            |
678: // | ||             ||    | x-----x-------x-----xx---------x---------x------x-----x
679: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
680: // | ||             ||    | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
681: // | ||             ||    | ---------------------------------------------------------
682: // | ||             ||    -------------------------------------------------------------------
683: // | ||-------------||
684: // | ||             ||
685: // | ||     ...     ||
686: // | ||             ||
687: // ==========================================================================================

689: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
690: class SegmentedMemoryPool;

692: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
693: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
694: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
695: public:
696:   using value_type     = MemType;
697:   using stream_type    = StreamType;
698:   using allocator_type = AllocType;
699:   using block_type     = impl::MemoryBlock<value_type, allocator_type, stream_type>;
700:   using pool_type      = std::deque<block_type>;
701:   using size_type      = typename block_type::size_type;

703:   explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);

705:   PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
706:   PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
707:   PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;

709: private:
710:   pool_type      pool_;
711:   allocator_type allocator_;
712:   size_type      chunk_size_;

714:   PetscErrorCode make_block_(size_type, const stream_type *) noexcept;

716:   friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
717:   PetscErrorCode register_finalize_(const stream_type *) noexcept;
718:   PetscErrorCode finalize_() noexcept;

720:   PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
721: };

723: // ==========================================================================================
724: // SegmentedMemoryPool - Private API
725: // ==========================================================================================

727: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
728: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
729: {
730:   const auto block_size = std::max(size, chunk_size_);

732:   PetscFunctionBegin;
733:   PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
734:   PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
735:   PetscFunctionReturn(PETSC_SUCCESS);
736: }

738: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
739: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
740: {
741:   PetscFunctionBegin;
742:   PetscCall(make_block_(chunk_size_, stream));
743:   PetscFunctionReturn(PETSC_SUCCESS);
744: }

746: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
747: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
748: {
749:   PetscFunctionBegin;
750:   PetscCallCXX(pool_.clear());
751:   chunk_size_ = DefaultChunkSize;
752:   PetscFunctionReturn(PETSC_SUCCESS);
753: }

755: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
756: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
757: {
758:   auto found = false;

760:   PetscFunctionBegin;
761:   PetscCall(this->register_finalize(stream));
762:   for (auto &block : pool_) {
763:     PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
764:     if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
765:   }

767:   PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
768:   // if we are here we couldn't find an open block in the pool, so make a new block
769:   PetscCall(make_block_(size, stream));
770:   // and assign it
771:   PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
772:   PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
773:   PetscFunctionReturn(PETSC_SUCCESS);
774: }

776: // ==========================================================================================
777: // SegmentedMemoryPool - Public API
778: // ==========================================================================================

780: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
781: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
782: {
783: }

785: /*
786:   SegmentedMemoryPool::allocate - get an allocation from the memory pool

788:   Input Parameters:
789: + req_size - size (in elements) to get
790: . ptr      - the pointer to hold the allocation
791: - stream   - the stream on which to get the allocation

793:   Output Parameter:
794: . ptr - the pointer holding the allocation

796:   Notes:
797:   req_size cannot be negative. If req_size if zero, ptr is set to nullptr
798: */
799: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
800: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
801: {
802:   value_type *ret_ptr = nullptr;

804:   PetscFunctionBegin;
805:   PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
808:   if (req_size) {
809:     const auto size         = static_cast<size_type>(req_size);
810:     auto       aligned_size = alignment == alignof(char) ? size : size + alignment;
811:     void      *vptr         = nullptr;

813:     PetscCall(allocate_(aligned_size, &ret_ptr, stream));
814:     vptr = ret_ptr;
815:     std::align(alignment, size, vptr, aligned_size);
816:     ret_ptr = reinterpret_cast<value_type *>(vptr);
817:     // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
818:     // accesses.
819:     if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
820:   }
821:   *ptr = ret_ptr;
822:   PetscFunctionReturn(PETSC_SUCCESS);
823: }

825: /*
826:   SegmentedMemoryPool::deallocate - release a pointer back to the memory pool

828:   Input Parameters:
829: + ptr    - the pointer to release
830: - stream - the stream to release it on

832:   Notes:
833:   If ptr is not owned by the pool it is unchanged.
834: */
835: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
836: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
837: {
838:   PetscFunctionBegin;
841:   // nobody owns a nullptr, and if they do then they have bigger problems
842:   if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
843:   for (auto &block : pool_) {
844:     auto found = false;

846:     PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
847:     if (PetscLikely(found)) break;
848:   }
849:   PetscFunctionReturn(PETSC_SUCCESS);
850: }

852: /*
853:   SegmentedMemoryPool::reallocate - Resize an allocated buffer

855:   Input Parameters:
856: + new_req_size - the new buffer size
857: . ptr          - pointer to the buffer
858: - stream       - stream to resize with

860:   Output Parameter:
861: . ptr - pointer to the new region

863:   Notes:
864:   ptr must have been allocated by the pool.

866:   It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
867: */
868: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
869: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
870: {
871:   using chunk_type = typename block_type::chunk_type;

873:   const auto  new_size = static_cast<size_type>(new_req_size);
874:   const auto  old_ptr  = *ptr;
875:   chunk_type *chunk    = nullptr;

877:   PetscFunctionBegin;
878:   PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);

882:   // if reallocating to zero, just free
883:   if (PetscUnlikely(new_size == 0)) {
884:     PetscCall(deallocate(ptr, stream));
885:     PetscFunctionReturn(PETSC_SUCCESS);
886:   }

888:   // search the blocks for the owning chunk
889:   for (auto &block : pool_) {
890:     PetscCall(block.try_find_chunk(old_ptr, &chunk));
891:     if (chunk) break; // found
892:   }
893:   PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);

895:   if (chunk->capacity() < new_size) {
896:     // chunk does not have enough room, need to grab a fresh chunk and copy to it
897:     *ptr = nullptr;
898:     PetscCall(chunk->release(stream));
899:     PetscCall(allocate(new_size, ptr, stream));
900:     PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
901:   } else {
902:     // chunk had enough room we can simply grow (or shrink) to fit the new size
903:     PetscCall(chunk->resize(new_size));
904:   }
905:   PetscFunctionReturn(PETSC_SUCCESS);
906: }

908: } // namespace memory

910: } // namespace Petsc

912: #endif // PETSC_SEGMENTEDMEMPOOL_HPP