Actual source code: matdensecupmimpl.h

  1: #ifndef PETSCMATDENSECUPMIMPL_H
  2: #define PETSCMATDENSECUPMIMPL_H

  4: #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
  5: #include <petsc/private/matimpl.h>

  7: #ifdef __cplusplus
  8: #include <petsc/private/deviceimpl.h>
  9: #include <petsc/private/cupmsolverinterface.hpp>
 10: #include <petsc/private/cupmobject.hpp>

 12:   #include "../src/sys/objects/device/impls/cupm/cupmthrustutility.hpp"
 13:   #include "../src/sys/objects/device/impls/cupm/kernels.hpp"

 15:   #include <thrust/device_vector.h>
 16:   #include <thrust/device_ptr.h>
 17:   #include <thrust/iterator/counting_iterator.h>
 18:   #include <thrust/iterator/transform_iterator.h>
 19:   #include <thrust/iterator/permutation_iterator.h>
 20:   #include <thrust/transform.h>

 22: namespace Petsc
 23: {

 25: namespace vec
 26: {

 28: namespace cupm
 29: {

 31: namespace impl
 32: {

 34: template <device::cupm::DeviceType>
 35: class VecSeq_CUPM;
 36: template <device::cupm::DeviceType>
 37: class VecMPI_CUPM;

 39: } // namespace impl

 41: } // namespace cupm

 43: } // namespace vec

 45: namespace mat
 46: {

 48: namespace cupm
 49: {

 51: namespace impl
 52: {

 54: // ==========================================================================================
 55: // MatDense_CUPM_Base
 56: //
 57: // A base class to separate out the CRTP code from the common CUPM stuff (like the composed
 58: // function names).
 59: // ==========================================================================================

 61: template <device::cupm::DeviceType T>
 62: class MatDense_CUPM_Base : protected device::cupm::impl::CUPMObject<T> {
 63: public:
 64:   PETSC_CUPMOBJECT_HEADER(T);

 66:   #define MatDenseCUPMComposedOpDecl(OP_NAME) \
 67:     PETSC_NODISCARD static constexpr const char *PetscConcat(MatDenseCUPM, OP_NAME)() noexcept \
 68:     { \
 69:       return T == device::cupm::DeviceType::CUDA ? PetscStringize(PetscConcat(MatDenseCUDA, OP_NAME)) : PetscStringize(PetscConcat(MatDenseHIP, OP_NAME)); \
 70:     }

 72:   // clang-format off
 73:   MatDenseCUPMComposedOpDecl(GetArray_C)
 74:   MatDenseCUPMComposedOpDecl(GetArrayRead_C)
 75:   MatDenseCUPMComposedOpDecl(GetArrayWrite_C)
 76:   MatDenseCUPMComposedOpDecl(RestoreArray_C)
 77:   MatDenseCUPMComposedOpDecl(RestoreArrayRead_C)
 78:   MatDenseCUPMComposedOpDecl(RestoreArrayWrite_C)
 79:   MatDenseCUPMComposedOpDecl(PlaceArray_C)
 80:   MatDenseCUPMComposedOpDecl(ReplaceArray_C)
 81:   MatDenseCUPMComposedOpDecl(ResetArray_C)
 82:     // clang-format on

 84:   #undef MatDenseCUPMComposedOpDecl

 86:     PETSC_NODISCARD static constexpr MatType MATSEQDENSECUPM() noexcept;
 87:   PETSC_NODISCARD static constexpr MatType       MATMPIDENSECUPM() noexcept;
 88:   PETSC_NODISCARD static constexpr MatType       MATDENSECUPM() noexcept;
 89:   PETSC_NODISCARD static constexpr MatSolverType MATSOLVERCUPM() noexcept;
 90: };

 92: // ==========================================================================================
 93: // MatDense_CUPM_Base -- Public API
 94: // ==========================================================================================

 96: template <device::cupm::DeviceType T>
 97: inline constexpr MatType MatDense_CUPM_Base<T>::MATSEQDENSECUPM() noexcept
 98: {
 99:   return T == device::cupm::DeviceType::CUDA ? MATSEQDENSECUDA : MATSEQDENSEHIP;
100: }

102: template <device::cupm::DeviceType T>
103: inline constexpr MatType MatDense_CUPM_Base<T>::MATMPIDENSECUPM() noexcept
104: {
105:   return T == device::cupm::DeviceType::CUDA ? MATMPIDENSECUDA : MATMPIDENSEHIP;
106: }

108: template <device::cupm::DeviceType T>
109: inline constexpr MatType MatDense_CUPM_Base<T>::MATDENSECUPM() noexcept
110: {
111:   return T == device::cupm::DeviceType::CUDA ? MATDENSECUDA : MATDENSEHIP;
112: }

114: template <device::cupm::DeviceType T>
115: inline constexpr MatSolverType MatDense_CUPM_Base<T>::MATSOLVERCUPM() noexcept
116: {
117:   return T == device::cupm::DeviceType::CUDA ? MATSOLVERCUDA : MATSOLVERHIP;
118: }

120:   #define MATDENSECUPM_BASE_HEADER(T) \
121:     PETSC_CUPMOBJECT_HEADER(T); \
122:     using VecSeq_CUPM = ::Petsc::vec::cupm::impl::VecSeq_CUPM<T>; \
123:     using VecMPI_CUPM = ::Petsc::vec::cupm::impl::VecMPI_CUPM<T>; \
124:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSEQDENSECUPM; \
125:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATMPIDENSECUPM; \
126:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATDENSECUPM; \
127:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MATSOLVERCUPM; \
128:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C; \
129:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C; \
130:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C; \
131:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C; \
132:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C; \
133:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C; \
134:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C; \
135:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C; \
136:     using ::Petsc::mat::cupm::impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C

138: // forward declare
139: template <device::cupm::DeviceType>
140: class MatDense_Seq_CUPM;
141: template <device::cupm::DeviceType>
142: class MatDense_MPI_CUPM;

144: // ==========================================================================================
145: // MatDense_CUPM
146: //
147: // The true "base" class for MatDenseCUPM. The reason MatDense_CUPM and MatDense_CUPM_Base
148: // exist is to separate out the CRTP code from the non-crtp code so that the generic functions
149: // can be called via templates below.
150: // ==========================================================================================

152: template <device::cupm::DeviceType T, typename Derived>
153: class MatDense_CUPM : protected MatDense_CUPM_Base<T> {
154: protected:
155:   MATDENSECUPM_BASE_HEADER(T);

157:   template <PetscMemType, PetscMemoryAccessMode>
158:   class MatrixArray;

160:   // Cast the Mat to its host struct, i.e. return the result of (Mat_SeqDense *)m->data
161:   template <typename U = Derived>
162:   PETSC_NODISCARD static constexpr auto    MatIMPLCast(Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(U::MatIMPLCast_(m))
163:   PETSC_NODISCARD static constexpr MatType MATIMPLCUPM() noexcept;

165:   static PetscErrorCode CreateIMPLDenseCUPM(MPI_Comm, PetscInt, PetscInt, PetscInt, PetscInt, PetscScalar *, Mat *, PetscDeviceContext, bool) noexcept;
166:   static PetscErrorCode SetPreallocation(Mat, PetscDeviceContext, PetscScalar * = nullptr) noexcept;

168:   template <typename F>
169:   static PetscErrorCode DiagonalUnaryTransform(Mat, PetscInt, PetscInt, PetscInt, PetscDeviceContext, F &&) noexcept;

171:   PETSC_NODISCARD static auto DeviceArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ>{dctx, m})
172:   PETSC_NODISCARD static auto DeviceArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
173:   PETSC_NODISCARD static auto DeviceArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_DEVICE, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
174:   PETSC_NODISCARD static auto HostArrayRead(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ>{dctx, m})
175:   PETSC_NODISCARD static auto HostArrayWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_WRITE>{dctx, m})
176:   PETSC_NODISCARD static auto HostArrayReadWrite(PetscDeviceContext dctx, Mat m) noexcept PETSC_DECLTYPE_AUTO_RETURNS(MatrixArray<PETSC_MEMTYPE_HOST, PETSC_MEMORY_ACCESS_READ_WRITE>{dctx, m})
177: };

179: // ==========================================================================================
180: // MatDense_CUPM::MatrixArray
181: // ==========================================================================================

183: template <device::cupm::DeviceType T, typename D>
184: template <PetscMemType MT, PetscMemoryAccessMode MA>
185: class MatDense_CUPM<T, D>::MatrixArray : public device::cupm::impl::RestoreableArray<T, MT, MA> {
186:   using base_type = device::cupm::impl::RestoreableArray<T, MT, MA>;

188: public:
189:   MatrixArray(PetscDeviceContext, Mat) noexcept;
190:   ~MatrixArray() noexcept;

192:   // must declare move constructor since we declare a destructor
193:   constexpr MatrixArray(MatrixArray &&) noexcept;

195: private:
196:   Mat m_ = nullptr;
197: };

199: // ==========================================================================================
200: // MatDense_CUPM::MatrixArray -- Public API
201: // ==========================================================================================

203: template <device::cupm::DeviceType T, typename D>
204: template <PetscMemType MT, PetscMemoryAccessMode MA>
205: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(PetscDeviceContext dctx, Mat m) noexcept : base_type{dctx}, m_{m}
206: {
207:   PetscFunctionBegin;
208:   PetscCallAbort(PETSC_COMM_SELF, D::template GetArray<MT, MA>(m, &this->ptr_, dctx));
209:   PetscFunctionReturnVoid();
210: }

212: template <device::cupm::DeviceType T, typename D>
213: template <PetscMemType MT, PetscMemoryAccessMode MA>
214: inline MatDense_CUPM<T, D>::MatrixArray<MT, MA>::~MatrixArray() noexcept
215: {
216:   PetscFunctionBegin;
217:   PetscCallAbort(PETSC_COMM_SELF, D::template RestoreArray<MT, MA>(m_, &this->ptr_, this->dctx_));
218:   PetscFunctionReturnVoid();
219: }

221: template <device::cupm::DeviceType T, typename D>
222: template <PetscMemType MT, PetscMemoryAccessMode MA>
223: inline constexpr MatDense_CUPM<T, D>::MatrixArray<MT, MA>::MatrixArray(MatrixArray &&other) noexcept : base_type{std::move(other)}, m_{util::exchange(other.m_, nullptr)}
224: {
225: }

227: // ==========================================================================================
228: // MatDense_CUPM -- Protected API
229: // ==========================================================================================

231: template <device::cupm::DeviceType T, typename D>
232: inline constexpr MatType MatDense_CUPM<T, D>::MATIMPLCUPM() noexcept
233: {
234:   return D::MATIMPLCUPM_();
235: }

237: // Common core for MatCreateSeqDenseCUPM() and MatCreateMPIDenseCUPM()
238: template <device::cupm::DeviceType T, typename D>
239: inline PetscErrorCode MatDense_CUPM<T, D>::CreateIMPLDenseCUPM(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A, PetscDeviceContext dctx, bool preallocate) noexcept
240: {
241:   Mat mat;

243:   PetscFunctionBegin;
245:   PetscCall(MatCreate(comm, &mat));
246:   PetscCall(MatSetSizes(mat, m, n, M, N));
247:   PetscCall(MatSetType(mat, D::MATIMPLCUPM()));
248:   if (preallocate) {
249:     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
250:     PetscCall(D::SetPreallocation(mat, dctx, data));
251:   }
252:   *A = mat;
253:   PetscFunctionReturn(PETSC_SUCCESS);
254: }

256: template <device::cupm::DeviceType T, typename D>
257: inline PetscErrorCode MatDense_CUPM<T, D>::SetPreallocation(Mat A, PetscDeviceContext dctx, PetscScalar *device_array) noexcept
258: {
259:   PetscFunctionBegin;
261:   // might be the local (sequential) matrix of a MatMPIDense_CUPM. Since this would be called
262:   // from the MPI matrix'es impl MATIMPLCUPM() would return MATMPIDENSECUPM().
264:   PetscCheckTypeNames(A, D::MATSEQDENSECUPM(), D::MATMPIDENSECUPM());
265:   PetscCall(PetscLayoutSetUp(A->rmap));
266:   PetscCall(PetscLayoutSetUp(A->cmap));
267:   PetscCall(D::SetPreallocation_(A, dctx, device_array));
268:   A->preallocated = PETSC_TRUE;
269:   A->assembled    = PETSC_TRUE;
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: namespace detail
274: {

276: // ==========================================================================================
277: // MatrixIteratorBase
278: //
279: // A base class for creating thrust iterators over the local sub-matrix. This will set up the
280: // proper iterator definitions so thrust knows how to handle things properly. Template
281: // parameters are as follows:
282: //
283: // - Iterator:
284: // The type of the primary array iterator. Usually this is
285: // thrust::device_pointer<PetscScalar>::iterator.
286: //
287: // - IndexFunctor:
288: // This should be a functor which contains an operator() that when called with an index `i`,
289: // returns the i'th permuted index into the array. For example, it could return the i'th
290: // diagonal entry.
291: // ==========================================================================================
292: template <typename Iterator, typename IndexFunctor>
293: class MatrixIteratorBase {
294: public:
295:   using array_iterator_type = Iterator;
296:   using index_functor_type  = IndexFunctor;

298:   using difference_type     = typename thrust::iterator_difference<array_iterator_type>::type;
299:   using CountingIterator    = thrust::counting_iterator<difference_type>;
300:   using TransformIterator   = thrust::transform_iterator<index_functor_type, CountingIterator>;
301:   using PermutationIterator = thrust::permutation_iterator<array_iterator_type, TransformIterator>;
302:   using iterator            = PermutationIterator; // type of the begin/end iterator

304:   constexpr MatrixIteratorBase(array_iterator_type first, array_iterator_type last, index_functor_type idx_func) noexcept : first{std::move(first)}, last{std::move(last)}, func{std::move(idx_func)} { }

306:   PETSC_NODISCARD iterator begin() const noexcept
307:   {
308:     return PermutationIterator{
309:       first, TransformIterator{CountingIterator{0}, func}
310:     };
311:   }

313: protected:
314:   array_iterator_type first;
315:   array_iterator_type last;
316:   index_functor_type  func;
317: };

319: // ==========================================================================================
320: // StridedIndexFunctor
321: //
322: // Iterator which permutes a linear index range into strided matrix indices. Usually used to
323: // get the diagonal.
324: // ==========================================================================================
325: template <typename T>
326: struct StridedIndexFunctor {
327:   PETSC_NODISCARD PETSC_HOSTDEVICE_INLINE_DECL constexpr T operator()(const T &i) const noexcept { return stride * i; }

329:   T stride;
330: };

332: template <typename Iterator>
333: class DiagonalIterator : public MatrixIteratorBase<Iterator, StridedIndexFunctor<typename thrust::iterator_difference<Iterator>::type>> {
334: public:
335:   using base_type = MatrixIteratorBase<Iterator, StridedIndexFunctor<typename thrust::iterator_difference<Iterator>::type>>;

337:   using difference_type = typename base_type::difference_type;
338:   using iterator        = typename base_type::iterator;

340:   constexpr DiagonalIterator(Iterator first, Iterator last, difference_type stride) noexcept : base_type{std::move(first), std::move(last), {stride}} { }

342:   PETSC_NODISCARD iterator end() const noexcept { return this->begin() + (this->last - this->first + this->func.stride - 1) / this->func.stride; }
343: };

345: } // namespace detail

347: template <device::cupm::DeviceType T, typename D>
348: template <typename F>
349: inline PetscErrorCode MatDense_CUPM<T, D>::DiagonalUnaryTransform(Mat A, PetscInt rstart, PetscInt rend, PetscInt cols, PetscDeviceContext dctx, F &&functor) noexcept
350: {
351:   const auto rend2 = std::min(rend, cols);

353:   PetscFunctionBegin;
354:   if (rend2 > rstart) {
355:     const auto da = D::DeviceArrayReadWrite(dctx, A);
356:     PetscInt   lda;

358:     PetscCall(MatDenseGetLDA(A, &lda));
359:     {
360:       using DiagonalIterator  = detail::DiagonalIterator<thrust::device_vector<PetscScalar>::iterator>;
361:       const auto        dptr  = thrust::device_pointer_cast(da.data());
362:       const std::size_t begin = rstart * lda;
363:       const std::size_t end   = rend2 - rstart + rend2 * lda;
364:       DiagonalIterator  diagonal{dptr + begin, dptr + end, lda + 1};
365:       cupmStream_t      stream;

367:       PetscCall(D::GetHandlesFrom_(dctx, &stream));
368:       // clang-format off
369:       PetscCallThrust(
370:         THRUST_CALL(
371:           thrust::transform,
372:           stream,
373:           diagonal.begin(), diagonal.end(), diagonal.begin(),
374:           std::forward<F>(functor)
375:         )
376:       );
377:       // clang-format on
378:     }
379:     PetscCall(PetscLogGpuFlops(rend2 - rstart));
380:   }
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }

384:   #define MatComposeOp_CUPM(use_host, pobj, op_str, op_host, ...) \
385:     do { \
386:       if (use_host) { \
387:         PetscCall(PetscObjectComposeFunction(pobj, op_str, op_host)); \
388:       } else { \
389:         PetscCall(PetscObjectComposeFunction(pobj, op_str, __VA_ARGS__)); \
390:       } \
391:     } while (0)

393:   #define MatSetOp_CUPM(use_host, mat, op_name, op_host, ...) \
394:     do { \
395:       if (use_host) { \
396:         (mat)->ops->op_name = op_host; \
397:       } else { \
398:         (mat)->ops->op_name = __VA_ARGS__; \
399:       } \
400:     } while (0)

402:   #define MATDENSECUPM_HEADER(T, ...) \
403:     MATDENSECUPM_BASE_HEADER(T); \
404:     friend class ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>; \
405:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MatIMPLCast; \
406:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::MATIMPLCUPM; \
407:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::CreateIMPLDenseCUPM; \
408:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::SetPreallocation; \
409:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayRead; \
410:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayWrite; \
411:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DeviceArrayReadWrite; \
412:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayRead; \
413:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayWrite; \
414:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::HostArrayReadWrite; \
415:     using ::Petsc::mat::cupm::impl::MatDense_CUPM<T, __VA_ARGS__>::DiagonalUnaryTransform

417: } // namespace impl

419: namespace
420: {

422: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
423: inline PetscErrorCode MatDenseCUPMGetArray_Private(Mat A, PetscScalar **array) noexcept
424: {
425:   PetscFunctionBegin;
428:   switch (access) {
429:   case PETSC_MEMORY_ACCESS_READ:
430:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayRead_C(), (Mat, PetscScalar **), (A, array));
431:     break;
432:   case PETSC_MEMORY_ACCESS_WRITE:
433:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArrayWrite_C(), (Mat, PetscScalar **), (A, array));
434:     break;
435:   case PETSC_MEMORY_ACCESS_READ_WRITE:
436:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMGetArray_C(), (Mat, PetscScalar **), (A, array));
437:     break;
438:   }
439:   if (PetscMemoryAccessWrite(access)) PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
440:   PetscFunctionReturn(PETSC_SUCCESS);
441: }

443: template <device::cupm::DeviceType T, PetscMemoryAccessMode access>
444: inline PetscErrorCode MatDenseCUPMRestoreArray_Private(Mat A, PetscScalar **array) noexcept
445: {
446:   PetscFunctionBegin;
449:   switch (access) {
450:   case PETSC_MEMORY_ACCESS_READ:
451:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayRead_C(), (Mat, PetscScalar **), (A, array));
452:     break;
453:   case PETSC_MEMORY_ACCESS_WRITE:
454:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArrayWrite_C(), (Mat, PetscScalar **), (A, array));
455:     break;
456:   case PETSC_MEMORY_ACCESS_READ_WRITE:
457:     PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMRestoreArray_C(), (Mat, PetscScalar **), (A, array));
458:     break;
459:   }
460:   if (PetscMemoryAccessWrite(access)) {
461:     PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
462:     A->offloadmask = PETSC_OFFLOAD_GPU;
463:   }
464:   if (array) *array = nullptr;
465:   PetscFunctionReturn(PETSC_SUCCESS);
466: }

468: template <device::cupm::DeviceType T>
469: inline PetscErrorCode MatDenseCUPMGetArray(Mat A, PetscScalar **array) noexcept
470: {
471:   PetscFunctionBegin;
472:   PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
473:   PetscFunctionReturn(PETSC_SUCCESS);
474: }

476: template <device::cupm::DeviceType T>
477: inline PetscErrorCode MatDenseCUPMGetArrayRead(Mat A, const PetscScalar **array) noexcept
478: {
479:   PetscFunctionBegin;
480:   PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
481:   PetscFunctionReturn(PETSC_SUCCESS);
482: }

484: template <device::cupm::DeviceType T>
485: inline PetscErrorCode MatDenseCUPMGetArrayWrite(Mat A, PetscScalar **array) noexcept
486: {
487:   PetscFunctionBegin;
488:   PetscCall(MatDenseCUPMGetArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
489:   PetscFunctionReturn(PETSC_SUCCESS);
490: }

492: template <device::cupm::DeviceType T>
493: inline PetscErrorCode MatDenseCUPMRestoreArray(Mat A, PetscScalar **array) noexcept
494: {
495:   PetscFunctionBegin;
496:   PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ_WRITE>(A, array));
497:   PetscFunctionReturn(PETSC_SUCCESS);
498: }

500: template <device::cupm::DeviceType T>
501: inline PetscErrorCode MatDenseCUPMRestoreArrayRead(Mat A, const PetscScalar **array) noexcept
502: {
503:   PetscFunctionBegin;
504:   PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_READ>(A, const_cast<PetscScalar **>(array)));
505:   PetscFunctionReturn(PETSC_SUCCESS);
506: }

508: template <device::cupm::DeviceType T>
509: inline PetscErrorCode MatDenseCUPMRestoreArrayWrite(Mat A, PetscScalar **array) noexcept
510: {
511:   PetscFunctionBegin;
512:   PetscCall(MatDenseCUPMRestoreArray_Private<T, PETSC_MEMORY_ACCESS_WRITE>(A, array));
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: template <device::cupm::DeviceType T>
517: inline PetscErrorCode MatDenseCUPMPlaceArray(Mat A, const PetscScalar *array) noexcept
518: {
519:   PetscFunctionBegin;
521:   PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMPlaceArray_C(), (Mat, const PetscScalar *), (A, array));
522:   PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
523:   A->offloadmask = PETSC_OFFLOAD_GPU;
524:   PetscFunctionReturn(PETSC_SUCCESS);
525: }

527: template <device::cupm::DeviceType T>
528: inline PetscErrorCode MatDenseCUPMReplaceArray(Mat A, const PetscScalar *array) noexcept
529: {
530:   PetscFunctionBegin;
532:   PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMReplaceArray_C(), (Mat, const PetscScalar *), (A, array));
533:   PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
534:   A->offloadmask = PETSC_OFFLOAD_GPU;
535:   PetscFunctionReturn(PETSC_SUCCESS);
536: }

538: template <device::cupm::DeviceType T>
539: inline PetscErrorCode MatDenseCUPMResetArray(Mat A) noexcept
540: {
541:   PetscFunctionBegin;
543:   PetscUseMethod(A, impl::MatDense_CUPM_Base<T>::MatDenseCUPMResetArray_C(), (Mat), (A));
544:   PetscCall(PetscObjectStateIncrease(PetscObjectCast(A)));
545:   PetscFunctionReturn(PETSC_SUCCESS);
546: }

548: } // anonymous namespace

550: } // namespace cupm

552: } // namespace mat

554: } // namespace Petsc

556: #endif // __cplusplus

558: #endif // PETSCMATDENSECUPMIMPL_H