Actual source code: vecmpicupm.cu
#include "../vecmpicupm.hpp" /*I <petscvec.h> I*/

using namespace Petsc::vec::cupm;
using Petsc::device::cupm::DeviceType;

static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};
/*MC
  VECCUDA - VECCUDA = "cuda" - A `VECSEQCUDA` on a single-process communicator, and `VECMPICUDA`
  otherwise.

  Options Database Keys:
. -vec_type cuda - sets the vector type to `VECCUDA` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQCUDA`,
`VECMPICUDA`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`
M*/
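
/* A minimal usage sketch (assuming a PETSc build configured with CUDA): select `VECCUDA`
   explicitly with `VecSetType()` rather than through the options database.

     Vec v;
     PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
     PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
     PetscCall(VecSetType(v, VECCUDA));
     PetscCall(VecSet(v, 1.0));
     PetscCall(VecDestroy(&v));
*/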
/*MC
  VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

  Options Database Keys:
. -vec_type mpicuda - sets the vector type to `VECMPICUDA` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`,
`VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`
M*/
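
/* A minimal usage sketch: the type can also be selected at runtime from the options
   database, e.g. by running the program with -vec_type mpicuda.

     Vec v;
     PetscCall(VecCreate(PETSC_COMM_WORLD, &v));
     PetscCall(VecSetSizes(v, PETSC_DECIDE, 100));
     PetscCall(VecSetFromOptions(v)); // picks up -vec_type mpicuda
     PetscCall(VecDestroy(&v));
*/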
/* Registered constructor for VECCUDA: creates a VECSEQCUDA on a single-process communicator
   and a VECMPICUDA otherwise (see the VECCUDA manpage above) */
PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create_CUPM(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Registered constructor for VECMPICUDA */
PetscErrorCode VecCreate_MPICUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Returns the raw host and device arrays of the vector, along with the offload mask
   indicating which copy (host, device, or both) currently holds valid data */
PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
{
  PetscDeviceContext dctx;

  PetscFunctionBegin;
  PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
  PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@
  VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm - the MPI communicator to use
. n    - local vector length (or `PETSC_DECIDE` to have it calculated if `N` is given)
- N    - global vector length (or `PETSC_DETERMINE` to have it calculated if `n` is given)

  Output Parameter:
. v - the vector

  Notes:
  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
  existing vector.

  This function may initialize `PetscDevice`, which may incur a device synchronization.

  Level: intermediate

.seealso: `VecCreateMPICUDAWithArray()`, `VecCreateMPICUDAWithArrays()`, `VecCreateSeqCUDA()`,
`VecCreateSeq()`, `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
`VecCreateGhost()`, `VecCreateMPIWithArray()`, `VecCreateGhostWithArray()`, `VecMPISetGhost()`
@*/
PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
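
/* A minimal usage sketch (assuming a PETSc build configured with CUDA): create a vector of
   global length 100 and let PETSc choose the local size on each rank.

     Vec x;
     PetscCall(VecCreateMPICUDA(PETSC_COMM_WORLD, PETSC_DECIDE, 100, &x));
     PetscCall(VecSet(x, 1.0));
     PetscCall(VecDestroy(&x));
*/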
/*@C
  VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
  user provides the complete array space to store the vector values.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as `VecSetBlockSize()`
. n        - local vector length, cannot be `PETSC_DECIDE`
. N        - global vector length (or `PETSC_DECIDE` to have it calculated)
. cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
- gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)

  Output Parameter:
. v - the vector

  Notes:
  See `VecCreateSeqCUDAWithArrays()` for further discussion; this routine shares identical
  semantics.

  Level: intermediate

.seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArrays()`, `VecCreateMPIWithArray()`,
`VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
`VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
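
/* A minimal usage sketch (assuming a PETSc build configured with CUDA and a PetscInt n
   holding the local length on each rank). PETSc does not take ownership of either array:
   both must outlive the vector and be freed by the caller, and per the shared semantics
   with VecCreateSeqCUDAWithArrays(), if both are provided they must hold identical values.

     PetscScalar *host, *device;
     Vec          x;

     PetscCall(PetscMalloc1(n, &host));
     PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
     PetscCall(VecCreateMPICUDAWithArrays(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, host, device, &x));
*/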
/*@C
  VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
  user provides the device array space to store the vector values.

  Collective

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as `VecSetBlockSize()`
. n        - local vector length, cannot be `PETSC_DECIDE`
. N        - global vector length (or `PETSC_DECIDE` to have it calculated)
- gpuarray - the user provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  See `VecCreateSeqCUDAWithArray()` for further discussion; this routine shares identical
  semantics.

  Level: intermediate

.seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArray()`, `VecCreateMPIWithArray()`,
`VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
`VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
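
/* A minimal usage sketch (assuming a PETSc build configured with CUDA and a PetscInt n
   holding the local length on each rank). The caller retains ownership of the device
   array and frees it after the vector is destroyed.

     PetscScalar *device;
     Vec          x;

     PetscCallCUDA(cudaMalloc((void **)&device, n * sizeof(*device)));
     PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, device, &x));
     PetscCall(VecDestroy(&x));
     PetscCallCUDA(cudaFree(device));
*/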