/* vecseqcupm.cu */
1: #include "../vecseqcupm.hpp" /*I <petscvec.h> I*/
using namespace Petsc::vec::cupm;
using ::Petsc::device::cupm::DeviceType;

// Stateless dispatcher: every VECSEQCUDA entry point below forwards to the shared
// CUPM implementation instantiated for the CUDA backend.
static constexpr auto VecSeq_CUDA = impl::VecSeq_CUPM<DeviceType::CUDA>{};
// Type constructor for VECSEQCUDA (presumably installed via VecRegister() elsewhere —
// not visible here). Forwards to the device-generic CUPM Create() for CUDA.
PetscErrorCode VecCreate_SeqCUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecSeq_CUDA.Create(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
15: /*@
16: VecCreateSeqCUDA - Creates a standard, sequential, array-style vector.
18: Collective, Possibly Synchronous
20: Input Parameters:
21: + comm - the communicator, must be `PETSC_COMM_SELF`
22: - n - the vector length
24: Output Parameter:
25: . v - the vector
27: Level: intermediate
29: Notes:
30: Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
31: existing vector.
33: This function may initialize `PetscDevice`, which may incur a device synchronization.
35: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqCUDAWithArray()`,
36: `VecCreateMPI()`, `VecCreateMPICUDA()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
37: @*/
// Thin public wrapper: delegates creation to the device-generic CUPM helper
// instantiated for CUDA. User-facing contract is in the manual page above.
PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm, PetscInt n, Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateSeqCUPMAsync<DeviceType::CUDA>(comm, n, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
45: /*@C
46: VecCreateSeqCUDAWithArrays - Creates a sequential, array-style vector using CUDA, where the
47: user provides the complete array space to store the vector values.
49: Collective, Possibly Synchronous
51: Input Parameters:
52: + comm - the communicator, must be `PETSC_COMM_SELF`
53: . bs - the block size
54: . n - the local vector length
55: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
56: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
58: Output Parameter:
59: . v - the vector
61: Level: intermediate
63: Notes:
64: If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
65: SET the array for storing the vector values. Otherwise, the array must be allocated on the
66: device.
68: If both cpuarray and gpuarray are provided, the provided arrays must have identical
69: values.
71: The arrays are NOT freed when the vector is destroyed via `VecDestroy()`. The user must free
72: them themselves, but not until the vector is destroyed.
74: This function may initialize `PetscDevice`, which may incur a device synchronization.
76: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeqWithArray()`, `VecCreateSeqCUDA()`,
77: `VecCreateSeqCUDAWithArray()`, `VecCreateMPICUDA()`, `VecCreateMPICUDAWithArray()`,
78: `VecCreateMPICUDAWithArrays()`, `VecCUDAPlaceArray()`
@*/
// Thin public wrapper: forwards both user-provided buffers (either may be NULL)
// to the device-generic CUPM creator for CUDA.
PetscErrorCode VecCreateSeqCUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateSeqCUPMWithArraysAsync<DeviceType::CUDA>(comm, bs, n, cpuarray, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
87: /*@C
88: VecCreateSeqCUDAWithArray - Creates a sequential, array-style vector using CUDA, where the
89: user provides the device array space to store the vector values.
91: Collective, Possibly Synchronous
93: Input Parameters:
94: + comm - the communicator, must be `PETSC_COMM_SELF`
95: . bs - the block size
96: . n - the vector length
97: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
99: Output Parameter:
100: . v - the vector
102: Level: intermediate
104: Notes:
105: If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
106: SET the array for storing the vector values. Otherwise, the array must be allocated on the
107: device.
109: The array is NOT freed when the vector is destroyed via `VecDestroy()`. The user must free the
110: array themselves, but not until the vector is destroyed.
112: Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
113: existing vector.
115: This function may initialize `PetscDevice`, which may incur a device synchronization.
117: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqWithArray()`,
118: `VecCreateMPIWithArray()`, `VecCreateSeqCUDA()`, `VecCreateMPICUDAWithArray()`, `VecCUDAPlaceArray()`,
119: `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
120: @*/
// Device-array-only variant: implemented in terms of VecCreateSeqCUDAWithArrays()
// with no host buffer (nullptr for the CPU array).
PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar gpuarray[], Vec *v)
{
  PetscFunctionBegin;
  PetscCall(VecCreateSeqCUDAWithArrays(comm, bs, n, nullptr, gpuarray, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
128: /*@C
129: VecCUDAGetArray - Provides access to the device buffer inside a vector
131: Not Collective; Asynchronous; No Fortran Support
133: Input Parameter:
134: . v - the vector
136: Output Parameter:
137: . a - the device buffer
139: Level: intermediate
141: Notes:
142: This routine has semantics similar to `VecGetArray()`; the returned buffer points to a
143: consistent view of the vector data. This may involve copying data from the host to the device
144: if the data on the device is out of date. It is also assumed that the returned buffer is
145: immediately modified, marking the host data out of date. This is similar to intent(inout) in
146: Fortran.
148: If the user does require strong memory guarantees, they are encouraged to use
149: `VecCUDAGetArrayRead()` and/or `VecCUDAGetArrayWrite()` instead.
151: The user must call `VecCUDARestoreArray()` when they are finished using the array.
153: Developer Note:
154: If the device memory hasn't been allocated previously it will be allocated as part of this
155: routine.
157: .seealso: [](ch_vectors), `VecCUDARestoreArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
158: `VecGetArrayRead()`, `VecGetArrayWrite()`
159: @*/
// Thin wrapper: read-write device-array access via the CUPM implementation for CUDA.
// Data-movement semantics are documented in the manual page above.
PetscErrorCode VecCUDAGetArray(Vec v, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMGetArrayAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
167: /*@C
168: VecCUDARestoreArray - Restore a device buffer previously acquired with `VecCUDAGetArray()`.
  Not Collective; Asynchronous; No Fortran Support
172: Input Parameters:
173: + v - the vector
174: - a - the device buffer
176: Level: intermediate
178: Note:
179: The restored pointer is invalid after this function returns. This function also marks the
180: host data as out of date. Subsequent access to the vector data on the host side via
181: `VecGetArray()` will incur a (synchronous) data transfer.
183: .seealso: [](ch_vectors), `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
184: `VecRestoreArray()`, `VecGetArrayRead()`
185: @*/
// Thin wrapper: returns an array obtained with VecCUDAGetArray() via the CUPM
// implementation for CUDA; the pointer is invalid afterwards.
PetscErrorCode VecCUDARestoreArray(Vec v, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMRestoreArrayAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
193: /*@C
194: VecCUDAGetArrayRead - Provides read access to the CUDA buffer inside a vector.
196: Not Collective; Asynchronous; No Fortran Support
198: Input Parameter:
199: . v - the vector
201: Output Parameter:
202: . a - the CUDA pointer.
204: Level: intermediate
206: Notes:
207: See `VecCUDAGetArray()` for data movement semantics of this function.
  This function assumes that the user will not modify the vector data. This is analogous to
210: intent(in) in Fortran.
212: The device pointer must be restored by calling `VecCUDARestoreArrayRead()`. If the data on the
213: host side was previously up to date it will remain so, i.e. data on both the device and the
214: host is up to date. Accessing data on the host side does not incur a device to host data
215: transfer.
217: .seealso: [](ch_vectors), `VecCUDARestoreArrayRead()`, `VecCUDAGetArray()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
218: `VecGetArrayRead()`
219: @*/
// Thin wrapper: read-only device-array access via the CUPM implementation for CUDA.
PetscErrorCode VecCUDAGetArrayRead(Vec v, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMGetArrayReadAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
227: /*@C
228: VecCUDARestoreArrayRead - Restore a CUDA device pointer previously acquired with
229: `VecCUDAGetArrayRead()`.
231: Not Collective; Asynchronous; No Fortran Support
233: Input Parameters:
234: + v - the vector
235: - a - the CUDA device pointer
237: Level: intermediate
239: Note:
240: This routine does not modify the corresponding array on the host in any way. The pointer is
241: invalid after this function returns.
243: .seealso: [](ch_vectors), `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecGetArray()`,
244: `VecRestoreArray()`, `VecGetArrayRead()`
245: @*/
// Thin wrapper: returns a read-only pointer obtained with VecCUDAGetArrayRead()
// via the CUPM implementation for CUDA.
PetscErrorCode VecCUDARestoreArrayRead(Vec v, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMRestoreArrayReadAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
253: /*@C
254: VecCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a vector.
256: Not Collective; Asynchronous; No Fortran Support
258: Input Parameter:
259: . v - the vector
261: Output Parameter:
262: . a - the CUDA pointer
264: Level: advanced
266: Notes:
267: The data pointed to by the device pointer is uninitialized. The user may not read from this
268: data. Furthermore, the entire array needs to be filled by the user to obtain well-defined
269: behaviour. The device memory will be allocated by this function if it hasn't been allocated
270: previously. This is analogous to intent(out) in Fortran.
272: The device pointer needs to be released with `VecCUDARestoreArrayWrite()`. When the pointer is
  released the host data of the vector is marked as out of date. Subsequent access of the host
  data with e.g. `VecGetArray()` incurs a device to host data transfer.
276: .seealso: [](ch_vectors), `VecCUDARestoreArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
277: `VecCUDAGetArrayWrite()`, `VecGetArray()`, `VecGetArrayRead()`
278: @*/
// Thin wrapper: write-only device-array access via the CUPM implementation for
// CUDA; the returned buffer contents must be treated as uninitialized.
PetscErrorCode VecCUDAGetArrayWrite(Vec v, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMGetArrayWriteAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
286: /*@C
287: VecCUDARestoreArrayWrite - Restore a CUDA device pointer previously acquired with
288: `VecCUDAGetArrayWrite()`.
290: Not Collective; Asynchronous; No Fortran Support
292: Input Parameters:
293: + v - the vector
294: - a - the CUDA device pointer. This pointer is invalid after `VecCUDARestoreArrayWrite()` returns.
296: Level: intermediate
298: Note:
299: Data on the host will be marked as out of date. Subsequent access of the data on the host
300: side e.g. with `VecGetArray()` will incur a device to host data transfer.
302: .seealso: [](ch_vectors), `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
303: `VecCUDAGetArrayWrite()`, `VecGetArray()`, `VecRestoreArray()`, `VecGetArrayRead()`
304: @*/
// Thin wrapper: returns a pointer obtained with VecCUDAGetArrayWrite() via the
// CUPM implementation for CUDA.
PetscErrorCode VecCUDARestoreArrayWrite(Vec v, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMRestoreArrayWriteAsync<DeviceType::CUDA>(v, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
312: /*@C
313: VecCUDAPlaceArray - Allows one to replace the GPU array in a vector with a GPU array provided
314: by the user.
316: Not Collective; Asynchronous; No Fortran Support
  Input Parameters:
+ vin - the vector
- a   - the GPU array

  Level: advanced

  Notes:
  This routine is useful to avoid copying an array into a vector, though you can return to the
  original GPU array with a call to `VecCUDAResetArray()`.

  It is not possible to use `VecCUDAPlaceArray()` and `VecPlaceArray()` at the same time on the
  same vector.

  `vin` does not take ownership of `a` in any way. The user must free `a` themselves but be
  careful not to do so before the vector has either been destroyed, had its original array
  restored with `VecCUDAResetArray()` or permanently replaced with `VecCUDAReplaceArray()`.
336: .seealso: [](ch_vectors), `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`,
337: `VecResetArray()`, `VecCUDAResetArray()`, `VecCUDAReplaceArray()`
338: @*/
// Thin wrapper: temporarily substitutes the vector's device array with a
// user-provided one; undone by VecCUDAResetArray(). Delegates to CUPM for CUDA.
PetscErrorCode VecCUDAPlaceArray(Vec vin, const PetscScalar a[])
{
  PetscFunctionBegin;
  PetscCall(VecCUPMPlaceArrayAsync<DeviceType::CUDA>(vin, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
346: /*@C
347: VecCUDAReplaceArray - Permanently replace the GPU array in a vector with a GPU array provided
348: by the user.
350: Not Collective; No Fortran Support
  Input Parameters:
+ vin - the vector
- a   - the GPU array
356: Level: advanced
358: Notes:
359: This is useful to avoid copying a GPU array into a vector.
361: This frees the memory associated with the old GPU array. The vector takes ownership of the
362: passed array so it CANNOT be freed by the user. It will be freed when the vector is
363: destroyed.
365: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecPlaceArray()`, `VecResetArray()`,
366: `VecCUDAResetArray()`, `VecCUDAPlaceArray()`, `VecReplaceArray()`
367: @*/
// Thin wrapper: permanently replaces the vector's device array (vector takes
// ownership per the manual page above). Delegates to CUPM for CUDA.
PetscErrorCode VecCUDAReplaceArray(Vec vin, const PetscScalar a[])
{
  PetscFunctionBegin;
  PetscCall(VecCUPMReplaceArrayAsync<DeviceType::CUDA>(vin, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
375: /*@C
376: VecCUDAResetArray - Resets a vector to use its default memory.
378: Not Collective; No Fortran Support
  Input Parameter:
. vin - the vector
383: Level: advanced
385: Note:
386: Call this after the use of `VecCUDAPlaceArray()`.
388: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecPlaceArray()`,
389: `VecResetArray()`, `VecCUDAPlaceArray()`, `VecCUDAReplaceArray()`
390: @*/
// Thin wrapper: restores the vector's own device array after VecCUDAPlaceArray().
// Delegates to the CUPM implementation for CUDA.
PetscErrorCode VecCUDAResetArray(Vec vin)
{
  PetscFunctionBegin;
  PetscCall(VecCUPMResetArrayAsync<DeviceType::CUDA>(vin));
  PetscFunctionReturn(PETSC_SUCCESS);
}