Actual source code: vecseqcupm.cu

  1: #include "../vecseqcupm.hpp" /*I <petscvec.h> I*/

  3: using namespace Petsc::vec::cupm;
  4: using ::Petsc::device::cupm::DeviceType;

  6: static constexpr auto VecSeq_CUDA = impl::VecSeq_CUPM<DeviceType::CUDA>{};

  8: PetscErrorCode VecCreate_SeqCUDA(Vec v)
  9: {
 10:   PetscFunctionBegin;
 11:   PetscCall(VecSeq_CUDA.Create(v));
 12:   PetscFunctionReturn(PETSC_SUCCESS);
 13: }
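
/* VecCreate_SeqCUDA() is the constructor registered for the VECSEQCUDA type and is not meant to
   be called directly. A minimal sketch of the usual way to reach it, via VecSetType(), assuming
   PETSc has been initialized with CUDA support:

     Vec v;

     PetscCall(VecCreate(PETSC_COMM_SELF, &v));
     PetscCall(VecSetSizes(v, 100, 100));
     PetscCall(VecSetType(v, VECSEQCUDA));
     PetscCall(VecDestroy(&v));
*/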

 15: /*@
 16:   VecCreateSeqCUDA - Creates a standard, sequential, array-style vector of type `VECSEQCUDA`.

 18:   Collective, Possibly Synchronous

 20:   Input Parameters:
 21: + comm - the communicator, must be `PETSC_COMM_SELF`
 22: - n    - the vector length

 24:   Output Parameter:
 25: . v - the vector

 27:   Level: intermediate

 29:   Notes:
 30:   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
 31:   existing vector.

 33:   This function may initialize `PetscDevice`, which may incur a device synchronization.
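
  Example Usage:
  A minimal sketch, assuming PETSc has been initialized and built with CUDA support:
.vb
  Vec x;

  PetscCall(VecCreateSeqCUDA(PETSC_COMM_SELF, 100, &x));
  PetscCall(VecSet(x, 1.0)); // subsequent vector operations run on the GPU
  PetscCall(VecDestroy(&x));
.ve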

 35: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqCUDAWithArray()`,
 36:           `VecCreateMPI()`, `VecCreateMPICUDA()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
 37: @*/
 38: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm, PetscInt n, Vec *v)
 39: {
 40:   PetscFunctionBegin;
 41:   PetscCall(VecCreateSeqCUPMAsync<DeviceType::CUDA>(comm, n, v));
 42:   PetscFunctionReturn(PETSC_SUCCESS);
 43: }

 45: /*@C
 46:   VecCreateSeqCUDAWithArrays - Creates a sequential, array-style vector using CUDA, where the
 47:   user provides the complete array space to store the vector values.

 49:   Collective, Possibly Synchronous

 51:   Input Parameters:
 52: + comm     - the communicator, must be `PETSC_COMM_SELF`
 53: . bs       - the block size
 54: . n        - the local vector length
 55: . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
 56: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)

 58:   Output Parameter:
 59: . v - the vector

 61:   Level: intermediate

 63:   Notes:
 64:   If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
 65:   SET the array for storing the vector values. Otherwise, the array must be allocated on the
 66:   device.

 68:   If both `cpuarray` and `gpuarray` are provided, the two arrays must contain identical
 69:   values.

 71:   The arrays are NOT freed when the vector is destroyed via `VecDestroy()`. The user must free
 72:   them themselves, but not until the vector is destroyed.

 74:   This function may initialize `PetscDevice`, which may incur a device synchronization.
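
  Example Usage:
  A minimal sketch; error checking of the raw CUDA calls is omitted, and both arrays are zeroed
  so that they hold identical values as required above:
.vb
  const PetscInt n = 128;
  PetscScalar   *cpuarray, *gpuarray;
  Vec            v;

  PetscCall(PetscCalloc1(n, &cpuarray));
  cudaMalloc((void **)&gpuarray, n * sizeof(PetscScalar));
  cudaMemset(gpuarray, 0, n * sizeof(PetscScalar));
  PetscCall(VecCreateSeqCUDAWithArrays(PETSC_COMM_SELF, 1, n, cpuarray, gpuarray, &v));
  // ... use v ...
  PetscCall(VecDestroy(&v));
  PetscCall(PetscFree(cpuarray));
  cudaFree(gpuarray);
.ve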

 76: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeqWithArray()`, `VecCreateSeqCUDA()`,
 77:           `VecCreateSeqCUDAWithArray()`, `VecCreateMPICUDA()`, `VecCreateMPICUDAWithArray()`,
 78:           `VecCreateMPICUDAWithArrays()`, `VecCUDAPlaceArray()`
 79: @*/
 80: PetscErrorCode VecCreateSeqCUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
 81: {
 82:   PetscFunctionBegin;
 83:   PetscCall(VecCreateSeqCUPMWithArraysAsync<DeviceType::CUDA>(comm, bs, n, cpuarray, gpuarray, v));
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: /*@C
 88:   VecCreateSeqCUDAWithArray - Creates a sequential, array-style vector using CUDA, where the
 89:   user provides the device array space to store the vector values.

 91:   Collective, Possibly Synchronous

 93:   Input Parameters:
 94: + comm     - the communicator, must be `PETSC_COMM_SELF`
 95: . bs       - the block size
 96: . n        - the vector length
 97: - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)

 99:   Output Parameter:
100: . v - the vector

102:   Level: intermediate

104:   Notes:
105:   If the user-provided array is `NULL`, then `VecCUDAPlaceArray()` can be used at a later stage to
106:   SET the array for storing the vector values. Otherwise, the array must be allocated on the
107:   device.

109:   The array is NOT freed when the vector is destroyed via `VecDestroy()`. The user must free the
110:   array themselves, but not until the vector is destroyed.

112:   Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
113:   existing vector.

115:   This function may initialize `PetscDevice`, which may incur a device synchronization.
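
  Example Usage:
  A minimal sketch; error checking of the raw CUDA calls is omitted:
.vb
  const PetscInt n = 128;
  PetscScalar   *gpuarray;
  Vec            v;

  cudaMalloc((void **)&gpuarray, n * sizeof(PetscScalar));
  PetscCall(VecCreateSeqCUDAWithArray(PETSC_COMM_SELF, 1, n, gpuarray, &v));
  PetscCall(VecSet(v, 1.0));
  PetscCall(VecDestroy(&v));
  cudaFree(gpuarray); // only after the vector has been destroyed
.ve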

117: .seealso: [](ch_vectors), `PetscDeviceInitialize()`, `VecCreate()`, `VecCreateSeq()`, `VecCreateSeqWithArray()`,
118:           `VecCreateMPIWithArray()`, `VecCreateSeqCUDA()`, `VecCreateMPICUDAWithArray()`, `VecCUDAPlaceArray()`,
119:           `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
120: @*/
121: PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar gpuarray[], Vec *v)
122: {
123:   PetscFunctionBegin;
124:   PetscCall(VecCreateSeqCUDAWithArrays(comm, bs, n, nullptr, gpuarray, v));
125:   PetscFunctionReturn(PETSC_SUCCESS);
126: }

128: /*@C
129:   VecCUDAGetArray - Provides access to the device buffer inside a vector

131:   Not Collective; Asynchronous; No Fortran Support

133:   Input Parameter:
134: . v - the vector

136:   Output Parameter:
137: . a - the device buffer

139:   Level: intermediate

141:   Notes:
142:   This routine has semantics similar to `VecGetArray()`; the returned buffer points to a
143:   consistent view of the vector data. This may involve copying data from the host to the device
144:   if the data on the device is out of date. It is also assumed that the returned buffer is
145:   immediately modified, marking the host data out of date. This is similar to intent(inout) in
146:   Fortran.

148:   If the user requires stronger memory guarantees, they are encouraged to use
149:   `VecCUDAGetArrayRead()` and/or `VecCUDAGetArrayWrite()` instead.

151:   The user must call `VecCUDARestoreArray()` when they are finished using the array.
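
  Example Usage:
  A sketch of handing the device buffer to user CUDA code; `my_kernel` is a placeholder for a
  user-provided kernel that reads and modifies the entries:
.vb
  PetscScalar *a;
  PetscInt     n;

  PetscCall(VecGetLocalSize(v, &n));
  PetscCall(VecCUDAGetArray(v, &a));
  my_kernel<<<(n + 255) / 256, 256>>>(n, a);
  PetscCall(VecCUDARestoreArray(v, &a));
.ve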

153:   Developer Note:
154:   If the device memory hasn't been allocated previously it will be allocated as part of this
155:   routine.

157: .seealso: [](ch_vectors), `VecCUDARestoreArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
158:           `VecGetArrayRead()`, `VecGetArrayWrite()`
159: @*/
160: PetscErrorCode VecCUDAGetArray(Vec v, PetscScalar **a)
161: {
162:   PetscFunctionBegin;
163:   PetscCall(VecCUPMGetArrayAsync<DeviceType::CUDA>(v, a));
164:   PetscFunctionReturn(PETSC_SUCCESS);
165: }

167: /*@C
168:   VecCUDARestoreArray - Restore a device buffer previously acquired with `VecCUDAGetArray()`.

170:   Not Collective; Asynchronous; No Fortran Support

172:   Input Parameters:
173: + v - the vector
174: - a - the device buffer

176:   Level: intermediate

178:   Note:
179:   The restored pointer is invalid after this function returns. This function also marks the
180:   host data as out of date. Subsequent access to the vector data on the host side via
181:   `VecGetArray()` will incur a (synchronous) data transfer.
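
  Example Usage:
  A sketch showing the host-side consequence of the restore; `update_kernel` is a placeholder
  for a user-provided kernel:
.vb
  PetscScalar *d_a, *h_a;
  PetscInt     n;

  PetscCall(VecGetLocalSize(v, &n));
  PetscCall(VecCUDAGetArray(v, &d_a));
  update_kernel<<<(n + 255) / 256, 256>>>(n, d_a);
  PetscCall(VecCUDARestoreArray(v, &d_a)); // host copy is now marked out of date
  PetscCall(VecGetArray(v, &h_a));         // incurs a device-to-host transfer
  PetscCall(VecRestoreArray(v, &h_a));
.ve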

183: .seealso: [](ch_vectors), `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
184:           `VecRestoreArray()`, `VecGetArrayRead()`
185: @*/
186: PetscErrorCode VecCUDARestoreArray(Vec v, PetscScalar **a)
187: {
188:   PetscFunctionBegin;
189:   PetscCall(VecCUPMRestoreArrayAsync<DeviceType::CUDA>(v, a));
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }

193: /*@C
194:   VecCUDAGetArrayRead - Provides read access to the CUDA buffer inside a vector.

196:   Not Collective; Asynchronous; No Fortran Support

198:   Input Parameter:
199: . v - the vector

201:   Output Parameter:
202: . a - the CUDA pointer

204:   Level: intermediate

206:   Notes:
207:   See `VecCUDAGetArray()` for data movement semantics of this function.

209:   This function assumes that the user will not modify the vector data. This is analogous to
210:   intent(in) in Fortran.

212:   The device pointer must be restored by calling `VecCUDARestoreArrayRead()`. If the data on the
213:   host side was previously up to date it will remain so, i.e. data on both the device and the
214:   host is up to date. Accessing data on the host side does not incur a device to host data
215:   transfer.
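
  Example Usage:
  A sketch of read-only access that copies a single entry back to the host; error checking of
  the raw CUDA call is omitted:
.vb
  const PetscScalar *a;
  PetscScalar        first;

  PetscCall(VecCUDAGetArrayRead(v, &a));
  cudaMemcpy(&first, a, sizeof(PetscScalar), cudaMemcpyDeviceToHost);
  PetscCall(VecCUDARestoreArrayRead(v, &a));
.ve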

217: .seealso: [](ch_vectors), `VecCUDARestoreArrayRead()`, `VecCUDAGetArray()`, `VecCUDAGetArrayWrite()`, `VecGetArray()`,
218:           `VecGetArrayRead()`
219: @*/
220: PetscErrorCode VecCUDAGetArrayRead(Vec v, const PetscScalar **a)
221: {
222:   PetscFunctionBegin;
223:   PetscCall(VecCUPMGetArrayReadAsync<DeviceType::CUDA>(v, a));
224:   PetscFunctionReturn(PETSC_SUCCESS);
225: }

227: /*@C
228:   VecCUDARestoreArrayRead - Restore a CUDA device pointer previously acquired with
229:   `VecCUDAGetArrayRead()`.

231:   Not Collective; Asynchronous; No Fortran Support

233:   Input Parameters:
234: + v - the vector
235: - a - the CUDA device pointer

237:   Level: intermediate

239:   Note:
240:   This routine does not modify the corresponding array on the host in any way. The pointer is
241:   invalid after this function returns.

243: .seealso: [](ch_vectors), `VecCUDAGetArrayRead()`, `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecGetArray()`,
244:           `VecRestoreArray()`, `VecGetArrayRead()`
245: @*/
246: PetscErrorCode VecCUDARestoreArrayRead(Vec v, const PetscScalar **a)
247: {
248:   PetscFunctionBegin;
249:   PetscCall(VecCUPMRestoreArrayReadAsync<DeviceType::CUDA>(v, a));
250:   PetscFunctionReturn(PETSC_SUCCESS);
251: }

253: /*@C
254:   VecCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a vector.

256:   Not Collective; Asynchronous; No Fortran Support

258:   Input Parameter:
259: . v - the vector

261:   Output Parameter:
262: . a - the CUDA pointer

264:   Level: advanced

266:   Notes:
267:   The data pointed to by the device pointer is uninitialized. The user may not read from this
268:   data. Furthermore, the entire array needs to be filled by the user to obtain well-defined
269:   behaviour. The device memory will be allocated by this function if it hasn't been allocated
270:   previously. This is analogous to intent(out) in Fortran.

272:   The device pointer needs to be released with `VecCUDARestoreArrayWrite()`. When the pointer is
273:   released, the host data of the vector is marked as out of date. Subsequent access of the host
274:   data with e.g. `VecGetArray()` incurs a device to host data transfer.
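
  Example Usage:
  A sketch of write-only access; `fill_kernel` is a placeholder for a user-provided kernel that
  writes every one of the n entries:
.vb
  PetscScalar *a;
  PetscInt     n;

  PetscCall(VecGetLocalSize(v, &n));
  PetscCall(VecCUDAGetArrayWrite(v, &a));
  fill_kernel<<<(n + 255) / 256, 256>>>(n, a);
  PetscCall(VecCUDARestoreArrayWrite(v, &a));
.ve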

276: .seealso: [](ch_vectors), `VecCUDARestoreArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
277:           `VecGetArray()`, `VecGetArrayRead()`
278: @*/
279: PetscErrorCode VecCUDAGetArrayWrite(Vec v, PetscScalar **a)
280: {
281:   PetscFunctionBegin;
282:   PetscCall(VecCUPMGetArrayWriteAsync<DeviceType::CUDA>(v, a));
283:   PetscFunctionReturn(PETSC_SUCCESS);
284: }

286: /*@C
287:   VecCUDARestoreArrayWrite - Restore a CUDA device pointer previously acquired with
288:   `VecCUDAGetArrayWrite()`.

290:   Not Collective; Asynchronous; No Fortran Support

292:   Input Parameters:
293: + v - the vector
294: - a - the CUDA device pointer. This pointer is invalid after `VecCUDARestoreArrayWrite()` returns.

296:   Level: intermediate

298:   Note:
299:   Data on the host will be marked as out of date. Subsequent access of the data on the host
300:   side e.g. with `VecGetArray()` will incur a device to host data transfer.

302: .seealso: [](ch_vectors), `VecCUDAGetArrayWrite()`, `VecCUDAGetArray()`, `VecCUDAGetArrayRead()`,
303:           `VecGetArray()`, `VecRestoreArray()`, `VecGetArrayRead()`
304: @*/
305: PetscErrorCode VecCUDARestoreArrayWrite(Vec v, PetscScalar **a)
306: {
307:   PetscFunctionBegin;
308:   PetscCall(VecCUPMRestoreArrayWriteAsync<DeviceType::CUDA>(v, a));
309:   PetscFunctionReturn(PETSC_SUCCESS);
310: }

312: /*@C
313:   VecCUDAPlaceArray - Allows one to replace the GPU array in a vector with a GPU array provided
314:   by the user.

316:   Not Collective; Asynchronous; No Fortran Support

318:   Input Parameters:
319: + vin - the vector
320: - a   - the GPU array

322:   Level: advanced

324:   Notes:
325:   This routine is useful for avoiding the cost of copying an existing GPU array into a vector. You
326:   can return to the original GPU array with a call to `VecCUDAResetArray()`.

328:   It is not possible to use `VecCUDAPlaceArray()` and `VecPlaceArray()` at the same time on the
329:   same vector.

331:   `vin` does not take ownership of `a` in any way. The user must free `a` themselves,
332:   but be careful not to do so before the vector has either been destroyed, had its original
333:   array restored with `VecCUDAResetArray()` or permanently replaced with
334:   `VecCUDAReplaceArray()`.
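
  Example Usage:
  A sketch of temporarily swapping in a user-provided device array; error checking of the raw
  CUDA calls is omitted:
.vb
  PetscScalar *myarray;
  PetscInt     n;

  PetscCall(VecGetLocalSize(vin, &n));
  cudaMalloc((void **)&myarray, n * sizeof(PetscScalar));
  PetscCall(VecCUDAPlaceArray(vin, myarray));
  // ... operations on vin now read from and write to myarray ...
  PetscCall(VecCUDAResetArray(vin));
  cudaFree(myarray);
.ve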

336: .seealso: [](ch_vectors), `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`,
337:           `VecResetArray()`, `VecCUDAResetArray()`, `VecCUDAReplaceArray()`
338: @*/
339: PetscErrorCode VecCUDAPlaceArray(Vec vin, const PetscScalar a[])
340: {
341:   PetscFunctionBegin;
342:   PetscCall(VecCUPMPlaceArrayAsync<DeviceType::CUDA>(vin, a));
343:   PetscFunctionReturn(PETSC_SUCCESS);
344: }

346: /*@C
347:   VecCUDAReplaceArray - Permanently replace the GPU array in a vector with a GPU array provided
348:   by the user.

350:   Not Collective; No Fortran Support

352:   Input Parameters:
353: + vin - the vector
354: - a   - the GPU array

356:   Level: advanced

358:   Notes:
359:   This is useful to avoid copying a GPU array into a vector.

361:   This frees the memory associated with the old GPU array. The vector takes ownership of the
362:   passed array so it CANNOT be freed by the user. It will be freed when the vector is
363:   destroyed.
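
  Example Usage:
  A sketch, assuming the replacement array was allocated with cudaMalloc(); error checking of
  the raw CUDA call is omitted:
.vb
  PetscScalar *newarray;
  PetscInt     n;

  PetscCall(VecGetLocalSize(vin, &n));
  cudaMalloc((void **)&newarray, n * sizeof(PetscScalar));
  // ... fill newarray on the device ...
  PetscCall(VecCUDAReplaceArray(vin, newarray));
  // newarray must NOT be freed here; it is released when vin is destroyed
.ve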

365: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecPlaceArray()`, `VecResetArray()`,
366:           `VecCUDAResetArray()`, `VecCUDAPlaceArray()`, `VecReplaceArray()`
367: @*/
368: PetscErrorCode VecCUDAReplaceArray(Vec vin, const PetscScalar a[])
369: {
370:   PetscFunctionBegin;
371:   PetscCall(VecCUPMReplaceArrayAsync<DeviceType::CUDA>(vin, a));
372:   PetscFunctionReturn(PETSC_SUCCESS);
373: }

375: /*@C
376:   VecCUDAResetArray - Resets a vector to use its default memory.

378:   Not Collective; No Fortran Support

380:   Input Parameter:
381: . vin - the vector

383:   Level: advanced

385:   Note:
386:   Call this after the use of `VecCUDAPlaceArray()`.

388: .seealso: [](ch_vectors), `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecPlaceArray()`,
389:           `VecResetArray()`, `VecCUDAPlaceArray()`, `VecCUDAReplaceArray()`
390: @*/
391: PetscErrorCode VecCUDAResetArray(Vec vin)
392: {
393:   PetscFunctionBegin;
394:   PetscCall(VecCUPMResetArrayAsync<DeviceType::CUDA>(vin));
395:   PetscFunctionReturn(PETSC_SUCCESS);
396: }