Actual source code: mpikok.kokkos.cxx
/*
   This file contains routines for Parallel vector operations.
*/

#include <petscvec_kokkos.hpp>
#include <petsc/private/deviceimpl.h>
#include <petsc/private/vecimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
#include <petscsf.h>

static PetscErrorCode VecDestroy_MPIKokkos(Vec v)
{
  PetscFunctionBegin;
  delete static_cast<Vec_Kokkos *>(v->spptr);
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
{
  PetscFunctionBegin;
  PetscCall(VecNorm_MPI_Default(xin, type, z, VecNorm_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^H x */
static PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y^T x */
static PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecXDot_MPI_Default(xin, yin, z, VecTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscFunctionBegin;
  PetscCall(VecMXDot_MPI_Default(xin, nv, y, z, VecMTDot_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MAXLOC, MPIU_MAX};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMax_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  const MPI_Op ops[] = {MPIU_MINLOC, MPIU_MIN};

  PetscFunctionBegin;
  PetscCall(VecMinMax_MPI_Default(xin, idx, z, VecMin_SeqKokkos, ops));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
{
  Vec         v;
  Vec_MPI    *vecmpi;
  Vec_Kokkos *veckok;

  PetscFunctionBegin;
  /* Reuse VecDuplicate_MPI, which does most of the setup */
  PetscCall(VecDuplicate_MPI(win, &v)); /* after the call, v is a VECMPI with its data zeroed */
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(PetscMemcpy(v->ops, win->ops, sizeof(*win->ops)));

  /* Build the Vec_Kokkos struct */
  vecmpi = static_cast<Vec_MPI *>(v->data);
  veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
  Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
  v->spptr       = veckok;
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *vv            = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscFunctionBegin;
  PetscCall(VecDotNorm2_MPI_Default(s, t, dp, nm, VecDotNorm2_SeqKokkos));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
{
  PetscFunctionBegin;
  PetscCall(VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
{
  const auto vecmpi = static_cast<Vec_MPI *>(x->data);
  const auto veckok = static_cast<Vec_Kokkos *>(x->spptr);
  PetscInt   m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(VecSetPreallocationCOO_MPI(x, ncoo, coo_i));
  PetscCallCXX(veckok->SetUpCOO(vecmpi, m));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
{
  const auto                  vecmpi  = static_cast<Vec_MPI *>(x->data);
  const auto                  veckok  = static_cast<Vec_Kokkos *>(x->spptr);
  const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
  const PetscCountKokkosView &perm1   = veckok->perm1_d;
  const PetscCountKokkosView &imap2   = veckok->imap2_d;
  const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
  const PetscCountKokkosView &perm2   = veckok->perm2_d;
  const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
  PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
  PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
  PetscScalarKokkosView       xv;
  ConstPetscScalarKokkosView  vv;
  PetscMemType                memtype;
  PetscInt                    m;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(x, &m));
  PetscCall(PetscGetMemType(v, &memtype));
  if (PetscMemTypeHost(memtype)) { /* v[] is on the host; mirror it to the device (a copy only happens when host and device memory differ) */
    vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
  } else {
    vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
  }

  /* Pack entries to be sent to remote ranks */
  Kokkos::parallel_for(
    vecmpi->sendlen, KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
  PetscCall(PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE));

  if (imode == INSERT_VALUES) PetscCall(VecGetKokkosViewWrite(x, &xv)); /* write-only access to the vector */
  else PetscCall(VecGetKokkosView(x, &xv));                            /* read & write access to the vector */

  /* Add local entries while the communication is in flight */
  Kokkos::parallel_for(
    m, KOKKOS_LAMBDA(const PetscCount i) {
      PetscScalar sum = 0.0;
      for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
      xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
    });

  PetscCall(PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE));

  /* Add received remote entries */
  Kokkos::parallel_for(
    vecmpi->nnz2, KOKKOS_LAMBDA(PetscCount i) {
      for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
    });

  if (imode == INSERT_VALUES) PetscCall(VecRestoreKokkosViewWrite(x, &xv));
  else PetscCall(VecRestoreKokkosView(x, &xv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
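
/*
  Illustrative sketch (not part of the original file) of how the COO assembly path above is
  typically driven from user code. The vector x and the arrays coo_i/coo_v are hypothetical;
  indices use the global numbering and may refer to entries owned by other ranks:

    PetscInt    coo_i[] = {0, 7, 7};                   // index 7 contributed twice; it may live on a remote rank
    PetscScalar coo_v[] = {1.0, 2.0, 3.0};
    PetscCall(VecSetPreallocationCOO(x, 3, coo_i));    // dispatches to VecSetPreallocationCOO_MPIKokkos()
    PetscCall(VecSetValuesCOO(x, coo_v, ADD_VALUES));  // dispatches to VecSetValuesCOO_MPIKokkos()
*/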

static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
{
  PetscFunctionBegin;
  v->ops->abs             = VecAbs_SeqKokkos;
  v->ops->reciprocal      = VecReciprocal_SeqKokkos;
  v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
  v->ops->setrandom       = VecSetRandom_SeqKokkos;
  v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
  v->ops->waxpy           = VecWAXPY_SeqKokkos;
  v->ops->norm            = VecNorm_MPIKokkos;
  v->ops->min             = VecMin_MPIKokkos;
  v->ops->max             = VecMax_MPIKokkos;
  v->ops->sum             = VecSum_SeqKokkos;
  v->ops->shift           = VecShift_SeqKokkos;
  v->ops->scale           = VecScale_SeqKokkos;
  v->ops->copy            = VecCopy_SeqKokkos;
  v->ops->set             = VecSet_SeqKokkos;
  v->ops->swap            = VecSwap_SeqKokkos;
  v->ops->axpy            = VecAXPY_SeqKokkos;
  v->ops->axpby           = VecAXPBY_SeqKokkos;
  v->ops->maxpy           = VecMAXPY_SeqKokkos;
  v->ops->aypx            = VecAYPX_SeqKokkos;
  v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
  v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
  v->ops->placearray      = VecPlaceArray_SeqKokkos;
  v->ops->replacearray    = VecReplaceArray_SeqKokkos;
  v->ops->resetarray      = VecResetArray_SeqKokkos;

  v->ops->dot   = VecDot_MPIKokkos;
  v->ops->tdot  = VecTDot_MPIKokkos;
  v->ops->mdot  = VecMDot_MPIKokkos;
  v->ops->mtdot = VecMTDot_MPIKokkos;

  v->ops->dot_local   = VecDot_SeqKokkos;
  v->ops->tdot_local  = VecTDot_SeqKokkos;
  v->ops->mdot_local  = VecMDot_SeqKokkos;
  v->ops->mtdot_local = VecMTDot_SeqKokkos;

  v->ops->norm_local              = VecNorm_SeqKokkos;
  v->ops->duplicate               = VecDuplicate_MPIKokkos;
  v->ops->destroy                 = VecDestroy_MPIKokkos;
  v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
  v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
  v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
  v->ops->getarray                = VecGetArray_SeqKokkos;
  v->ops->restorearray            = VecRestoreArray_SeqKokkos;
  v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
  v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
  v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
  v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
  v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

  v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
  v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

   Options Database Keys:
. -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
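/*
  Illustrative sketch (not part of the original file) of creating a VECMPIKOKKOS vector from
  user code; the global size 100 is arbitrary:

    Vec x;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
    PetscCall(VecSetType(x, VECMPIKOKKOS));  // or VecSetFromOptions() together with -vec_type mpikokkos
    PetscCall(VecSet(x, 1.0));
    PetscCall(VecDestroy(&x));
*/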
PetscErrorCode VecCreate_MPIKokkos(Vec v)
{
  Vec_MPI    *vecmpi;
  Vec_Kokkos *veckok;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscLayoutSetUp(v->map));
  PetscCall(VecCreate_MPI(v)); /* Calloc host array */

  vecmpi = static_cast<Vec_MPI *>(v->data);
  PetscCall(PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS));
  PetscCall(VecSetOps_MPIKokkos(v));
  veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
  v->spptr       = static_cast<void *>(veckok);
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
  where the user provides the GPU array space to store the vector values.

  Collective

  Input Parameters:
+ comm   - the MPI communicator to use
. bs     - block size, same meaning as VecSetBlockSize()
. n      - local vector length, cannot be PETSC_DECIDE
. N      - global vector length (or PETSC_DECIDE to have it calculated)
- darray - the user provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
  same type as an existing vector.

  If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
  at a later stage to SET the array for storing the vector values.

  PETSc does NOT free the array when the vector is destroyed via VecDestroy().
  The user should not free the array until the vector is destroyed.

  Level: intermediate

.seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
{
  Vec          w;
  Vec_Kokkos  *veckok;
  Vec_MPI     *vecmpi;
  PetscScalar *harray;

  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscKokkosInitializeCheck());
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, &w));
  PetscCall(VecSetSizes(w, n, N));
  PetscCall(VecSetBlockSize(w, bs));
  PetscCall(PetscLayoutSetUp(w->map));

  if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
    harray = const_cast<PetscScalar *>(darray);
  } else PetscCall(PetscMalloc1(w->map->n, &harray)); /* If the device is not the host, allocate the host array ourselves */

  PetscCall(VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray)); /* Build an MPI vector around the provided host array */
  vecmpi = static_cast<Vec_MPI *>(w->data);

  if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */

  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS));
  PetscCall(VecSetOps_MPIKokkos(w));
  veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
  veckok->v_dual.modify_device(); /* Mark the device data as modified */
  w->spptr       = static_cast<void *>(veckok);
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}
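
/*
  Illustrative sketch (not part of the original file) of calling VecCreateMPIKokkosWithArray().
  The local length n and the device buffer d_buf are hypothetical; the sketch assumes the buffer
  is allocated in Kokkos' default memory space and is at least n entries long:

    PetscInt     n = 8;
    PetscReal    nrm;
    Vec          x;
    PetscScalar *d_buf = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(n * sizeof(PetscScalar)));
    PetscCall(VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, d_buf, &x));
    PetscCall(VecSet(x, 1.0));
    PetscCall(VecNorm(x, NORM_2, &nrm));
    PetscCall(VecDestroy(&x)); // PETSc does not free d_buf
    Kokkos::kokkos_free(d_buf);
*/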

/*
  VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
  with user-provided arrays on host and device.

  Collective

  Input Parameters:
+ comm   - the communicator
. bs     - the block size
. n      - the local vector length
. N      - the global vector length
. harray - host memory where the vector elements are to be stored
- darray - device memory where the vector elements are to be stored

  Output Parameter:
. v - the vector

  Notes:
  If there is no device, then harray and darray must be the same.
  If n is not zero, then harray and darray must be allocated.
  After the call, the created vector is considered synchronized, i.e., harray and darray
  are assumed to contain the same data.

  PETSc does NOT free the arrays when the vector is destroyed via VecDestroy().
  The user should not free the arrays until the vector is destroyed.
*/
PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscFunctionBegin;
  PetscCall(PetscKokkosInitializeCheck());
  if (n) {
    PetscCheck(harray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray cannot be NULL");
    PetscCheck(darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
  }
  if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) PetscCheck(harray == darray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "harray and darray must be the same");
  PetscCall(VecCreateMPIWithArray(comm, bs, n, N, harray, &w));
  PetscCall(PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS)); /* Change it to Kokkos */
  PetscCall(VecSetOps_MPIKokkos(w));
  PetscCallCXX(w->spptr = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray)));
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

   Options Database Keys:
. -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_Kokkos(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) PetscCall(VecSetType(v, VECSEQKOKKOS));
  else PetscCall(VecSetType(v, VECMPIKOKKOS));
  PetscFunctionReturn(PETSC_SUCCESS);
}