Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
21: PetscFunctionBegin;
22: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
23: *a = 0;
24: PetscCall(VecViennaCLCopyToGPU(v));
25: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
26: ViennaCLWaitForGPU();
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
31: {
32: PetscFunctionBegin;
33: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
34: v->offloadmask = PETSC_OFFLOAD_GPU;
36: PetscCall(PetscObjectStateIncrease((PetscObject)v));
37: PetscFunctionReturn(PETSC_SUCCESS);
38: }
40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
41: {
42: PetscFunctionBegin;
43: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
44: *a = 0;
45: PetscCall(VecViennaCLCopyToGPU(v));
46: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
47: ViennaCLWaitForGPU();
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
52: {
53: PetscFunctionBegin;
54: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
59: {
60: PetscFunctionBegin;
61: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
62: *a = 0;
63: PetscCall(VecViennaCLAllocateCheck(v));
64: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
65: ViennaCLWaitForGPU();
66: PetscFunctionReturn(PETSC_SUCCESS);
67: }
69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
70: {
71: PetscFunctionBegin;
72: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
73: v->offloadmask = PETSC_OFFLOAD_GPU;
75: PetscCall(PetscObjectStateIncrease((PetscObject)v));
76: PetscFunctionReturn(PETSC_SUCCESS);
77: }
79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
80: {
81: char string[20];
82: PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
84: PetscFunctionBegin;
85: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
86: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
87: if (flg) {
88: try {
89: PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
90: PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
91: PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));
93: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
94: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
95: #if defined(PETSC_HAVE_CUDA)
96: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
97: #endif
98: #if defined(PETSC_HAVE_OPENCL)
99: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102: } catch (std::exception const &ex) {
103: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104: }
105: }
107: #if defined(PETSC_HAVE_OPENCL)
108: /* ViennaCL OpenCL device type configuration */
109: PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110: if (flg) {
111: try {
112: PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
115: PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
118: PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120: } catch (std::exception const &ex) {
121: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122: }
123: }
124: #endif
126: /* Print available backends */
127: PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128: if (flg) {
129: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
143: /* Print selected backends */
144: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157: }
158: PetscFunctionReturn(PETSC_SUCCESS);
159: }
161: /*
162: Allocates space for the vector array on the Host if it does not exist.
163: Does NOT change the PetscViennaCLFlag for the vector
164: Does NOT zero the ViennaCL array
165: */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168: PetscScalar *array;
169: Vec_Seq *s;
170: PetscInt n = v->map->n;
172: PetscFunctionBegin;
173: s = (Vec_Seq *)v->data;
174: PetscCall(VecViennaCLAllocateCheck(v));
175: if (s->array == 0) {
176: PetscCall(PetscMalloc1(n, &array));
177: s->array = array;
178: s->array_allocated = array;
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: /*
184: Allocates space for the vector array on the GPU if it does not exist.
185: Does NOT change the PetscViennaCLFlag for the vector
186: Does NOT zero the ViennaCL array
188: */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191: PetscFunctionBegin;
192: if (!v->spptr) {
193: try {
194: v->spptr = new Vec_ViennaCL;
195: ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196: ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
198: } catch (std::exception const &ex) {
199: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200: }
201: }
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208: PetscFunctionBegin;
209: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210: PetscCall(VecViennaCLAllocateCheck(v));
211: if (v->map->n > 0) {
212: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214: try {
215: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216: viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217: ViennaCLWaitForGPU();
218: } catch (std::exception const &ex) {
219: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220: }
221: PetscCall(PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar)));
222: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223: v->offloadmask = PETSC_OFFLOAD_BOTH;
224: }
225: }
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: /*
230: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234: PetscFunctionBegin;
235: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236: PetscCall(VecViennaCLAllocateCheckHost(v));
237: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238: PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239: try {
240: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241: viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242: ViennaCLWaitForGPU();
243: } catch (std::exception const &ex) {
244: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245: }
246: PetscCall(PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar)));
247: PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248: v->offloadmask = PETSC_OFFLOAD_BOTH;
249: }
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256: PetscScalar *ya;
257: const PetscScalar *xa;
259: PetscFunctionBegin;
260: PetscCall(VecViennaCLAllocateCheckHost(xin));
261: PetscCall(VecViennaCLAllocateCheckHost(yin));
262: if (xin != yin) {
263: PetscCall(VecGetArrayRead(xin, &xa));
264: PetscCall(VecGetArray(yin, &ya));
265: PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266: PetscCall(VecRestoreArrayRead(xin, &xa));
267: PetscCall(VecRestoreArray(yin, &ya));
268: }
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274: PetscInt n = xin->map->n, i;
275: PetscScalar *xx;
277: PetscFunctionBegin;
278: PetscCall(VecGetArray(xin, &xx));
279: for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280: PetscCall(VecRestoreArray(xin, &xx));
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286: Vec_Seq *vs = (Vec_Seq *)v->data;
288: PetscFunctionBegin;
289: PetscCall(PetscObjectSAWsViewOff(v));
290: #if defined(PETSC_USE_LOG)
291: PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
292: #endif
293: if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
294: PetscCall(PetscFree(vs));
295: PetscFunctionReturn(PETSC_SUCCESS);
296: }
298: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
299: {
300: Vec_Seq *v = (Vec_Seq *)vin->data;
302: PetscFunctionBegin;
303: v->array = v->unplacedarray;
304: v->unplacedarray = 0;
305: PetscFunctionReturn(PETSC_SUCCESS);
306: }
308: /*MC
309: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
311: Options Database Keys:
312: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
314: Level: beginner
316: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
317: M*/
319: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
320: {
321: const ViennaCLVector *xgpu;
322: ViennaCLVector *ygpu;
324: PetscFunctionBegin;
325: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
326: PetscCall(VecViennaCLGetArray(yin, &ygpu));
327: PetscCall(PetscLogGpuTimeBegin());
328: try {
329: if (alpha != 0.0 && xin->map->n > 0) {
330: *ygpu = *xgpu + alpha * *ygpu;
331: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
332: } else {
333: *ygpu = *xgpu;
334: }
335: ViennaCLWaitForGPU();
336: } catch (std::exception const &ex) {
337: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
338: }
339: PetscCall(PetscLogGpuTimeEnd());
340: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
341: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
342: PetscFunctionReturn(PETSC_SUCCESS);
343: }
345: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
346: {
347: const ViennaCLVector *xgpu;
348: ViennaCLVector *ygpu;
350: PetscFunctionBegin;
351: if (alpha != 0.0 && xin->map->n > 0) {
352: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
353: PetscCall(VecViennaCLGetArray(yin, &ygpu));
354: PetscCall(PetscLogGpuTimeBegin());
355: try {
356: *ygpu += alpha * *xgpu;
357: ViennaCLWaitForGPU();
358: } catch (std::exception const &ex) {
359: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
360: }
361: PetscCall(PetscLogGpuTimeEnd());
362: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
363: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
364: PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
365: }
366: PetscFunctionReturn(PETSC_SUCCESS);
367: }
369: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
370: {
371: const ViennaCLVector *xgpu, *ygpu;
372: ViennaCLVector *wgpu;
374: PetscFunctionBegin;
375: if (xin->map->n > 0) {
376: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
377: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
378: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
379: PetscCall(PetscLogGpuTimeBegin());
380: try {
381: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
382: ViennaCLWaitForGPU();
383: } catch (std::exception const &ex) {
384: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
385: }
386: PetscCall(PetscLogGpuTimeEnd());
387: PetscCall(PetscLogGpuFlops(win->map->n));
388: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
389: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
390: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
391: }
392: PetscFunctionReturn(PETSC_SUCCESS);
393: }
395: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
396: {
397: const ViennaCLVector *xgpu, *ygpu;
398: ViennaCLVector *wgpu;
400: PetscFunctionBegin;
401: if (alpha == 0.0 && xin->map->n > 0) {
402: PetscCall(VecCopy_SeqViennaCL(yin, win));
403: } else {
404: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
405: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
406: PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
407: PetscCall(PetscLogGpuTimeBegin());
408: if (alpha == 1.0) {
409: try {
410: *wgpu = *ygpu + *xgpu;
411: } catch (std::exception const &ex) {
412: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
413: }
414: PetscCall(PetscLogGpuFlops(win->map->n));
415: } else if (alpha == -1.0) {
416: try {
417: *wgpu = *ygpu - *xgpu;
418: } catch (std::exception const &ex) {
419: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
420: }
421: PetscCall(PetscLogGpuFlops(win->map->n));
422: } else {
423: try {
424: *wgpu = *ygpu + alpha * *xgpu;
425: } catch (std::exception const &ex) {
426: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
427: }
428: PetscCall(PetscLogGpuFlops(2 * win->map->n));
429: }
430: ViennaCLWaitForGPU();
431: PetscCall(PetscLogGpuTimeEnd());
432: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
433: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
434: PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
435: }
436: PetscFunctionReturn(PETSC_SUCCESS);
437: }
439: /*
440: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
441: *
442: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
443: * hence there is an iterated application of these until the final result is obtained
444: */
445: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
446: {
447: PetscInt j;
449: PetscFunctionBegin;
450: for (j = 0; j < nv; ++j) {
451: if (j + 1 < nv) {
452: PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
453: ++j;
454: } else {
455: PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
456: }
457: }
458: ViennaCLWaitForGPU();
459: PetscFunctionReturn(PETSC_SUCCESS);
460: }
462: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
463: {
464: const ViennaCLVector *xgpu, *ygpu;
466: PetscFunctionBegin;
467: if (xin->map->n > 0) {
468: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
469: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
470: PetscCall(PetscLogGpuTimeBegin());
471: try {
472: *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
473: } catch (std::exception const &ex) {
474: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
475: }
476: ViennaCLWaitForGPU();
477: PetscCall(PetscLogGpuTimeEnd());
478: if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
479: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
480: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
481: } else *z = 0.0;
482: PetscFunctionReturn(PETSC_SUCCESS);
483: }
485: /*
486: * Operation z[j] = dot(x, y[j])
487: *
488: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
489: */
490: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
491: {
492: PetscInt n = xin->map->n, i;
493: const ViennaCLVector *xgpu, *ygpu;
494: Vec *yyin = (Vec *)yin;
495: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
497: PetscFunctionBegin;
498: if (xin->map->n > 0) {
499: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
500: for (i = 0; i < nv; i++) {
501: PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
502: ygpu_array[i] = ygpu;
503: }
504: PetscCall(PetscLogGpuTimeBegin());
505: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
506: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
507: viennacl::copy(result.begin(), result.end(), z);
508: for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
509: ViennaCLWaitForGPU();
510: PetscCall(PetscLogGpuTimeEnd());
511: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
512: PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
513: } else {
514: for (i = 0; i < nv; i++) z[i] = 0.0;
515: }
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
519: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
520: {
521: PetscFunctionBegin;
522: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
523: PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
524: ViennaCLWaitForGPU();
525: PetscFunctionReturn(PETSC_SUCCESS);
526: }
528: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
529: {
530: ViennaCLVector *xgpu;
532: PetscFunctionBegin;
533: if (xin->map->n > 0) {
534: PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
535: PetscCall(PetscLogGpuTimeBegin());
536: try {
537: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
538: ViennaCLWaitForGPU();
539: } catch (std::exception const &ex) {
540: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
541: }
542: PetscCall(PetscLogGpuTimeEnd());
543: PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
544: }
545: PetscFunctionReturn(PETSC_SUCCESS);
546: }
548: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
549: {
550: ViennaCLVector *xgpu;
552: PetscFunctionBegin;
553: if (alpha == 0.0 && xin->map->n > 0) {
554: PetscCall(VecSet_SeqViennaCL(xin, alpha));
555: PetscCall(PetscLogGpuFlops(xin->map->n));
556: } else if (alpha != 1.0 && xin->map->n > 0) {
557: PetscCall(VecViennaCLGetArray(xin, &xgpu));
558: PetscCall(PetscLogGpuTimeBegin());
559: try {
560: *xgpu *= alpha;
561: ViennaCLWaitForGPU();
562: } catch (std::exception const &ex) {
563: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
564: }
565: PetscCall(PetscLogGpuTimeEnd());
566: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
567: PetscCall(PetscLogGpuFlops(xin->map->n));
568: }
569: PetscFunctionReturn(PETSC_SUCCESS);
570: }
572: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
573: {
574: PetscFunctionBegin;
575: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
576: PetscCall(VecDot_SeqViennaCL(xin, yin, z));
577: ViennaCLWaitForGPU();
578: PetscFunctionReturn(PETSC_SUCCESS);
579: }
581: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
582: {
583: const ViennaCLVector *xgpu;
584: ViennaCLVector *ygpu;
586: PetscFunctionBegin;
587: if (xin != yin && xin->map->n > 0) {
588: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
589: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
590: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
591: PetscCall(PetscLogGpuTimeBegin());
592: try {
593: *ygpu = *xgpu;
594: ViennaCLWaitForGPU();
595: } catch (std::exception const &ex) {
596: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
597: }
598: PetscCall(PetscLogGpuTimeEnd());
599: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
600: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
602: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
603: /* copy in CPU if we are on the CPU*/
604: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
605: ViennaCLWaitForGPU();
606: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
607: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
608: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
609: /* copy in CPU */
610: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
611: ViennaCLWaitForGPU();
612: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
613: /* copy in GPU */
614: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
615: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
616: PetscCall(PetscLogGpuTimeBegin());
617: try {
618: *ygpu = *xgpu;
619: ViennaCLWaitForGPU();
620: } catch (std::exception const &ex) {
621: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
622: }
623: PetscCall(PetscLogGpuTimeEnd());
624: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
625: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
626: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
627: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
628: default to copy in GPU (this is an arbitrary choice) */
629: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
630: PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
631: PetscCall(PetscLogGpuTimeBegin());
632: try {
633: *ygpu = *xgpu;
634: ViennaCLWaitForGPU();
635: } catch (std::exception const &ex) {
636: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
637: }
638: PetscCall(PetscLogGpuTimeEnd());
639: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
640: PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
641: } else {
642: PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
643: ViennaCLWaitForGPU();
644: }
645: }
646: }
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
650: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
651: {
652: ViennaCLVector *xgpu, *ygpu;
654: PetscFunctionBegin;
655: if (xin != yin && xin->map->n > 0) {
656: PetscCall(VecViennaCLGetArray(xin, &xgpu));
657: PetscCall(VecViennaCLGetArray(yin, &ygpu));
658: PetscCall(PetscLogGpuTimeBegin());
659: try {
660: viennacl::swap(*xgpu, *ygpu);
661: ViennaCLWaitForGPU();
662: } catch (std::exception const &ex) {
663: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
664: }
665: PetscCall(PetscLogGpuTimeEnd());
666: PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
667: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
668: }
669: PetscFunctionReturn(PETSC_SUCCESS);
670: }
672: // y = alpha * x + beta * y
673: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
674: {
675: PetscScalar a = alpha, b = beta;
676: const ViennaCLVector *xgpu;
677: ViennaCLVector *ygpu;
679: PetscFunctionBegin;
680: if (a == 0.0 && xin->map->n > 0) {
681: PetscCall(VecScale_SeqViennaCL(yin, beta));
682: } else if (b == 1.0 && xin->map->n > 0) {
683: PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
684: } else if (a == 1.0 && xin->map->n > 0) {
685: PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
686: } else if (b == 0.0 && xin->map->n > 0) {
687: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
688: PetscCall(VecViennaCLGetArray(yin, &ygpu));
689: PetscCall(PetscLogGpuTimeBegin());
690: try {
691: *ygpu = *xgpu * alpha;
692: ViennaCLWaitForGPU();
693: } catch (std::exception const &ex) {
694: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
695: }
696: PetscCall(PetscLogGpuTimeEnd());
697: PetscCall(PetscLogGpuFlops(xin->map->n));
698: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
699: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
700: } else if (xin->map->n > 0) {
701: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
702: PetscCall(VecViennaCLGetArray(yin, &ygpu));
703: PetscCall(PetscLogGpuTimeBegin());
704: try {
705: *ygpu = *xgpu * alpha + *ygpu * beta;
706: ViennaCLWaitForGPU();
707: } catch (std::exception const &ex) {
708: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
709: }
710: PetscCall(PetscLogGpuTimeEnd());
711: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
712: PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
713: PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
714: }
715: PetscFunctionReturn(PETSC_SUCCESS);
716: }
718: /* operation z = alpha * x + beta *y + gamma *z*/
719: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
720: {
721: PetscInt n = zin->map->n;
722: const ViennaCLVector *xgpu, *ygpu;
723: ViennaCLVector *zgpu;
725: PetscFunctionBegin;
726: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
727: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
728: PetscCall(VecViennaCLGetArray(zin, &zgpu));
729: if (alpha == 0.0 && xin->map->n > 0) {
730: PetscCall(PetscLogGpuTimeBegin());
731: try {
732: if (beta == 0.0) {
733: *zgpu = gamma * *zgpu;
734: ViennaCLWaitForGPU();
735: PetscCall(PetscLogGpuFlops(1.0 * n));
736: } else if (gamma == 0.0) {
737: *zgpu = beta * *ygpu;
738: ViennaCLWaitForGPU();
739: PetscCall(PetscLogGpuFlops(1.0 * n));
740: } else {
741: *zgpu = beta * *ygpu + gamma * *zgpu;
742: ViennaCLWaitForGPU();
743: PetscCall(PetscLogGpuFlops(3.0 * n));
744: }
745: } catch (std::exception const &ex) {
746: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
747: }
748: PetscCall(PetscLogGpuTimeEnd());
749: PetscCall(PetscLogGpuFlops(3.0 * n));
750: } else if (beta == 0.0 && xin->map->n > 0) {
751: PetscCall(PetscLogGpuTimeBegin());
752: try {
753: if (gamma == 0.0) {
754: *zgpu = alpha * *xgpu;
755: ViennaCLWaitForGPU();
756: PetscCall(PetscLogGpuFlops(1.0 * n));
757: } else {
758: *zgpu = alpha * *xgpu + gamma * *zgpu;
759: ViennaCLWaitForGPU();
760: PetscCall(PetscLogGpuFlops(3.0 * n));
761: }
762: } catch (std::exception const &ex) {
763: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
764: }
765: PetscCall(PetscLogGpuTimeEnd());
766: } else if (gamma == 0.0 && xin->map->n > 0) {
767: PetscCall(PetscLogGpuTimeBegin());
768: try {
769: *zgpu = alpha * *xgpu + beta * *ygpu;
770: ViennaCLWaitForGPU();
771: } catch (std::exception const &ex) {
772: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
773: }
774: PetscCall(PetscLogGpuTimeEnd());
775: PetscCall(PetscLogGpuFlops(3.0 * n));
776: } else if (xin->map->n > 0) {
777: PetscCall(PetscLogGpuTimeBegin());
778: try {
779: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
780: if (gamma != 1.0) *zgpu *= gamma;
781: *zgpu += alpha * *xgpu + beta * *ygpu;
782: ViennaCLWaitForGPU();
783: } catch (std::exception const &ex) {
784: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
785: }
786: PetscCall(PetscLogGpuTimeEnd());
787: PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
788: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
789: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
790: PetscCall(PetscLogGpuFlops(5.0 * n));
791: }
792: PetscFunctionReturn(PETSC_SUCCESS);
793: }
795: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
796: {
797: PetscInt n = win->map->n;
798: const ViennaCLVector *xgpu, *ygpu;
799: ViennaCLVector *wgpu;
801: PetscFunctionBegin;
802: if (xin->map->n > 0) {
803: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
804: PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
805: PetscCall(VecViennaCLGetArray(win, &wgpu));
806: PetscCall(PetscLogGpuTimeBegin());
807: try {
808: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
809: ViennaCLWaitForGPU();
810: } catch (std::exception const &ex) {
811: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
812: }
813: PetscCall(PetscLogGpuTimeEnd());
814: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
815: PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
816: PetscCall(VecViennaCLRestoreArray(win, &wgpu));
817: PetscCall(PetscLogGpuFlops(n));
818: }
819: PetscFunctionReturn(PETSC_SUCCESS);
820: }
822: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
823: {
824: PetscInt n = xin->map->n;
825: PetscBLASInt bn;
826: const ViennaCLVector *xgpu;
828: PetscFunctionBegin;
829: if (xin->map->n > 0) {
830: PetscCall(PetscBLASIntCast(n, &bn));
831: PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
832: if (type == NORM_2 || type == NORM_FROBENIUS) {
833: PetscCall(PetscLogGpuTimeBegin());
834: try {
835: *z = viennacl::linalg::norm_2(*xgpu);
836: ViennaCLWaitForGPU();
837: } catch (std::exception const &ex) {
838: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
839: }
840: PetscCall(PetscLogGpuTimeEnd());
841: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
842: } else if (type == NORM_INFINITY) {
843: PetscCall(PetscLogGpuTimeBegin());
844: try {
845: *z = viennacl::linalg::norm_inf(*xgpu);
846: ViennaCLWaitForGPU();
847: } catch (std::exception const &ex) {
848: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
849: }
850: PetscCall(PetscLogGpuTimeEnd());
851: } else if (type == NORM_1) {
852: PetscCall(PetscLogGpuTimeBegin());
853: try {
854: *z = viennacl::linalg::norm_1(*xgpu);
855: ViennaCLWaitForGPU();
856: } catch (std::exception const &ex) {
857: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
858: }
859: PetscCall(PetscLogGpuTimeEnd());
860: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
861: } else if (type == NORM_1_AND_2) {
862: PetscCall(PetscLogGpuTimeBegin());
863: try {
864: *z = viennacl::linalg::norm_1(*xgpu);
865: *(z + 1) = viennacl::linalg::norm_2(*xgpu);
866: ViennaCLWaitForGPU();
867: } catch (std::exception const &ex) {
868: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
869: }
870: PetscCall(PetscLogGpuTimeEnd());
871: PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
872: PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
873: }
874: PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
875: } else if (type == NORM_1_AND_2) {
876: *z = 0.0;
877: *(z + 1) = 0.0;
878: } else *z = 0.0;
879: PetscFunctionReturn(PETSC_SUCCESS);
880: }
882: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
883: {
884: PetscFunctionBegin;
885: PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
886: xin->offloadmask = PETSC_OFFLOAD_CPU;
887: PetscFunctionReturn(PETSC_SUCCESS);
888: }
890: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
891: {
892: PetscFunctionBegin;
893: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
894: PetscCall(VecViennaCLCopyFromGPU(vin));
895: PetscCall(VecResetArray_SeqViennaCL_Private(vin));
896: vin->offloadmask = PETSC_OFFLOAD_CPU;
897: PetscFunctionReturn(PETSC_SUCCESS);
898: }
900: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
901: {
902: PetscFunctionBegin;
903: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
904: PetscCall(VecViennaCLCopyFromGPU(vin));
905: PetscCall(VecPlaceArray_Seq(vin, a));
906: vin->offloadmask = PETSC_OFFLOAD_CPU;
907: PetscFunctionReturn(PETSC_SUCCESS);
908: }
910: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
911: {
912: PetscFunctionBegin;
913: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
914: PetscCall(VecViennaCLCopyFromGPU(vin));
915: PetscCall(VecReplaceArray_Seq(vin, a));
916: vin->offloadmask = PETSC_OFFLOAD_CPU;
917: PetscFunctionReturn(PETSC_SUCCESS);
918: }
920: /*@C
921: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
923: Collective
925: Input Parameters:
926: + comm - the communicator, should be PETSC_COMM_SELF
927: - n - the vector length
929: Output Parameter:
930: . V - the vector
932: Notes:
933: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
934: same type as an existing vector.
936: Level: intermediate
938: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
939: @*/
940: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
941: {
942: PetscFunctionBegin;
943: PetscCall(VecCreate(comm, v));
944: PetscCall(VecSetSizes(*v, n, n));
945: PetscCall(VecSetType(*v, VECSEQVIENNACL));
946: PetscFunctionReturn(PETSC_SUCCESS);
947: }
949: /*@C
950: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
951: where the user provides the array space to store the vector values.
953: Collective
955: Input Parameters:
956: + comm - the communicator, should be PETSC_COMM_SELF
957: . bs - the block size
958: . n - the vector length
959: - array - viennacl array where the vector elements are to be stored.
961: Output Parameter:
962: . V - the vector
964: Notes:
965: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
966: same type as an existing vector.
968: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
969: at a later stage to SET the array for storing the vector values.
971: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
972: The user should not free the array until the vector is destroyed.
974: Level: intermediate
976: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
977: `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
978: `VecCreateMPIWithArray()`
979: @*/
980: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
981: {
982: PetscMPIInt size;
984: PetscFunctionBegin;
985: PetscCall(VecCreate(comm, V));
986: PetscCall(VecSetSizes(*V, n, n));
987: PetscCall(VecSetBlockSize(*V, bs));
988: PetscCallMPI(MPI_Comm_size(comm, &size));
989: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
990: PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
991: PetscFunctionReturn(PETSC_SUCCESS);
992: }
994: /*@C
995: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
996: the user provides the array space to store the vector values.
998: Collective
1000: Input Parameters:
1001: + comm - the communicator, should be PETSC_COMM_SELF
1002: . bs - the block size
1003: . n - the vector length
1004: - cpuarray - CPU memory where the vector elements are to be stored.
1005: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1007: Output Parameter:
1008: . V - the vector
1010: Notes:
1011: If both cpuarray and viennaclvec are provided, the caller must ensure that
1012: the provided arrays have identical values.
1014: PETSc does NOT free the provided arrays when the vector is destroyed via
1015: VecDestroy(). The user should not free the array until the vector is
1016: destroyed.
1018: Level: intermediate
1020: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1021: `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1022: `VecViennaCLAllocateCheckHost()`
1023: @*/
1024: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
1025: {
1026: PetscMPIInt size;
1028: PetscFunctionBegin;
1030: PetscCallMPI(MPI_Comm_size(comm, &size));
1031: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
1033: // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1034: PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));
1036: if (cpuarray && viennaclvec) {
1037: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1038: s->array = (PetscScalar *)cpuarray;
1039: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1040: } else if (cpuarray) {
1041: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1042: s->array = (PetscScalar *)cpuarray;
1043: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1044: } else if (viennaclvec) {
1045: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1046: } else {
1047: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1048: }
1050: PetscFunctionReturn(PETSC_SUCCESS);
1051: }
1053: /*@C
1054: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1055: the one provided by the user. This is useful to avoid a copy.
1057: Not Collective
1059: Input Parameters:
1060: + vec - the vector
1061: - array - the ViennaCL vector
1063: Notes:
1064: You can return to the original viennacl vector with a call to
1065: VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1066: and VecPlaceArray() at the same time on the same vector.
1068: Level: intermediate
1070: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1071: `VecCUDAPlaceArray()`,
1073: @*/
1074: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1075: {
1076: PetscFunctionBegin;
1077: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1078: PetscCall(VecViennaCLCopyToGPU(vin));
1079: PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1080: ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1081: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1082: vin->offloadmask = PETSC_OFFLOAD_GPU;
1083: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1084: PetscFunctionReturn(PETSC_SUCCESS);
1085: }
1087: /*@C
1088: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1089: after the use of VecViennaCLPlaceArray().
1091: Not Collective
1093: Input Parameters:
1094: . vec - the vector
1096: Level: developer
1098: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1099: @*/
1100: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1101: {
1102: PetscFunctionBegin;
1103: PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1104: PetscCall(VecViennaCLCopyToGPU(vin));
1105: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1106: ((Vec_Seq *)vin->data)->unplacedarray = 0;
1107: vin->offloadmask = PETSC_OFFLOAD_GPU;
1108: PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1109: PetscFunctionReturn(PETSC_SUCCESS);
1110: }
1112: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1113: *
1114: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1115: */
1116: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1117: {
1118: PetscFunctionBegin;
1119: PetscCall(VecDot_SeqViennaCL(s, t, dp));
1120: PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1121: *nm *= *nm; //squared norm required
1122: PetscFunctionReturn(PETSC_SUCCESS);
1123: }
1125: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1126: {
1127: PetscFunctionBegin;
1128: PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1129: PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1130: PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist));
1131: PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist));
1132: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1133: PetscFunctionReturn(PETSC_SUCCESS);
1134: }
1136: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1137: {
1138: PetscFunctionBegin;
1139: try {
1140: if (v->spptr) {
1141: delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1142: delete (Vec_ViennaCL *)v->spptr;
1143: }
1144: } catch (char *ex) {
1145: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1146: }
1147: PetscCall(VecDestroy_SeqViennaCL_Private(v));
1148: PetscFunctionReturn(PETSC_SUCCESS);
1149: }
1151: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1152: {
1153: PetscFunctionBegin;
1154: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1155: PetscCall(VecViennaCLCopyFromGPU(v));
1156: } else {
1157: PetscCall(VecViennaCLAllocateCheckHost(v));
1158: }
1159: *a = *((PetscScalar **)v->data);
1160: PetscFunctionReturn(PETSC_SUCCESS);
1161: }
1163: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1164: {
1165: PetscFunctionBegin;
1166: v->offloadmask = PETSC_OFFLOAD_CPU;
1167: PetscFunctionReturn(PETSC_SUCCESS);
1168: }
1170: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1171: {
1172: PetscFunctionBegin;
1173: PetscCall(VecViennaCLAllocateCheckHost(v));
1174: *a = *((PetscScalar **)v->data);
1175: PetscFunctionReturn(PETSC_SUCCESS);
1176: }
1178: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1179: {
1180: PetscFunctionBegin;
1181: V->boundtocpu = flg;
1182: if (flg) {
1183: PetscCall(VecViennaCLCopyFromGPU(V));
1184: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1185: V->ops->dot = VecDot_Seq;
1186: V->ops->norm = VecNorm_Seq;
1187: V->ops->tdot = VecTDot_Seq;
1188: V->ops->scale = VecScale_Seq;
1189: V->ops->copy = VecCopy_Seq;
1190: V->ops->set = VecSet_Seq;
1191: V->ops->swap = VecSwap_Seq;
1192: V->ops->axpy = VecAXPY_Seq;
1193: V->ops->axpby = VecAXPBY_Seq;
1194: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1195: V->ops->pointwisemult = VecPointwiseMult_Seq;
1196: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1197: V->ops->setrandom = VecSetRandom_Seq;
1198: V->ops->dot_local = VecDot_Seq;
1199: V->ops->tdot_local = VecTDot_Seq;
1200: V->ops->norm_local = VecNorm_Seq;
1201: V->ops->mdot_local = VecMDot_Seq;
1202: V->ops->mtdot_local = VecMTDot_Seq;
1203: V->ops->maxpy = VecMAXPY_Seq;
1204: V->ops->mdot = VecMDot_Seq;
1205: V->ops->mtdot = VecMTDot_Seq;
1206: V->ops->aypx = VecAYPX_Seq;
1207: V->ops->waxpy = VecWAXPY_Seq;
1208: V->ops->dotnorm2 = NULL;
1209: V->ops->placearray = VecPlaceArray_Seq;
1210: V->ops->replacearray = VecReplaceArray_Seq;
1211: V->ops->resetarray = VecResetArray_Seq;
1212: V->ops->duplicate = VecDuplicate_Seq;
1213: } else {
1214: V->ops->dot = VecDot_SeqViennaCL;
1215: V->ops->norm = VecNorm_SeqViennaCL;
1216: V->ops->tdot = VecTDot_SeqViennaCL;
1217: V->ops->scale = VecScale_SeqViennaCL;
1218: V->ops->copy = VecCopy_SeqViennaCL;
1219: V->ops->set = VecSet_SeqViennaCL;
1220: V->ops->swap = VecSwap_SeqViennaCL;
1221: V->ops->axpy = VecAXPY_SeqViennaCL;
1222: V->ops->axpby = VecAXPBY_SeqViennaCL;
1223: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1224: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1225: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1226: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1227: V->ops->dot_local = VecDot_SeqViennaCL;
1228: V->ops->tdot_local = VecTDot_SeqViennaCL;
1229: V->ops->norm_local = VecNorm_SeqViennaCL;
1230: V->ops->mdot_local = VecMDot_SeqViennaCL;
1231: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1232: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1233: V->ops->mdot = VecMDot_SeqViennaCL;
1234: V->ops->mtdot = VecMTDot_SeqViennaCL;
1235: V->ops->aypx = VecAYPX_SeqViennaCL;
1236: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1237: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1238: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1239: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1240: V->ops->resetarray = VecResetArray_SeqViennaCL;
1241: V->ops->destroy = VecDestroy_SeqViennaCL;
1242: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1243: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1244: V->ops->getarray = VecGetArray_SeqViennaCL;
1245: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1246: }
1247: PetscFunctionReturn(PETSC_SUCCESS);
1248: }
1250: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1251: {
1252: PetscMPIInt size;
1254: PetscFunctionBegin;
1255: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1256: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1257: PetscCall(VecCreate_Seq_Private(V, 0));
1258: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1260: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1261: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1263: PetscCall(VecViennaCLAllocateCheck(V));
1264: PetscCall(VecSet_SeqViennaCL(V, 0.0));
1265: PetscFunctionReturn(PETSC_SUCCESS);
1266: }
1268: /*@C
1269: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1271: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1272: invoking clReleaseContext().
1274: Input Parameter:
1275: . v - the vector
1277: Output Parameter:
1278: . ctx - pointer to the underlying CL context
1280: Level: intermediate
1282: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1283: @*/
1284: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1285: {
1286: #if !defined(PETSC_HAVE_OPENCL)
1287: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1288: #else
1290: PetscFunctionBegin;
1291: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1292: const ViennaCLVector *v_vcl;
1293: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1294: try {
1295: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1296: const cl_context ocl_ctx = vcl_ctx.handle().get();
1297: clRetainContext(ocl_ctx);
1298: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1299: } catch (std::exception const &ex) {
1300: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1301: }
1303: PetscFunctionReturn(PETSC_SUCCESS);
1304: #endif
1305: }
1307: /*@C
1308: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1309: operations of the Vec are enqueued.
1311: Caller should cast (*queue) to (const cl_command_queue). Caller is
1312: responsible for invoking clReleaseCommandQueue().
1314: Input Parameter:
1315: . v - the vector
1317: Output Parameter:
1318: . ctx - pointer to the CL command queue
1320: Level: intermediate
1322: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1323: @*/
1324: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1325: {
1326: #if !defined(PETSC_HAVE_OPENCL)
1327: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1328: #else
1329: PetscFunctionBegin;
1330: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1331: const ViennaCLVector *v_vcl;
1332: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1333: try {
1334: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1335: const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1336: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1337: clRetainCommandQueue(ocl_queue);
1338: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1339: } catch (std::exception const &ex) {
1340: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1341: }
1343: PetscFunctionReturn(PETSC_SUCCESS);
1344: #endif
1345: }
1347: /*@C
1348: VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.
1350: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1351: invoking clReleaseMemObject().
1353: Input Parameter:
1354: . v - the vector
1356: Output Parameter:
1357: . mem - pointer to the device buffer
1359: Level: intermediate
1361: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1362: @*/
1363: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1364: {
1365: #if !defined(PETSC_HAVE_OPENCL)
1366: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1367: #else
1368: PetscFunctionBegin;
1369: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1370: const ViennaCLVector *v_vcl;
1371: PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1372: try {
1373: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1374: clRetainMemObject(ocl_mem);
1375: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1376: } catch (std::exception const &ex) {
1377: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1378: }
1379: PetscFunctionReturn(PETSC_SUCCESS);
1380: #endif
1381: }
1383: /*@C
1384: VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.
1386: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1387: invoking clReleaseMemObject().
1389: The device pointer has to be released by calling
1390: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1391: the host will be marked as out of date. A subsequent access of the host data
1392: will thus incur a data transfer from the device to the host.
1394: Input Parameter:
1395: . v - the vector
1397: Output Parameter:
1398: . mem - pointer to the device buffer
1400: Level: intermediate
1402: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1403: @*/
1404: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1405: {
1406: #if !defined(PETSC_HAVE_OPENCL)
1407: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1408: #else
1409: PetscFunctionBegin;
1410: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1411: ViennaCLVector *v_vcl;
1412: PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1413: try {
1414: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1415: clRetainMemObject(ocl_mem);
1416: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1417: } catch (std::exception const &ex) {
1418: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1419: }
1421: PetscFunctionReturn(PETSC_SUCCESS);
1422: #endif
1423: }
1425: /*@C
1426: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1427: acquired with VecViennaCLGetCLMemWrite().
1429: This marks the host data as out of date. Subsequent access to the
1430: vector data on the host side with for instance VecGetArray() incurs a
1431: data transfer.
1433: Input Parameter:
1434: . v - the vector
1436: Level: intermediate
1438: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1439: @*/
1440: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1441: {
1442: #if !defined(PETSC_HAVE_OPENCL)
1443: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1444: #else
1445: PetscFunctionBegin;
1446: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1447: PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1449: PetscFunctionReturn(PETSC_SUCCESS);
1450: #endif
1451: }
1453: /*@C
1454: VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.
1456: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1457: invoking clReleaseMemObject().
1459: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1460: Upon restoring the vector data the data on the host will be marked as out of
1461: date. A subsequent access of the host data will thus incur a data transfer
1462: from the device to the host.
1464: Input Parameter:
1465: . v - the vector
1467: Output Parameter:
1468: . mem - pointer to the device buffer
1470: Level: intermediate
1472: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1473: @*/
1474: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1475: {
1476: #if !defined(PETSC_HAVE_OPENCL)
1477: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1478: #else
1479: PetscFunctionBegin;
1480: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1481: ViennaCLVector *v_vcl;
1482: PetscCall(VecViennaCLGetArray(v, &v_vcl));
1483: try {
1484: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1485: clRetainMemObject(ocl_mem);
1486: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1487: } catch (std::exception const &ex) {
1488: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1489: }
1491: PetscFunctionReturn(PETSC_SUCCESS);
1492: #endif
1493: }
1495: /*@C
1496: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1497: acquired with VecViennaCLGetCLMem().
1499: This marks the host data as out of date. Subsequent access to the vector
1500: data on the host side with for instance VecGetArray() incurs a data
1501: transfer.
1503: Input Parameter:
1504: . v - the vector
1506: Level: intermediate
1508: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1509: @*/
1510: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1511: {
1512: #if !defined(PETSC_HAVE_OPENCL)
1513: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1514: #else
1515: PetscFunctionBegin;
1516: PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1517: PetscCall(VecViennaCLRestoreArray(v, NULL));
1519: PetscFunctionReturn(PETSC_SUCCESS);
1520: #endif
1521: }
1523: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1524: {
1525: Vec_ViennaCL *vecviennacl;
1526: PetscMPIInt size;
1528: PetscFunctionBegin;
1529: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1530: PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1531: PetscCall(VecCreate_Seq_Private(V, 0));
1532: PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1533: PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1534: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1536: if (array) {
1537: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1538: vecviennacl = (Vec_ViennaCL *)V->spptr;
1539: vecviennacl->GPUarray_allocated = 0;
1540: vecviennacl->GPUarray = (ViennaCLVector *)array;
1541: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1542: }
1544: PetscFunctionReturn(PETSC_SUCCESS);
1545: }