Actual source code: vecviennacl.cxx

  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16:   #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 21:   PetscFunctionBegin;
 22:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 23:   *a = 0;
 24:   PetscCall(VecViennaCLCopyToGPU(v));
 25:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 26:   ViennaCLWaitForGPU();
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }

 30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 31: {
 32:   PetscFunctionBegin;
 33:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 34:   v->offloadmask = PETSC_OFFLOAD_GPU;

 36:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }

 40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 41: {
 42:   PetscFunctionBegin;
 43:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 44:   *a = 0;
 45:   PetscCall(VecViennaCLCopyToGPU(v));
 46:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 47:   ViennaCLWaitForGPU();
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 52: {
 53:   PetscFunctionBegin;
 54:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }

 58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 59: {
 60:   PetscFunctionBegin;
 61:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 62:   *a = 0;
 63:   PetscCall(VecViennaCLAllocateCheck(v));
 64:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 65:   ViennaCLWaitForGPU();
 66:   PetscFunctionReturn(PETSC_SUCCESS);
 67: }

 69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 70: {
 71:   PetscFunctionBegin;
 72:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 73:   v->offloadmask = PETSC_OFFLOAD_GPU;

 75:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 76:   PetscFunctionReturn(PETSC_SUCCESS);
 77: }

 79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 80: {
 81:   char      string[20];
 82:   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;

 84:   PetscFunctionBegin;
 85:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 86:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
 87:   if (flg) {
 88:     try {
 89:       PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
 90:       PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
 91:       PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));

 93:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 94:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 95: #if defined(PETSC_HAVE_CUDA)
 96:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 97: #endif
 98: #if defined(PETSC_HAVE_OPENCL)
 99:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102:     } catch (std::exception const &ex) {
103:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104:     }
105:   }

107: #if defined(PETSC_HAVE_OPENCL)
108:   /* ViennaCL OpenCL device type configuration */
109:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110:   if (flg) {
111:     try {
112:       PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

115:       PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

118:       PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120:     } catch (std::exception const &ex) {
121:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122:     }
123:   }
124: #endif

126:   /* Print available backends */
127:   PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128:   if (flg) {
129:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));

143:     /* Print selected backends */
144:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157:   }
158:   PetscFunctionReturn(PETSC_SUCCESS);
159: }

161: /*
162:     Allocates space for the vector array on the Host if it does not exist.
163:     Does NOT change the PetscViennaCLFlag for the vector
164:     Does NOT zero the ViennaCL array
165:  */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168:   PetscScalar *array;
169:   Vec_Seq     *s;
170:   PetscInt     n = v->map->n;

172:   PetscFunctionBegin;
173:   s = (Vec_Seq *)v->data;
174:   PetscCall(VecViennaCLAllocateCheck(v));
175:   if (s->array == 0) {
176:     PetscCall(PetscMalloc1(n, &array));
177:     s->array           = array;
178:     s->array_allocated = array;
179:   }
180:   PetscFunctionReturn(PETSC_SUCCESS);
181: }

183: /*
184:     Allocates space for the vector array on the GPU if it does not exist.
185:     Does NOT change the PetscViennaCLFlag for the vector
186:     Does NOT zero the ViennaCL array

188:  */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191:   PetscFunctionBegin;
192:   if (!v->spptr) {
193:     try {
194:       v->spptr                                       = new Vec_ViennaCL;
195:       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196:       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;

198:     } catch (std::exception const &ex) {
199:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200:     }
201:   }
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }

205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208:   PetscFunctionBegin;
209:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210:   PetscCall(VecViennaCLAllocateCheck(v));
211:   if (v->map->n > 0) {
212:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213:       PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214:       try {
215:         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216:         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217:         ViennaCLWaitForGPU();
218:       } catch (std::exception const &ex) {
219:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220:       }
221:       PetscCall(PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar)));
222:       PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223:       v->offloadmask = PETSC_OFFLOAD_BOTH;
224:     }
225:   }
226:   PetscFunctionReturn(PETSC_SUCCESS);
227: }

229: /*
230:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234:   PetscFunctionBegin;
235:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236:   PetscCall(VecViennaCLAllocateCheckHost(v));
237:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238:     PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239:     try {
240:       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241:       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242:       ViennaCLWaitForGPU();
243:     } catch (std::exception const &ex) {
244:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245:     }
246:     PetscCall(PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar)));
247:     PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248:     v->offloadmask = PETSC_OFFLOAD_BOTH;
249:   }
250:   PetscFunctionReturn(PETSC_SUCCESS);
251: }

253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256:   PetscScalar       *ya;
257:   const PetscScalar *xa;

259:   PetscFunctionBegin;
260:   PetscCall(VecViennaCLAllocateCheckHost(xin));
261:   PetscCall(VecViennaCLAllocateCheckHost(yin));
262:   if (xin != yin) {
263:     PetscCall(VecGetArrayRead(xin, &xa));
264:     PetscCall(VecGetArray(yin, &ya));
265:     PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266:     PetscCall(VecRestoreArrayRead(xin, &xa));
267:     PetscCall(VecRestoreArray(yin, &ya));
268:   }
269:   PetscFunctionReturn(PETSC_SUCCESS);
270: }

272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274:   PetscInt     n = xin->map->n, i;
275:   PetscScalar *xx;

277:   PetscFunctionBegin;
278:   PetscCall(VecGetArray(xin, &xx));
279:   for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280:   PetscCall(VecRestoreArray(xin, &xx));
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286:   Vec_Seq *vs = (Vec_Seq *)v->data;

288:   PetscFunctionBegin;
289:   PetscCall(PetscObjectSAWsViewOff(v));
290: #if defined(PETSC_USE_LOG)
291:   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
292: #endif
293:   if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
294:   PetscCall(PetscFree(vs));
295:   PetscFunctionReturn(PETSC_SUCCESS);
296: }

298: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
299: {
300:   Vec_Seq *v = (Vec_Seq *)vin->data;

302:   PetscFunctionBegin;
303:   v->array         = v->unplacedarray;
304:   v->unplacedarray = 0;
305:   PetscFunctionReturn(PETSC_SUCCESS);
306: }

308: /*MC
309:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

311:    Options Database Keys:
312: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

314:   Level: beginner

316: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
317: M*/

319: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
320: {
321:   const ViennaCLVector *xgpu;
322:   ViennaCLVector       *ygpu;

324:   PetscFunctionBegin;
325:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
326:   PetscCall(VecViennaCLGetArray(yin, &ygpu));
327:   PetscCall(PetscLogGpuTimeBegin());
328:   try {
329:     if (alpha != 0.0 && xin->map->n > 0) {
330:       *ygpu = *xgpu + alpha * *ygpu;
331:       PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
332:     } else {
333:       *ygpu = *xgpu;
334:     }
335:     ViennaCLWaitForGPU();
336:   } catch (std::exception const &ex) {
337:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
338:   }
339:   PetscCall(PetscLogGpuTimeEnd());
340:   PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
341:   PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
342:   PetscFunctionReturn(PETSC_SUCCESS);
343: }

345: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
346: {
347:   const ViennaCLVector *xgpu;
348:   ViennaCLVector       *ygpu;

350:   PetscFunctionBegin;
351:   if (alpha != 0.0 && xin->map->n > 0) {
352:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
353:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
354:     PetscCall(PetscLogGpuTimeBegin());
355:     try {
356:       *ygpu += alpha * *xgpu;
357:       ViennaCLWaitForGPU();
358:     } catch (std::exception const &ex) {
359:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
360:     }
361:     PetscCall(PetscLogGpuTimeEnd());
362:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
363:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
364:     PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
365:   }
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }

369: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
370: {
371:   const ViennaCLVector *xgpu, *ygpu;
372:   ViennaCLVector       *wgpu;

374:   PetscFunctionBegin;
375:   if (xin->map->n > 0) {
376:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
377:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
378:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
379:     PetscCall(PetscLogGpuTimeBegin());
380:     try {
381:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
382:       ViennaCLWaitForGPU();
383:     } catch (std::exception const &ex) {
384:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
385:     }
386:     PetscCall(PetscLogGpuTimeEnd());
387:     PetscCall(PetscLogGpuFlops(win->map->n));
388:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
389:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
390:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
391:   }
392:   PetscFunctionReturn(PETSC_SUCCESS);
393: }

395: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
396: {
397:   const ViennaCLVector *xgpu, *ygpu;
398:   ViennaCLVector       *wgpu;

400:   PetscFunctionBegin;
401:   if (alpha == 0.0 && xin->map->n > 0) {
402:     PetscCall(VecCopy_SeqViennaCL(yin, win));
403:   } else {
404:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
405:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
406:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
407:     PetscCall(PetscLogGpuTimeBegin());
408:     if (alpha == 1.0) {
409:       try {
410:         *wgpu = *ygpu + *xgpu;
411:       } catch (std::exception const &ex) {
412:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
413:       }
414:       PetscCall(PetscLogGpuFlops(win->map->n));
415:     } else if (alpha == -1.0) {
416:       try {
417:         *wgpu = *ygpu - *xgpu;
418:       } catch (std::exception const &ex) {
419:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
420:       }
421:       PetscCall(PetscLogGpuFlops(win->map->n));
422:     } else {
423:       try {
424:         *wgpu = *ygpu + alpha * *xgpu;
425:       } catch (std::exception const &ex) {
426:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
427:       }
428:       PetscCall(PetscLogGpuFlops(2 * win->map->n));
429:     }
430:     ViennaCLWaitForGPU();
431:     PetscCall(PetscLogGpuTimeEnd());
432:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
433:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
434:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
435:   }
436:   PetscFunctionReturn(PETSC_SUCCESS);
437: }

439: /*
440:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
441:  *
442:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
443:  * hence there is an iterated application of these until the final result is obtained
444:  */
445: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
446: {
447:   PetscInt j;

449:   PetscFunctionBegin;
450:   for (j = 0; j < nv; ++j) {
451:     if (j + 1 < nv) {
452:       PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
453:       ++j;
454:     } else {
455:       PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
456:     }
457:   }
458:   ViennaCLWaitForGPU();
459:   PetscFunctionReturn(PETSC_SUCCESS);
460: }

462: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
463: {
464:   const ViennaCLVector *xgpu, *ygpu;

466:   PetscFunctionBegin;
467:   if (xin->map->n > 0) {
468:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
469:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
470:     PetscCall(PetscLogGpuTimeBegin());
471:     try {
472:       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
473:     } catch (std::exception const &ex) {
474:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
475:     }
476:     ViennaCLWaitForGPU();
477:     PetscCall(PetscLogGpuTimeEnd());
478:     if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
479:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
480:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
481:   } else *z = 0.0;
482:   PetscFunctionReturn(PETSC_SUCCESS);
483: }

485: /*
486:  * Operation z[j] = dot(x, y[j])
487:  *
488:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
489:  */
490: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
491: {
492:   PetscInt                                                n = xin->map->n, i;
493:   const ViennaCLVector                                   *xgpu, *ygpu;
494:   Vec                                                    *yyin = (Vec *)yin;
495:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

497:   PetscFunctionBegin;
498:   if (xin->map->n > 0) {
499:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
500:     for (i = 0; i < nv; i++) {
501:       PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
502:       ygpu_array[i] = ygpu;
503:     }
504:     PetscCall(PetscLogGpuTimeBegin());
505:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
506:     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
507:     viennacl::copy(result.begin(), result.end(), z);
508:     for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
509:     ViennaCLWaitForGPU();
510:     PetscCall(PetscLogGpuTimeEnd());
511:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
512:     PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
513:   } else {
514:     for (i = 0; i < nv; i++) z[i] = 0.0;
515:   }
516:   PetscFunctionReturn(PETSC_SUCCESS);
517: }

519: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
520: {
521:   PetscFunctionBegin;
522:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
523:   PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
524:   ViennaCLWaitForGPU();
525:   PetscFunctionReturn(PETSC_SUCCESS);
526: }

528: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
529: {
530:   ViennaCLVector *xgpu;

532:   PetscFunctionBegin;
533:   if (xin->map->n > 0) {
534:     PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
535:     PetscCall(PetscLogGpuTimeBegin());
536:     try {
537:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
538:       ViennaCLWaitForGPU();
539:     } catch (std::exception const &ex) {
540:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
541:     }
542:     PetscCall(PetscLogGpuTimeEnd());
543:     PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
544:   }
545:   PetscFunctionReturn(PETSC_SUCCESS);
546: }

548: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
549: {
550:   ViennaCLVector *xgpu;

552:   PetscFunctionBegin;
553:   if (alpha == 0.0 && xin->map->n > 0) {
554:     PetscCall(VecSet_SeqViennaCL(xin, alpha));
555:     PetscCall(PetscLogGpuFlops(xin->map->n));
556:   } else if (alpha != 1.0 && xin->map->n > 0) {
557:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
558:     PetscCall(PetscLogGpuTimeBegin());
559:     try {
560:       *xgpu *= alpha;
561:       ViennaCLWaitForGPU();
562:     } catch (std::exception const &ex) {
563:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
564:     }
565:     PetscCall(PetscLogGpuTimeEnd());
566:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
567:     PetscCall(PetscLogGpuFlops(xin->map->n));
568:   }
569:   PetscFunctionReturn(PETSC_SUCCESS);
570: }

572: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
573: {
574:   PetscFunctionBegin;
575:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
576:   PetscCall(VecDot_SeqViennaCL(xin, yin, z));
577:   ViennaCLWaitForGPU();
578:   PetscFunctionReturn(PETSC_SUCCESS);
579: }

581: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
582: {
583:   const ViennaCLVector *xgpu;
584:   ViennaCLVector       *ygpu;

586:   PetscFunctionBegin;
587:   if (xin != yin && xin->map->n > 0) {
588:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
589:       PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
590:       PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
591:       PetscCall(PetscLogGpuTimeBegin());
592:       try {
593:         *ygpu = *xgpu;
594:         ViennaCLWaitForGPU();
595:       } catch (std::exception const &ex) {
596:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
597:       }
598:       PetscCall(PetscLogGpuTimeEnd());
599:       PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
600:       PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));

602:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
603:       /* copy in CPU if we are on the CPU*/
604:       PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
605:       ViennaCLWaitForGPU();
606:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
607:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
608:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
609:         /* copy in CPU */
610:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
611:         ViennaCLWaitForGPU();
612:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
613:         /* copy in GPU */
614:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
615:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
616:         PetscCall(PetscLogGpuTimeBegin());
617:         try {
618:           *ygpu = *xgpu;
619:           ViennaCLWaitForGPU();
620:         } catch (std::exception const &ex) {
621:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
622:         }
623:         PetscCall(PetscLogGpuTimeEnd());
624:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
625:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
626:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
627:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
628:            default to copy in GPU (this is an arbitrary choice) */
629:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
630:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
631:         PetscCall(PetscLogGpuTimeBegin());
632:         try {
633:           *ygpu = *xgpu;
634:           ViennaCLWaitForGPU();
635:         } catch (std::exception const &ex) {
636:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
637:         }
638:         PetscCall(PetscLogGpuTimeEnd());
639:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
640:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
641:       } else {
642:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
643:         ViennaCLWaitForGPU();
644:       }
645:     }
646:   }
647:   PetscFunctionReturn(PETSC_SUCCESS);
648: }

650: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
651: {
652:   ViennaCLVector *xgpu, *ygpu;

654:   PetscFunctionBegin;
655:   if (xin != yin && xin->map->n > 0) {
656:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
657:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
658:     PetscCall(PetscLogGpuTimeBegin());
659:     try {
660:       viennacl::swap(*xgpu, *ygpu);
661:       ViennaCLWaitForGPU();
662:     } catch (std::exception const &ex) {
663:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
664:     }
665:     PetscCall(PetscLogGpuTimeEnd());
666:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
667:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
668:   }
669:   PetscFunctionReturn(PETSC_SUCCESS);
670: }

672: // y = alpha * x + beta * y
673: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
674: {
675:   PetscScalar           a = alpha, b = beta;
676:   const ViennaCLVector *xgpu;
677:   ViennaCLVector       *ygpu;

679:   PetscFunctionBegin;
680:   if (a == 0.0 && xin->map->n > 0) {
681:     PetscCall(VecScale_SeqViennaCL(yin, beta));
682:   } else if (b == 1.0 && xin->map->n > 0) {
683:     PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
684:   } else if (a == 1.0 && xin->map->n > 0) {
685:     PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
686:   } else if (b == 0.0 && xin->map->n > 0) {
687:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
688:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
689:     PetscCall(PetscLogGpuTimeBegin());
690:     try {
691:       *ygpu = *xgpu * alpha;
692:       ViennaCLWaitForGPU();
693:     } catch (std::exception const &ex) {
694:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
695:     }
696:     PetscCall(PetscLogGpuTimeEnd());
697:     PetscCall(PetscLogGpuFlops(xin->map->n));
698:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
699:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
700:   } else if (xin->map->n > 0) {
701:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
702:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
703:     PetscCall(PetscLogGpuTimeBegin());
704:     try {
705:       *ygpu = *xgpu * alpha + *ygpu * beta;
706:       ViennaCLWaitForGPU();
707:     } catch (std::exception const &ex) {
708:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
709:     }
710:     PetscCall(PetscLogGpuTimeEnd());
711:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
712:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
713:     PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
714:   }
715:   PetscFunctionReturn(PETSC_SUCCESS);
716: }

718: /* operation  z = alpha * x + beta *y + gamma *z*/
719: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
720: {
721:   PetscInt              n = zin->map->n;
722:   const ViennaCLVector *xgpu, *ygpu;
723:   ViennaCLVector       *zgpu;

725:   PetscFunctionBegin;
726:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
727:   PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
728:   PetscCall(VecViennaCLGetArray(zin, &zgpu));
729:   if (alpha == 0.0 && xin->map->n > 0) {
730:     PetscCall(PetscLogGpuTimeBegin());
731:     try {
732:       if (beta == 0.0) {
733:         *zgpu = gamma * *zgpu;
734:         ViennaCLWaitForGPU();
735:         PetscCall(PetscLogGpuFlops(1.0 * n));
736:       } else if (gamma == 0.0) {
737:         *zgpu = beta * *ygpu;
738:         ViennaCLWaitForGPU();
739:         PetscCall(PetscLogGpuFlops(1.0 * n));
740:       } else {
741:         *zgpu = beta * *ygpu + gamma * *zgpu;
742:         ViennaCLWaitForGPU();
743:         PetscCall(PetscLogGpuFlops(3.0 * n));
744:       }
745:     } catch (std::exception const &ex) {
746:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
747:     }
748:     PetscCall(PetscLogGpuTimeEnd());
749:     PetscCall(PetscLogGpuFlops(3.0 * n));
750:   } else if (beta == 0.0 && xin->map->n > 0) {
751:     PetscCall(PetscLogGpuTimeBegin());
752:     try {
753:       if (gamma == 0.0) {
754:         *zgpu = alpha * *xgpu;
755:         ViennaCLWaitForGPU();
756:         PetscCall(PetscLogGpuFlops(1.0 * n));
757:       } else {
758:         *zgpu = alpha * *xgpu + gamma * *zgpu;
759:         ViennaCLWaitForGPU();
760:         PetscCall(PetscLogGpuFlops(3.0 * n));
761:       }
762:     } catch (std::exception const &ex) {
763:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
764:     }
765:     PetscCall(PetscLogGpuTimeEnd());
766:   } else if (gamma == 0.0 && xin->map->n > 0) {
767:     PetscCall(PetscLogGpuTimeBegin());
768:     try {
769:       *zgpu = alpha * *xgpu + beta * *ygpu;
770:       ViennaCLWaitForGPU();
771:     } catch (std::exception const &ex) {
772:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
773:     }
774:     PetscCall(PetscLogGpuTimeEnd());
775:     PetscCall(PetscLogGpuFlops(3.0 * n));
776:   } else if (xin->map->n > 0) {
777:     PetscCall(PetscLogGpuTimeBegin());
778:     try {
779:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
780:       if (gamma != 1.0) *zgpu *= gamma;
781:       *zgpu += alpha * *xgpu + beta * *ygpu;
782:       ViennaCLWaitForGPU();
783:     } catch (std::exception const &ex) {
784:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
785:     }
786:     PetscCall(PetscLogGpuTimeEnd());
787:     PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
788:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
789:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
790:     PetscCall(PetscLogGpuFlops(5.0 * n));
791:   }
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
796: {
797:   PetscInt              n = win->map->n;
798:   const ViennaCLVector *xgpu, *ygpu;
799:   ViennaCLVector       *wgpu;

801:   PetscFunctionBegin;
802:   if (xin->map->n > 0) {
803:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
804:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
805:     PetscCall(VecViennaCLGetArray(win, &wgpu));
806:     PetscCall(PetscLogGpuTimeBegin());
807:     try {
808:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
809:       ViennaCLWaitForGPU();
810:     } catch (std::exception const &ex) {
811:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
812:     }
813:     PetscCall(PetscLogGpuTimeEnd());
814:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
815:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
816:     PetscCall(VecViennaCLRestoreArray(win, &wgpu));
817:     PetscCall(PetscLogGpuFlops(n));
818:   }
819:   PetscFunctionReturn(PETSC_SUCCESS);
820: }

822: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
823: {
824:   PetscInt              n = xin->map->n;
825:   PetscBLASInt          bn;
826:   const ViennaCLVector *xgpu;

828:   PetscFunctionBegin;
829:   if (xin->map->n > 0) {
830:     PetscCall(PetscBLASIntCast(n, &bn));
831:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
832:     if (type == NORM_2 || type == NORM_FROBENIUS) {
833:       PetscCall(PetscLogGpuTimeBegin());
834:       try {
835:         *z = viennacl::linalg::norm_2(*xgpu);
836:         ViennaCLWaitForGPU();
837:       } catch (std::exception const &ex) {
838:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
839:       }
840:       PetscCall(PetscLogGpuTimeEnd());
841:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
842:     } else if (type == NORM_INFINITY) {
843:       PetscCall(PetscLogGpuTimeBegin());
844:       try {
845:         *z = viennacl::linalg::norm_inf(*xgpu);
846:         ViennaCLWaitForGPU();
847:       } catch (std::exception const &ex) {
848:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
849:       }
850:       PetscCall(PetscLogGpuTimeEnd());
851:     } else if (type == NORM_1) {
852:       PetscCall(PetscLogGpuTimeBegin());
853:       try {
854:         *z = viennacl::linalg::norm_1(*xgpu);
855:         ViennaCLWaitForGPU();
856:       } catch (std::exception const &ex) {
857:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
858:       }
859:       PetscCall(PetscLogGpuTimeEnd());
860:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
861:     } else if (type == NORM_1_AND_2) {
862:       PetscCall(PetscLogGpuTimeBegin());
863:       try {
864:         *z       = viennacl::linalg::norm_1(*xgpu);
865:         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
866:         ViennaCLWaitForGPU();
867:       } catch (std::exception const &ex) {
868:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
869:       }
870:       PetscCall(PetscLogGpuTimeEnd());
871:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
872:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
873:     }
874:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
875:   } else if (type == NORM_1_AND_2) {
876:     *z       = 0.0;
877:     *(z + 1) = 0.0;
878:   } else *z = 0.0;
879:   PetscFunctionReturn(PETSC_SUCCESS);
880: }

882: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
883: {
884:   PetscFunctionBegin;
885:   PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
886:   xin->offloadmask = PETSC_OFFLOAD_CPU;
887:   PetscFunctionReturn(PETSC_SUCCESS);
888: }

890: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
891: {
892:   PetscFunctionBegin;
893:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
894:   PetscCall(VecViennaCLCopyFromGPU(vin));
895:   PetscCall(VecResetArray_SeqViennaCL_Private(vin));
896:   vin->offloadmask = PETSC_OFFLOAD_CPU;
897:   PetscFunctionReturn(PETSC_SUCCESS);
898: }

900: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
901: {
902:   PetscFunctionBegin;
903:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
904:   PetscCall(VecViennaCLCopyFromGPU(vin));
905:   PetscCall(VecPlaceArray_Seq(vin, a));
906:   vin->offloadmask = PETSC_OFFLOAD_CPU;
907:   PetscFunctionReturn(PETSC_SUCCESS);
908: }

910: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
911: {
912:   PetscFunctionBegin;
913:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
914:   PetscCall(VecViennaCLCopyFromGPU(vin));
915:   PetscCall(VecReplaceArray_Seq(vin, a));
916:   vin->offloadmask = PETSC_OFFLOAD_CPU;
917:   PetscFunctionReturn(PETSC_SUCCESS);
918: }

920: /*@C
921:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

923:    Collective

925:    Input Parameters:
926: +  comm - the communicator, should be PETSC_COMM_SELF
927: -  n - the vector length

929:    Output Parameter:
930: .  V - the vector

932:    Notes:
933:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
934:    same type as an existing vector.

936:    Level: intermediate

938: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
939: @*/
940: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
941: {
942:   PetscFunctionBegin;
943:   PetscCall(VecCreate(comm, v));
944:   PetscCall(VecSetSizes(*v, n, n));
945:   PetscCall(VecSetType(*v, VECSEQVIENNACL));
946:   PetscFunctionReturn(PETSC_SUCCESS);
947: }

949: /*@C
950:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
951:    where the user provides the array space to store the vector values.

953:    Collective

955:    Input Parameters:
956: +  comm - the communicator, should be PETSC_COMM_SELF
957: .  bs - the block size
958: .  n - the vector length
959: -  array - viennacl array where the vector elements are to be stored.

961:    Output Parameter:
962: .  V - the vector

964:    Notes:
965:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
966:    same type as an existing vector.

968:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
969:    at a later stage to SET the array for storing the vector values.

971:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
972:    The user should not free the array until the vector is destroyed.

974:    Level: intermediate

976: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
977:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
978:           `VecCreateMPIWithArray()`
979: @*/
980: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
981: {
982:   PetscMPIInt size;

984:   PetscFunctionBegin;
985:   PetscCall(VecCreate(comm, V));
986:   PetscCall(VecSetSizes(*V, n, n));
987:   PetscCall(VecSetBlockSize(*V, bs));
988:   PetscCallMPI(MPI_Comm_size(comm, &size));
989:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
990:   PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
991:   PetscFunctionReturn(PETSC_SUCCESS);
992: }

994: /*@C
995:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
996:    the user provides the array space to store the vector values.

998:    Collective

1000:    Input Parameters:
1001: +  comm - the communicator, should be PETSC_COMM_SELF
1002: .  bs - the block size
1003: .  n - the vector length
1004: -  cpuarray - CPU memory where the vector elements are to be stored.
1005: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

1007:    Output Parameter:
1008: .  V - the vector

1010:    Notes:
1011:    If both cpuarray and viennaclvec are provided, the caller must ensure that
1012:    the provided arrays have identical values.

1014:    PETSc does NOT free the provided arrays when the vector is destroyed via
1015:    VecDestroy(). The user should not free the array until the vector is
1016:    destroyed.

1018:    Level: intermediate

1020: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1021:           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1022:           `VecViennaCLAllocateCheckHost()`
1023: @*/
1024: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
1025: {
1026:   PetscMPIInt size;

1028:   PetscFunctionBegin;

1030:   PetscCallMPI(MPI_Comm_size(comm, &size));
1031:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");

1033:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1034:   PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));

1036:   if (cpuarray && viennaclvec) {
1037:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1038:     s->array          = (PetscScalar *)cpuarray;
1039:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1040:   } else if (cpuarray) {
1041:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1042:     s->array          = (PetscScalar *)cpuarray;
1043:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1044:   } else if (viennaclvec) {
1045:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1046:   } else {
1047:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1048:   }

1050:   PetscFunctionReturn(PETSC_SUCCESS);
1051: }

1053: /*@C
1054:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1055:    the one provided by the user. This is useful to avoid a copy.

1057:    Not Collective

1059:    Input Parameters:
1060: +  vec - the vector
1061: -  array - the ViennaCL vector

1063:    Notes:
1064:    You can return to the original viennacl vector with a call to
1065:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1066:    and VecPlaceArray() at the same time on the same vector.

1068:    Level: intermediate

1070: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1071:           `VecCUDAPlaceArray()`,

1073: @*/
1074: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1075: {
1076:   PetscFunctionBegin;
1077:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1078:   PetscCall(VecViennaCLCopyToGPU(vin));
1079:   PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1080:   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1081:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1082:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1083:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1084:   PetscFunctionReturn(PETSC_SUCCESS);
1085: }

1087: /*@C
1088:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1089:    after the use of VecViennaCLPlaceArray().

1091:    Not Collective

1093:    Input Parameters:
1094: .  vec - the vector

1096:    Level: developer

1098: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1099: @*/
1100: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1101: {
1102:   PetscFunctionBegin;
1103:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1104:   PetscCall(VecViennaCLCopyToGPU(vin));
1105:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1106:   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1107:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1108:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1109:   PetscFunctionReturn(PETSC_SUCCESS);
1110: }

1112: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1113:  *
1114:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1115:  */
1116: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1117: {
1118:   PetscFunctionBegin;
1119:   PetscCall(VecDot_SeqViennaCL(s, t, dp));
1120:   PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1121:   *nm *= *nm; //squared norm required
1122:   PetscFunctionReturn(PETSC_SUCCESS);
1123: }

1125: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1126: {
1127:   PetscFunctionBegin;
1128:   PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1129:   PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1130:   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist));
1131:   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist));
1132:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1133:   PetscFunctionReturn(PETSC_SUCCESS);
1134: }

1136: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1137: {
1138:   PetscFunctionBegin;
1139:   try {
1140:     if (v->spptr) {
1141:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1142:       delete (Vec_ViennaCL *)v->spptr;
1143:     }
1144:   } catch (char *ex) {
1145:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1146:   }
1147:   PetscCall(VecDestroy_SeqViennaCL_Private(v));
1148:   PetscFunctionReturn(PETSC_SUCCESS);
1149: }

1151: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1152: {
1153:   PetscFunctionBegin;
1154:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1155:     PetscCall(VecViennaCLCopyFromGPU(v));
1156:   } else {
1157:     PetscCall(VecViennaCLAllocateCheckHost(v));
1158:   }
1159:   *a = *((PetscScalar **)v->data);
1160:   PetscFunctionReturn(PETSC_SUCCESS);
1161: }

1163: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1164: {
1165:   PetscFunctionBegin;
1166:   v->offloadmask = PETSC_OFFLOAD_CPU;
1167:   PetscFunctionReturn(PETSC_SUCCESS);
1168: }

1170: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1171: {
1172:   PetscFunctionBegin;
1173:   PetscCall(VecViennaCLAllocateCheckHost(v));
1174:   *a = *((PetscScalar **)v->data);
1175:   PetscFunctionReturn(PETSC_SUCCESS);
1176: }

1178: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1179: {
1180:   PetscFunctionBegin;
1181:   V->boundtocpu = flg;
1182:   if (flg) {
1183:     PetscCall(VecViennaCLCopyFromGPU(V));
1184:     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1185:     V->ops->dot             = VecDot_Seq;
1186:     V->ops->norm            = VecNorm_Seq;
1187:     V->ops->tdot            = VecTDot_Seq;
1188:     V->ops->scale           = VecScale_Seq;
1189:     V->ops->copy            = VecCopy_Seq;
1190:     V->ops->set             = VecSet_Seq;
1191:     V->ops->swap            = VecSwap_Seq;
1192:     V->ops->axpy            = VecAXPY_Seq;
1193:     V->ops->axpby           = VecAXPBY_Seq;
1194:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1195:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1196:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1197:     V->ops->setrandom       = VecSetRandom_Seq;
1198:     V->ops->dot_local       = VecDot_Seq;
1199:     V->ops->tdot_local      = VecTDot_Seq;
1200:     V->ops->norm_local      = VecNorm_Seq;
1201:     V->ops->mdot_local      = VecMDot_Seq;
1202:     V->ops->mtdot_local     = VecMTDot_Seq;
1203:     V->ops->maxpy           = VecMAXPY_Seq;
1204:     V->ops->mdot            = VecMDot_Seq;
1205:     V->ops->mtdot           = VecMTDot_Seq;
1206:     V->ops->aypx            = VecAYPX_Seq;
1207:     V->ops->waxpy           = VecWAXPY_Seq;
1208:     V->ops->dotnorm2        = NULL;
1209:     V->ops->placearray      = VecPlaceArray_Seq;
1210:     V->ops->replacearray    = VecReplaceArray_Seq;
1211:     V->ops->resetarray      = VecResetArray_Seq;
1212:     V->ops->duplicate       = VecDuplicate_Seq;
1213:   } else {
1214:     V->ops->dot             = VecDot_SeqViennaCL;
1215:     V->ops->norm            = VecNorm_SeqViennaCL;
1216:     V->ops->tdot            = VecTDot_SeqViennaCL;
1217:     V->ops->scale           = VecScale_SeqViennaCL;
1218:     V->ops->copy            = VecCopy_SeqViennaCL;
1219:     V->ops->set             = VecSet_SeqViennaCL;
1220:     V->ops->swap            = VecSwap_SeqViennaCL;
1221:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1222:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1223:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1224:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1225:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1226:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1227:     V->ops->dot_local       = VecDot_SeqViennaCL;
1228:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1229:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1230:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1231:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1232:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1233:     V->ops->mdot            = VecMDot_SeqViennaCL;
1234:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1235:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1236:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1237:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1238:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1239:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1240:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1241:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1242:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1243:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1244:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1245:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1246:   }
1247:   PetscFunctionReturn(PETSC_SUCCESS);
1248: }

1250: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1251: {
1252:   PetscMPIInt size;

1254:   PetscFunctionBegin;
1255:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1256:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1257:   PetscCall(VecCreate_Seq_Private(V, 0));
1258:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));

1260:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1261:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1263:   PetscCall(VecViennaCLAllocateCheck(V));
1264:   PetscCall(VecSet_SeqViennaCL(V, 0.0));
1265:   PetscFunctionReturn(PETSC_SUCCESS);
1266: }

1268: /*@C
1269:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1271:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1272:   invoking clReleaseContext().

1274:   Input Parameter:
1275: .  v    - the vector

1277:   Output Parameter:
1278: .  ctx - pointer to the underlying CL context

1280:   Level: intermediate

1282: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1283: @*/
1284: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1285: {
1286: #if !defined(PETSC_HAVE_OPENCL)
1287:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1288: #else

1290:   PetscFunctionBegin;
1291:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1292:   const ViennaCLVector *v_vcl;
1293:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1294:   try {
1295:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1296:     const cl_context       ocl_ctx = vcl_ctx.handle().get();
1297:     clRetainContext(ocl_ctx);
1298:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1299:   } catch (std::exception const &ex) {
1300:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1301:   }

1303:   PetscFunctionReturn(PETSC_SUCCESS);
1304: #endif
1305: }

1307: /*@C
1308:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1309:   operations of the Vec are enqueued.

1311:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1312:   responsible for invoking clReleaseCommandQueue().

1314:   Input Parameter:
1315: .  v    - the vector

1317:   Output Parameter:
1318: .  ctx - pointer to the CL command queue

1320:   Level: intermediate

1322: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1323: @*/
1324: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1325: {
1326: #if !defined(PETSC_HAVE_OPENCL)
1327:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1328: #else
1329:   PetscFunctionBegin;
1330:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1331:   const ViennaCLVector *v_vcl;
1332:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1333:   try {
1334:     viennacl::ocl::context              vcl_ctx   = v_vcl->handle().opencl_handle().context();
1335:     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1336:     const cl_command_queue              ocl_queue = vcl_queue.handle().get();
1337:     clRetainCommandQueue(ocl_queue);
1338:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1339:   } catch (std::exception const &ex) {
1340:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1341:   }

1343:   PetscFunctionReturn(PETSC_SUCCESS);
1344: #endif
1345: }

1347: /*@C
1348:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1350:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1351:   invoking clReleaseMemObject().

1353:   Input Parameter:
1354: .  v    - the vector

1356:   Output Parameter:
1357: .  mem - pointer to the device buffer

1359:   Level: intermediate

1361: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1362: @*/
1363: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1364: {
1365: #if !defined(PETSC_HAVE_OPENCL)
1366:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1367: #else
1368:   PetscFunctionBegin;
1369:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1370:   const ViennaCLVector *v_vcl;
1371:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1372:   try {
1373:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1374:     clRetainMemObject(ocl_mem);
1375:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1376:   } catch (std::exception const &ex) {
1377:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1378:   }
1379:   PetscFunctionReturn(PETSC_SUCCESS);
1380: #endif
1381: }

1383: /*@C
1384:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1386:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1387:   invoking clReleaseMemObject().

1389:   The device pointer has to be released by calling
1390:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1391:   the host will be marked as out of date.  A subsequent access of the host data
1392:   will thus incur a data transfer from the device to the host.

1394:   Input Parameter:
1395: .  v    - the vector

1397:   Output Parameter:
1398: .  mem - pointer to the device buffer

1400:   Level: intermediate

1402: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1403: @*/
1404: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1405: {
1406: #if !defined(PETSC_HAVE_OPENCL)
1407:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1408: #else
1409:   PetscFunctionBegin;
1410:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1411:   ViennaCLVector *v_vcl;
1412:   PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1413:   try {
1414:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1415:     clRetainMemObject(ocl_mem);
1416:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1417:   } catch (std::exception const &ex) {
1418:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1419:   }

1421:   PetscFunctionReturn(PETSC_SUCCESS);
1422: #endif
1423: }

1425: /*@C
1426:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1427:   acquired with VecViennaCLGetCLMemWrite().

1429:    This marks the host data as out of date.  Subsequent access to the
1430:    vector data on the host side with for instance VecGetArray() incurs a
1431:    data transfer.

1433:   Input Parameter:
1434: .  v    - the vector

1436:   Level: intermediate

1438: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1439: @*/
1440: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1441: {
1442: #if !defined(PETSC_HAVE_OPENCL)
1443:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1444: #else
1445:   PetscFunctionBegin;
1446:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1447:   PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));

1449:   PetscFunctionReturn(PETSC_SUCCESS);
1450: #endif
1451: }

1453: /*@C
1454:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1456:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1457:   invoking clReleaseMemObject().

1459:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1460:   Upon restoring the vector data the data on the host will be marked as out of
1461:   date.  A subsequent access of the host data will thus incur a data transfer
1462:   from the device to the host.

1464:   Input Parameter:
1465: .  v    - the vector

1467:   Output Parameter:
1468: .  mem - pointer to the device buffer

1470:   Level: intermediate

1472: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1473: @*/
1474: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1475: {
1476: #if !defined(PETSC_HAVE_OPENCL)
1477:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1478: #else
1479:   PetscFunctionBegin;
1480:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1481:   ViennaCLVector *v_vcl;
1482:   PetscCall(VecViennaCLGetArray(v, &v_vcl));
1483:   try {
1484:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1485:     clRetainMemObject(ocl_mem);
1486:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1487:   } catch (std::exception const &ex) {
1488:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1489:   }

1491:   PetscFunctionReturn(PETSC_SUCCESS);
1492: #endif
1493: }

1495: /*@C
1496:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1497:   acquired with VecViennaCLGetCLMem().

1499:    This marks the host data as out of date. Subsequent access to the vector
1500:    data on the host side with for instance VecGetArray() incurs a data
1501:    transfer.

1503:   Input Parameter:
1504: .  v    - the vector

1506:   Level: intermediate

1508: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1509: @*/
1510: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1511: {
1512: #if !defined(PETSC_HAVE_OPENCL)
1513:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1514: #else
1515:   PetscFunctionBegin;
1516:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1517:   PetscCall(VecViennaCLRestoreArray(v, NULL));

1519:   PetscFunctionReturn(PETSC_SUCCESS);
1520: #endif
1521: }

1523: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1524: {
1525:   Vec_ViennaCL *vecviennacl;
1526:   PetscMPIInt   size;

1528:   PetscFunctionBegin;
1529:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1530:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1531:   PetscCall(VecCreate_Seq_Private(V, 0));
1532:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1533:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1534:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1536:   if (array) {
1537:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1538:     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1539:     vecviennacl->GPUarray_allocated = 0;
1540:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1541:     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1542:   }

1544:   PetscFunctionReturn(PETSC_SUCCESS);
1545: }