Actual source code: bvec1.c
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
11: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
12: #else
13: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
14: #endif
15: {
16: const PetscInt n = xin->map->n;
17: const PetscBLASInt one = 1;
18: const PetscScalar *ya, *xa;
19: PetscBLASInt bn;
21: PetscFunctionBegin;
22: PetscCall(PetscBLASIntCast(n, &bn));
23: if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
24: PetscCall(VecGetArrayRead(xin, &xa));
25: PetscCall(VecGetArrayRead(yin, &ya));
26: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
27: the second */
28: PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
29: PetscCall(VecRestoreArrayRead(xin, &xa));
30: PetscCall(VecRestoreArrayRead(yin, &ya));
31: PetscFunctionReturn(PETSC_SUCCESS);
32: }
34: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
35: {
36: PetscFunctionBegin;
37: PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
38: PetscFunctionReturn(PETSC_SUCCESS);
39: }
41: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
42: {
43: PetscFunctionBegin;
44: /*
45: pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
46: dot(&bn, xa, &one, ya, &one)
47: */
48: PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
49: PetscFunctionReturn(PETSC_SUCCESS);
50: }
52: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
53: {
54: PetscFunctionBegin;
55: if (alpha == (PetscScalar)0.0) {
56: PetscCall(VecSet_Seq(xin, alpha));
57: } else if (alpha != (PetscScalar)1.0) {
58: const PetscBLASInt one = 1;
59: PetscBLASInt bn;
60: PetscScalar *xarray;
62: PetscCall(PetscBLASIntCast(xin->map->n, &bn));
63: PetscCall(PetscLogFlops(bn));
64: PetscCall(VecGetArray(xin, &xarray));
65: PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
66: PetscCall(VecRestoreArray(xin, &xarray));
67: }
68: PetscFunctionReturn(PETSC_SUCCESS);
69: }
71: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
72: {
73: PetscFunctionBegin;
74: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
75: if (alpha != (PetscScalar)0.0) {
76: const PetscScalar *xarray;
77: PetscScalar *yarray;
78: const PetscBLASInt one = 1;
79: PetscBLASInt bn;
81: PetscCall(PetscBLASIntCast(yin->map->n, &bn));
82: PetscCall(PetscLogFlops(2.0 * bn));
83: PetscCall(VecGetArrayRead(xin, &xarray));
84: PetscCall(VecGetArray(yin, &yarray));
85: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
86: PetscCall(VecRestoreArrayRead(xin, &xarray));
87: PetscCall(VecRestoreArray(yin, &yarray));
88: }
89: PetscFunctionReturn(PETSC_SUCCESS);
90: }
92: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
93: {
94: PetscFunctionBegin;
95: if (a == (PetscScalar)0.0) {
96: PetscCall(VecScale_Seq(yin, b));
97: } else if (b == (PetscScalar)1.0) {
98: PetscCall(VecAXPY_Seq(yin, a, xin));
99: } else if (a == (PetscScalar)1.0) {
100: PetscCall(VecAYPX_Seq(yin, b, xin));
101: } else {
102: const PetscInt n = yin->map->n;
103: const PetscScalar *xx;
104: PetscInt flops;
105: PetscScalar *yy;
107: PetscCall(VecGetArrayRead(xin, &xx));
108: PetscCall(VecGetArray(yin, &yy));
109: if (b == (PetscScalar)0.0) {
110: flops = n;
111: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
112: } else {
113: flops = 3 * n;
114: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
115: }
116: PetscCall(VecRestoreArrayRead(xin, &xx));
117: PetscCall(VecRestoreArray(yin, &yy));
118: PetscCall(PetscLogFlops(flops));
119: }
120: PetscFunctionReturn(PETSC_SUCCESS);
121: }
123: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
124: {
125: const PetscInt n = zin->map->n;
126: const PetscScalar *yy, *xx;
127: PetscInt flops = 4 * n; // common case
128: PetscScalar *zz;
130: PetscFunctionBegin;
131: PetscCall(VecGetArrayRead(xin, &xx));
132: PetscCall(VecGetArrayRead(yin, &yy));
133: PetscCall(VecGetArray(zin, &zz));
134: if (alpha == (PetscScalar)1.0) {
135: for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
136: } else if (gamma == (PetscScalar)1.0) {
137: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
138: } else if (gamma == (PetscScalar)0.0) {
139: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
140: flops -= n;
141: } else {
142: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
143: flops += n;
144: }
145: PetscCall(VecRestoreArrayRead(xin, &xx));
146: PetscCall(VecRestoreArrayRead(yin, &yy));
147: PetscCall(VecRestoreArray(zin, &zz));
148: PetscCall(PetscLogFlops(flops));
149: PetscFunctionReturn(PETSC_SUCCESS);
150: }