Actual source code: bvec1.c


  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <petscblaslapack.h>

 10: #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
 11: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 12: #else
 13: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 14: #endif
 15: {
 16:   const PetscInt     n   = xin->map->n;
 17:   const PetscBLASInt one = 1;
 18:   const PetscScalar *ya, *xa;
 19:   PetscBLASInt       bn;

 21:   PetscFunctionBegin;
 22:   PetscCall(PetscBLASIntCast(n, &bn));
 23:   if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
 24:   PetscCall(VecGetArrayRead(xin, &xa));
 25:   PetscCall(VecGetArrayRead(yin, &ya));
 26:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
 27:      the second */
 28:   PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
 29:   PetscCall(VecRestoreArrayRead(xin, &xa));
 30:   PetscCall(VecRestoreArrayRead(yin, &ya));
 31:   PetscFunctionReturn(PETSC_SUCCESS);
 32: }

 34: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 35: {
 36:   PetscFunctionBegin;
 37:   PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
 38:   PetscFunctionReturn(PETSC_SUCCESS);
 39: }

 41: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 42: {
 43:   PetscFunctionBegin;
 44:   /*
 45:     pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
 46:     dot(&bn, xa, &one, ya, &one)
 47:   */
 48:   PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
 49:   PetscFunctionReturn(PETSC_SUCCESS);
 50: }

 52: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 53: {
 54:   PetscFunctionBegin;
 55:   if (alpha == (PetscScalar)0.0) {
 56:     PetscCall(VecSet_Seq(xin, alpha));
 57:   } else if (alpha != (PetscScalar)1.0) {
 58:     const PetscBLASInt one = 1;
 59:     PetscBLASInt       bn;
 60:     PetscScalar       *xarray;

 62:     PetscCall(PetscBLASIntCast(xin->map->n, &bn));
 63:     PetscCall(PetscLogFlops(bn));
 64:     PetscCall(VecGetArray(xin, &xarray));
 65:     PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
 66:     PetscCall(VecRestoreArray(xin, &xarray));
 67:   }
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
 72: {
 73:   PetscFunctionBegin;
 74:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 75:   if (alpha != (PetscScalar)0.0) {
 76:     const PetscScalar *xarray;
 77:     PetscScalar       *yarray;
 78:     const PetscBLASInt one = 1;
 79:     PetscBLASInt       bn;

 81:     PetscCall(PetscBLASIntCast(yin->map->n, &bn));
 82:     PetscCall(PetscLogFlops(2.0 * bn));
 83:     PetscCall(VecGetArrayRead(xin, &xarray));
 84:     PetscCall(VecGetArray(yin, &yarray));
 85:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
 86:     PetscCall(VecRestoreArrayRead(xin, &xarray));
 87:     PetscCall(VecRestoreArray(yin, &yarray));
 88:   }
 89:   PetscFunctionReturn(PETSC_SUCCESS);
 90: }

 92: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
 93: {
 94:   PetscFunctionBegin;
 95:   if (a == (PetscScalar)0.0) {
 96:     PetscCall(VecScale_Seq(yin, b));
 97:   } else if (b == (PetscScalar)1.0) {
 98:     PetscCall(VecAXPY_Seq(yin, a, xin));
 99:   } else if (a == (PetscScalar)1.0) {
100:     PetscCall(VecAYPX_Seq(yin, b, xin));
101:   } else {
102:     const PetscInt     n = yin->map->n;
103:     const PetscScalar *xx;
104:     PetscInt           flops;
105:     PetscScalar       *yy;

107:     PetscCall(VecGetArrayRead(xin, &xx));
108:     PetscCall(VecGetArray(yin, &yy));
109:     if (b == (PetscScalar)0.0) {
110:       flops = n;
111:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
112:     } else {
113:       flops = 3 * n;
114:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
115:     }
116:     PetscCall(VecRestoreArrayRead(xin, &xx));
117:     PetscCall(VecRestoreArray(yin, &yy));
118:     PetscCall(PetscLogFlops(flops));
119:   }
120:   PetscFunctionReturn(PETSC_SUCCESS);
121: }

123: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
124: {
125:   const PetscInt     n = zin->map->n;
126:   const PetscScalar *yy, *xx;
127:   PetscInt           flops = 4 * n; // common case
128:   PetscScalar       *zz;

130:   PetscFunctionBegin;
131:   PetscCall(VecGetArrayRead(xin, &xx));
132:   PetscCall(VecGetArrayRead(yin, &yy));
133:   PetscCall(VecGetArray(zin, &zz));
134:   if (alpha == (PetscScalar)1.0) {
135:     for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
136:   } else if (gamma == (PetscScalar)1.0) {
137:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
138:   } else if (gamma == (PetscScalar)0.0) {
139:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
140:     flops -= n;
141:   } else {
142:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
143:     flops += n;
144:   }
145:   PetscCall(VecRestoreArrayRead(xin, &xx));
146:   PetscCall(VecRestoreArrayRead(yin, &yy));
147:   PetscCall(VecRestoreArray(zin, &zz));
148:   PetscCall(PetscLogFlops(flops));
149:   PetscFunctionReturn(PETSC_SUCCESS);
150: }