Actual source code: cufft.cu


  2: /*
  3:     Provides an interface to the CUFFT package.
  4:     Testing examples can be found in ~src/mat/tests
  5: */

  7: #include <petscdevice_cuda.h>
  8: #include <petsc/private/matimpl.h>

 10: typedef struct {
 11:   PetscInt      ndim;
 12:   PetscInt     *dim;
 13:   cufftHandle   p_forward, p_backward;
 14:   cufftComplex *devArray;
 15: } Mat_CUFFT;

 17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
 18: {
 19:   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
 20:   cufftComplex *devArray = cufft->devArray;
 21:   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
 22:   PetscScalar  *x_array, *y_array;

 24:   PetscFunctionBegin;
 25:   PetscCall(VecGetArray(x, &x_array));
 26:   PetscCall(VecGetArray(y, &y_array));
 27:   if (!cufft->p_forward) {
 28:     /* create a plan, then execute it */
 29:     switch (ndim) {
 30:     case 1:
 31:       PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1));
 32:       break;
 33:     case 2:
 34:       PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C));
 35:       break;
 36:     case 3:
 37:       PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C));
 38:       break;
 39:     default:
 40:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 41:     }
 42:   }
 43:   /* transfer to GPU memory */
 44:   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
 45:   /* execute transform */
 46:   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
 47:   /* transfer from GPU memory */
 48:   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
 49:   PetscCall(VecRestoreArray(y, &y_array));
 50:   PetscCall(VecRestoreArray(x, &x_array));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }

 54: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
 55: {
 56:   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
 57:   cufftComplex *devArray = cufft->devArray;
 58:   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
 59:   PetscScalar  *x_array, *y_array;

 61:   PetscFunctionBegin;
 62:   PetscCall(VecGetArray(x, &x_array));
 63:   PetscCall(VecGetArray(y, &y_array));
 64:   if (!cufft->p_backward) {
 65:     /* create a plan, then execute it */
 66:     switch (ndim) {
 67:     case 1:
 68:       PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1));
 69:       break;
 70:     case 2:
 71:       PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C));
 72:       break;
 73:     case 3:
 74:       PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C));
 75:       break;
 76:     default:
 77:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
 78:     }
 79:   }
 80:   /* transfer to GPU memory */
 81:   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
 82:   /* execute transform */
 83:   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
 84:   /* transfer from GPU memory */
 85:   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
 86:   PetscCall(VecRestoreArray(y, &y_array));
 87:   PetscCall(VecRestoreArray(x, &x_array));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
 92: {
 93:   Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;

 95:   PetscFunctionBegin;
 96:   PetscCall(PetscFree(cufft->dim));
 97:   if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
 98:   if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
 99:   PetscCallCUDA(cudaFree(cufft->devArray));
100:   PetscCall(PetscFree(A->data));
101:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
102:   PetscFunctionReturn(PETSC_SUCCESS);
103: }

105: /*@
106:   MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT

108:   Collective

110:   Input Parameters:
111: + comm - MPI communicator, set to `PETSC_COMM_SELF`
112: . ndim - the ndim-dimensional transform
113: - dim  - array of size `ndim`, dim[i] contains the vector length in the i-dimension

115:   Output Parameter:
116: . A - the matrix

118:   Options Database Key:
119: . -mat_cufft_plannerflags - set CuFFT planner flags

121:   Level: intermediate

123: .seealso: [](ch_matrices), `Mat`, `MATSEQCUFFT`
124: @*/
125: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
126: {
127:   Mat_CUFFT *cufft;
128:   PetscInt   m = 1;

130:   PetscFunctionBegin;
131:   PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
134:   PetscCall(MatCreate(comm, A));
135:   for (PetscInt d = 0; d < ndim; ++d) {
136:     PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
137:     m *= dim[d];
138:   }
139:   PetscCall(MatSetSizes(*A, m, m, m, m));
140:   PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));

142:   PetscCall(PetscNew(&cufft));
143:   (*A)->data = (void *)cufft;
144:   PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
145:   PetscCall(PetscArraycpy(cufft->dim, dim, ndim));

147:   cufft->ndim       = ndim;
148:   cufft->p_forward  = 0;
149:   cufft->p_backward = 0;
150:   cufft->dim[ndim]  = m;

152:   /* GPU memory allocation */
153:   PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));

155:   (*A)->ops->mult          = MatMult_SeqCUFFT;
156:   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
157:   (*A)->assembled          = PETSC_TRUE;
158:   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;

160:   /* get runtime options ...what options????? */
161:   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
162:   PetscOptionsEnd();
163:   PetscFunctionReturn(PETSC_SUCCESS);
164: }