Actual source code: fftw.c


/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h>
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
  #include <fftw3-mpi.h>
#else
  #include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward, p_backward;
  unsigned     p_flag;                                      /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan should only be
                                                               executed on the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
  #else
  fftw_iodim *iodims;
  #endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
  #if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
  #else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
  #endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
                so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
    fftw_execute(fftw->p_forward);
  } else {                                                         /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
  #else
  fftw_iodim *iodims = fftw->iodims;
  #endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
  #endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                         /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
                so planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
    fftw_execute(fftw->p_forward);
  } else {                                                         /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                         /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

  PetscFunctionBegin;
  if (fftw->p_forward) fftw_destroy_plan(fftw->p_forward);
  if (fftw->p_backward) fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) free(fftw->iodims);
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &array));
  fftw_free((fftw_complex *)array);
  PetscCall(VecRestoreArray(v, &array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by `MATFFTW`

   Collective

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

   Options Database Key:
.  -mat_fftw_plannerflags - set FFTW planner flags

  Level: advanced

  Notes:
  The parallel layout of the output of the forward FFTW is always the same as the input
  of the backward FFTW, but the parallel layout of the input vector of the forward FFTW
  might not be the same as the output of the backward FFTW.

  One needs to provide enough space when doing a parallel real transform: extra zeros must
  be padded at the end of the last dimension. For this reason one needs to invoke the routine
  fftw_mpi_local_size_transposed(), remembering to change the last dimension from n to n/2+1
  when invoking it. The number of zeros to be padded depends on whether the last dimension is
  even or odd: an even last dimension needs two padded zeros, an odd one needs only one.

  Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
  figures out how much space is needed, i.e. it figures out the data plus scratch space for
  each process and returns that.

  Developer Note:
  How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
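
/*
   A minimal usage sketch (illustrative only; the 64 x 64 2D size and the vector names are
   arbitrary assumptions, not part of this source):

     Mat      A;
     Vec      x, y, z;
     PetscInt dim[2] = {64, 64};

     PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &A)); // create a MATFFTW matrix
     PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));                    // x: forward input, y: forward output, z: backward output
     PetscCall(MatMult(A, x, y));                                    // forward transform
     PetscCall(MatMultTranspose(A, y, z));                           // backward (unnormalized) transform
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscCall(VecDestroy(&z));
     PetscCall(MatDestroy(&A));
*/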

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
{
  PetscMPIInt size, rank;
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
#else
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else { /* parallel cases */
    Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
    PetscInt      ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t     alloc_local, local_n0, local_0_start;
    ptrdiff_t     local_n1;
    fftw_complex *data_fout;
    ptrdiff_t     local_1_start;
  #if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin, *data_bout;
  #else
    double   *data_finr, *data_boutr;
    PetscInt  n1, N1;
    ptrdiff_t temp;
  #endif

    switch (ndim) {
    case 1:
  #if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
  #else
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      break;
  #endif
    case 2:
  #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * (dim[1] / 2 + 1);
      n1          = 2 * local_n0 * (dim[1] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #endif
      break;
    case 3:
  #if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
      n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #else
      alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #endif
      break;
    default:
  #if !defined(PETSC_USE_COMPLEX)
      temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
      alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
  #endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call won't free the memory and would only
       cause memory leaks. We void these pointers here to avoid accidental use.
    */
    if (fin) (*fin)->ops->replacearray = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

   Collective

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameter:
.  y - the FFTW vector

   Level: intermediate
   Note:
   For parallel real FFTs, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
   one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job with a scatter operation.

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
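
/*
   A minimal sketch of the intended call sequence (illustrative only; "input" is an assumed
   PETSc vector in the natural layout and "x", "y" are vectors obtained from MatCreateVecsFFTW()):

     PetscCall(VecScatterPetscToFFTW(A, input, x)); // copy input into the (padded) FFTW layout
     PetscCall(MatMult(A, x, y));                   // forward transform on the FFTW-layout data
*/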

PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
  #if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
  #endif

    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;

    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;

    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

   Collective

   Input Parameters:
+  A - `MATFFTW` matrix
-  x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate
   Note:
   When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
   `VecScatterFFTWToPetsc()` removes those extra zeros.

.seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
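
/*
   A minimal round-trip sketch (illustrative only; "y", "z" are vectors from MatCreateVecsFFTW()
   and "output" is an assumed PETSc vector in the natural layout). FFTW transforms are
   unnormalized, so a forward followed by a backward transform scales the data by N = fft->N:

     PetscCall(MatMultTranspose(A, y, z));           // backward transform
     PetscCall(VecScatterFFTWToPetsc(A, z, output)); // strip the padding off the FFTW output
     PetscCall(VecScale(output, 1.0 / N));           // undo the scaling by N
*/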

PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
  #if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
  #endif
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
  #else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
  #endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
M*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  Mat_FFTW   *fftw;
  PetscInt    ndim = fft->ndim, *dim = fft->dim;
  const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t  *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2 * dim[0];
#endif
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n       = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
  #if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
  #endif

    switch (ndim) {
    case 1:
  #if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
  #else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
  #else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
  #else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
  #else
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp;

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
  #endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNew(&fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* the dimension of the FFT */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
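
/*
   Example runtime usage (illustrative only; "./myapp" is an assumed application name): the
   planner flag defaults to FFTW_ESTIMATE above and can be overridden on the command line, e.g.

     ./myapp -mat_fftw_plannerflags FFTW_MEASURE

   Note that with FFTW_MEASURE and the more expensive flags FFTW overwrites the in/out arrays
   during planning, so plans should be created before the input vector is filled (see the
   warnings in MatMult_SeqFFTW() and MatMult_MPIFFTW() above).
*/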