Actual source code: fftw.c
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #if !PetscDefined(HAVE_MPIUNI)
10: #include <fftw3-mpi.h>
11: #else
12: #include <fftw3.h>
13: #endif
14: EXTERN_C_END
16: typedef struct {
17: ptrdiff_t ndim_fftw, *dim_fftw;
18: #if defined(PETSC_USE_64BIT_INDICES)
19: fftw_iodim64 *iodims;
20: #else
21: fftw_iodim *iodims;
22: #endif
23: PetscInt partial_dim;
24: fftw_plan p_forward, p_backward;
25: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26: PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan should be
27: executed with the arrays for which it was created */
28: } Mat_FFTW;
30: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
32: extern PetscErrorCode MatDestroy_FFTW(Mat);
33: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
34: #if !PetscDefined(HAVE_MPIUNI)
35: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38: #endif
40: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
41: {
42: Mat_FFT *fft = (Mat_FFT *)A->data;
43: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
44: const PetscScalar *x_array;
45: PetscScalar *y_array;
46: #if defined(PETSC_USE_COMPLEX)
47: #if defined(PETSC_USE_64BIT_INDICES)
48: fftw_iodim64 *iodims;
49: #else
50: fftw_iodim *iodims;
51: #endif
52: PetscInt i;
53: #endif
54: PetscInt ndim = fft->ndim, *dim = fft->dim;
56: PetscFunctionBegin;
57: PetscCall(VecGetArrayRead(x, &x_array));
58: PetscCall(VecGetArray(y, &y_array));
59: if (!fftw->p_forward) { /* create a plan, then execute it */
60: switch (ndim) {
61: case 1:
62: #if defined(PETSC_USE_COMPLEX)
63: fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
64: #else
65: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
66: #endif
67: break;
68: case 2:
69: #if defined(PETSC_USE_COMPLEX)
70: fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
71: #else
72: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
73: #endif
74: break;
75: case 3:
76: #if defined(PETSC_USE_COMPLEX)
77: fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
78: #else
79: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
80: #endif
81: break;
82: default:
83: #if defined(PETSC_USE_COMPLEX)
84: iodims = fftw->iodims;
85: #if defined(PETSC_USE_64BIT_INDICES)
86: if (ndim) {
87: iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
88: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
89: for (i = ndim - 2; i >= 0; --i) {
90: iodims[i].n = (ptrdiff_t)dim[i];
91: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
92: }
93: }
94: fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
95: #else
96: if (ndim) {
97: iodims[ndim - 1].n = (int)dim[ndim - 1];
98: iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
99: for (i = ndim - 2; i >= 0; --i) {
100: iodims[i].n = (int)dim[i];
101: iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
102: }
103: }
104: fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
105: #endif
107: #else
108: fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
109: #endif
110: break;
111: }
112: fftw->finarray = (PetscScalar *)x_array;
113: fftw->foutarray = y_array;
114: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
115: Planning should therefore be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
116: fftw_execute(fftw->p_forward);
117: } else { /* use existing plan */
118: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
119: #if defined(PETSC_USE_COMPLEX)
120: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
121: #else
122: fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
123: #endif
124: } else {
125: fftw_execute(fftw->p_forward);
126: }
127: }
128: PetscCall(VecRestoreArray(y, &y_array));
129: PetscCall(VecRestoreArrayRead(x, &x_array));
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
133: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
134: {
135: Mat_FFT *fft = (Mat_FFT *)A->data;
136: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
137: const PetscScalar *x_array;
138: PetscScalar *y_array;
139: PetscInt ndim = fft->ndim, *dim = fft->dim;
140: #if defined(PETSC_USE_COMPLEX)
141: #if defined(PETSC_USE_64BIT_INDICES)
142: fftw_iodim64 *iodims = fftw->iodims;
143: #else
144: fftw_iodim *iodims = fftw->iodims;
145: #endif
146: #endif
148: PetscFunctionBegin;
149: PetscCall(VecGetArrayRead(x, &x_array));
150: PetscCall(VecGetArray(y, &y_array));
151: if (!fftw->p_backward) { /* create a plan, then execute it */
152: switch (ndim) {
153: case 1:
154: #if defined(PETSC_USE_COMPLEX)
155: fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
156: #else
157: fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
158: #endif
159: break;
160: case 2:
161: #if defined(PETSC_USE_COMPLEX)
162: fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
163: #else
164: fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
165: #endif
166: break;
167: case 3:
168: #if defined(PETSC_USE_COMPLEX)
169: fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
170: #else
171: fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
172: #endif
173: break;
174: default:
175: #if defined(PETSC_USE_COMPLEX)
176: #if defined(PETSC_USE_64BIT_INDICES)
177: fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
178: #else
179: fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
180: #endif
181: #else
182: fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
183: #endif
184: break;
185: }
186: fftw->binarray = (PetscScalar *)x_array;
187: fftw->boutarray = y_array;
188: fftw_execute(fftw->p_backward);
189: } else { /* use existing plan */
190: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
191: #if defined(PETSC_USE_COMPLEX)
192: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
193: #else
194: fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
195: #endif
196: } else {
197: fftw_execute(fftw->p_backward);
198: }
199: }
200: PetscCall(VecRestoreArray(y, &y_array));
201: PetscCall(VecRestoreArrayRead(x, &x_array));
202: PetscFunctionReturn(PETSC_SUCCESS);
203: }
205: #if !PetscDefined(HAVE_MPIUNI)
206: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
207: {
208: Mat_FFT *fft = (Mat_FFT *)A->data;
209: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
210: const PetscScalar *x_array;
211: PetscScalar *y_array;
212: PetscInt ndim = fft->ndim, *dim = fft->dim;
213: MPI_Comm comm;
215: PetscFunctionBegin;
216: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
217: PetscCall(VecGetArrayRead(x, &x_array));
218: PetscCall(VecGetArray(y, &y_array));
219: if (!fftw->p_forward) { /* create a plan, then execute it */
220: switch (ndim) {
221: case 1:
222: #if defined(PETSC_USE_COMPLEX)
223: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
224: #else
225: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
226: #endif
227: break;
228: case 2:
229: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
230: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
231: #else
232: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
233: #endif
234: break;
235: case 3:
236: #if defined(PETSC_USE_COMPLEX)
237: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
238: #else
239: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
240: #endif
241: break;
242: default:
243: #if defined(PETSC_USE_COMPLEX)
244: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
245: #else
246: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
247: #endif
248: break;
249: }
250: fftw->finarray = (PetscScalar *)x_array;
251: fftw->foutarray = y_array;
252: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
253: Planning should therefore be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
254: fftw_execute(fftw->p_forward);
255: } else { /* use existing plan */
256: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
257: fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
258: } else {
259: fftw_execute(fftw->p_forward);
260: }
261: }
262: PetscCall(VecRestoreArray(y, &y_array));
263: PetscCall(VecRestoreArrayRead(x, &x_array));
264: PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
268: {
269: Mat_FFT *fft = (Mat_FFT *)A->data;
270: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
271: const PetscScalar *x_array;
272: PetscScalar *y_array;
273: PetscInt ndim = fft->ndim, *dim = fft->dim;
274: MPI_Comm comm;
276: PetscFunctionBegin;
277: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
278: PetscCall(VecGetArrayRead(x, &x_array));
279: PetscCall(VecGetArray(y, &y_array));
280: if (!fftw->p_backward) { /* create a plan, then execute it */
281: switch (ndim) {
282: case 1:
283: #if defined(PETSC_USE_COMPLEX)
284: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
285: #else
286: SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
287: #endif
288: break;
289: case 2:
290: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
291: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
292: #else
293: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
294: #endif
295: break;
296: case 3:
297: #if defined(PETSC_USE_COMPLEX)
298: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
299: #else
300: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
301: #endif
302: break;
303: default:
304: #if defined(PETSC_USE_COMPLEX)
305: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
306: #else
307: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
308: #endif
309: break;
310: }
311: fftw->binarray = (PetscScalar *)x_array;
312: fftw->boutarray = y_array;
313: fftw_execute(fftw->p_backward);
314: } else { /* use existing plan */
315: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
316: fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
317: } else {
318: fftw_execute(fftw->p_backward);
319: }
320: }
321: PetscCall(VecRestoreArray(y, &y_array));
322: PetscCall(VecRestoreArrayRead(x, &x_array));
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
325: #endif
327: PetscErrorCode MatDestroy_FFTW(Mat A)
328: {
329: Mat_FFT *fft = (Mat_FFT *)A->data;
330: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
332: PetscFunctionBegin;
333: fftw_destroy_plan(fftw->p_forward);
334: fftw_destroy_plan(fftw->p_backward);
335: if (fftw->iodims) free(fftw->iodims);
336: PetscCall(PetscFree(fftw->dim_fftw));
337: PetscCall(PetscFree(fft->data));
338: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
339: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
340: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
341: #if !PetscDefined(HAVE_MPIUNI)
342: fftw_mpi_cleanup();
343: #endif
344: PetscFunctionReturn(PETSC_SUCCESS);
345: }
347: #if !PetscDefined(HAVE_MPIUNI)
348: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
349: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
350: {
351: PetscScalar *array;
353: PetscFunctionBegin;
354: PetscCall(VecGetArray(v, &array));
355: fftw_free((fftw_complex *)array);
356: PetscCall(VecRestoreArray(v, &array));
357: PetscCall(VecDestroy_MPI(v));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
360: #endif
362: #if !PetscDefined(HAVE_MPIUNI)
363: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
364: {
365: Mat A;
367: PetscFunctionBegin;
368: PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
369: PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
370: PetscFunctionReturn(PETSC_SUCCESS);
371: }
373: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
374: {
375: Mat A;
377: PetscFunctionBegin;
378: PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
379: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
384: {
385: Mat A;
387: PetscFunctionBegin;
388: PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
389: PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
392: #endif
394: /*@
395: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
396: parallel layout determined by `MATFFTW`
398: Collective
400: Input Parameter:
401: . A - the matrix
403: Output Parameters:
404: + x - (optional) input vector of forward FFTW
405: . y - (optional) output vector of forward FFTW
406: - z - (optional) output vector of backward FFTW
408: Options Database Key:
409: . -mat_fftw_plannerflags - set FFTW planner flags
411: Level: advanced
413: Notes:
414: The parallel layout of the output of the forward FFTW is always the same as the input
415: of the backward FFTW. However, the parallel layout of the input vector of the forward
416: FFTW might not be the same as the output of the backward FFTW.
418: We need to provide enough space while doing a parallel real transform by padding
419: extra zeros at the end of the last dimension. For this reason one needs to
420: invoke the fftw_mpi_local_size_transposed routine. Remember that one has to change the
421: last dimension from n to n/2+1 when invoking this routine. The number of zeros to be padded
422: depends on whether the last dimension is even or odd: if the last dimension is even, two
423: zeros must be padded; if it is odd, only one zero is needed.
425: Lastly, one needs some scratch space at the end of the data set in each process. alloc_local
426: figures out how much space is needed, i.e. it figures out the data+scratch space for
427: each processor and returns that.
429: Developer Note:
430: How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
432: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
433: @*/
434: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
435: {
436: PetscFunctionBegin;
437: PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
438: PetscFunctionReturn(PETSC_SUCCESS);
439: }
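/*
   Illustrative usage sketch (not part of this source file; the grid sizes and the lack of
   error handling and vector initialization are assumptions for the example): create a 2D
   FFTW matrix with MatCreateFFT(), then obtain vectors whose layouts match the transform.

     Mat      F;
     Vec      x, y, z;
     PetscInt dim[2] = {64, 64};

     PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &F));
     PetscCall(MatCreateVecsFFTW(F, &x, &y, &z));   // x: forward input, y: forward output, z: backward output
     PetscCall(MatMult(F, x, y));                   // forward FFT
     PetscCall(MatMultTranspose(F, y, z));          // backward (unnormalized) FFT
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscCall(VecDestroy(&z));
     PetscCall(MatDestroy(&F));
*/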
441: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
442: {
443: PetscMPIInt size, rank;
444: MPI_Comm comm;
445: Mat_FFT *fft = (Mat_FFT *)A->data;
447: PetscFunctionBegin;
450: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
452: PetscCallMPI(MPI_Comm_size(comm, &size));
453: PetscCallMPI(MPI_Comm_rank(comm, &rank));
454: if (size == 1) { /* sequential case */
455: #if defined(PETSC_USE_COMPLEX)
456: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
457: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
458: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
459: #else
460: if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
461: if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
462: if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
463: #endif
464: #if !PetscDefined(HAVE_MPIUNI)
465: } else { /* parallel cases */
466: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
467: PetscInt ndim = fft->ndim, *dim = fft->dim;
468: ptrdiff_t alloc_local, local_n0, local_0_start;
469: ptrdiff_t local_n1;
470: fftw_complex *data_fout;
471: ptrdiff_t local_1_start;
472: #if defined(PETSC_USE_COMPLEX)
473: fftw_complex *data_fin, *data_bout;
474: #else
475: double *data_finr, *data_boutr;
476: PetscInt n1, N1;
477: ptrdiff_t temp;
478: #endif
480: switch (ndim) {
481: case 1:
482: #if !defined(PETSC_USE_COMPLEX)
483: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
484: #else
485: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
486: if (fin) {
487: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
488: PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
489: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
490: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
491: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
492: }
493: if (fout) {
494: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
495: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
496: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
497: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
498: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
499: }
500: alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
501: if (bout) {
502: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
503: PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
504: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
505: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
506: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
507: }
508: break;
509: #endif
510: case 2:
511: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
512: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
513: N1 = 2 * dim[0] * (dim[1] / 2 + 1);
514: n1 = 2 * local_n0 * (dim[1] / 2 + 1);
515: if (fin) {
516: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
517: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
518: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
519: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
520: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
521: }
522: if (fout) {
523: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
525: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
526: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
527: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
528: }
529: if (bout) {
530: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
531: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
532: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
533: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
534: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
535: }
536: #else
537: /* Get local size */
538: alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
539: if (fin) {
540: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
541: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
542: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
543: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
544: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
545: }
546: if (fout) {
547: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
548: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
549: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
550: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
551: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
552: }
553: if (bout) {
554: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
555: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
556: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
557: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
558: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
559: }
560: #endif
561: break;
562: case 3:
563: #if !defined(PETSC_USE_COMPLEX)
564: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
565: N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
566: n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
567: if (fin) {
568: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
569: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
570: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
571: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
572: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
573: }
574: if (fout) {
575: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576: PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
577: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
578: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
579: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
580: }
581: if (bout) {
582: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
583: PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
584: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
585: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
586: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
587: }
588: #else
589: alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
590: if (fin) {
591: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
592: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
593: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
594: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
595: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
596: }
597: if (fout) {
598: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
599: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
600: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
601: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
602: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
603: }
604: if (bout) {
605: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
606: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
607: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
608: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
609: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
610: }
611: #endif
612: break;
613: default:
614: #if !defined(PETSC_USE_COMPLEX)
615: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
616: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
617: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
618: N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
619: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
621: if (fin) {
622: data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
623: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
624: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
625: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
626: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
627: }
628: if (fout) {
629: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
630: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
631: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
632: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
633: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
634: }
635: if (bout) {
636: data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
637: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
638: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
639: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
640: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
641: }
642: #else
643: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
644: if (fin) {
645: data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
646: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
647: PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
648: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
649: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
650: }
651: if (fout) {
652: data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
653: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
654: PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
655: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
656: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
657: }
658: if (bout) {
659: data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
660: PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
661: PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
662: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
663: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
664: }
665: #endif
666: break;
667: }
668: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
669: v->array_allocated=NULL. A regular replacearray call won't free the memory and would only cause
670: memory leaks. We void these pointers here to avoid accidental use.
671: */
672: if (fin) (*fin)->ops->replacearray = NULL;
673: if (fout) (*fout)->ops->replacearray = NULL;
674: if (bout) (*bout)->ops->replacearray = NULL;
675: #endif
676: }
677: PetscFunctionReturn(PETSC_SUCCESS);
678: }
680: /*@
681: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
683: Collective
685: Input Parameters:
686: + A - FFTW matrix
687: - x - the PETSc vector
689: Output Parameter:
690: . y - the FFTW vector
692: Level: intermediate
694: Note:
695: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
696: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
697: zeros. This routine does that job by a scattering operation.
699: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
700: @*/
701: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
702: {
703: PetscFunctionBegin;
704: PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
705: PetscFunctionReturn(PETSC_SUCCESS);
706: }
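/*
   Illustrative sketch (assumptions: F is an existing MATFFTW matrix from MatCreateFFT() and
   xin is an already filled PETSc vector with the natural, unpadded layout): scatter into the
   padded FFTW input vector before applying the forward transform.

     Vec xfftw, yfftw;

     PetscCall(MatCreateVecsFFTW(F, &xfftw, &yfftw, NULL));
     PetscCall(VecScatterPetscToFFTW(F, xin, xfftw));  // inserts the padding zeros described above
     PetscCall(MatMult(F, xfftw, yfftw));              // forward FFT on the padded data
*/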
708: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
709: {
710: MPI_Comm comm;
711: Mat_FFT *fft = (Mat_FFT *)A->data;
712: PetscInt low;
713: PetscMPIInt rank, size;
714: PetscInt vsize, vsize1;
715: VecScatter vecscat;
716: IS list1;
718: PetscFunctionBegin;
719: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
720: PetscCallMPI(MPI_Comm_size(comm, &size));
721: PetscCallMPI(MPI_Comm_rank(comm, &rank));
722: PetscCall(VecGetOwnershipRange(y, &low, NULL));
724: if (size == 1) {
725: PetscCall(VecGetSize(x, &vsize));
726: PetscCall(VecGetSize(y, &vsize1));
727: PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
728: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
729: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
730: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
731: PetscCall(VecScatterDestroy(&vecscat));
732: PetscCall(ISDestroy(&list1));
733: #if !PetscDefined(HAVE_MPIUNI)
734: } else {
735: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
736: PetscInt ndim = fft->ndim, *dim = fft->dim;
737: ptrdiff_t local_n0, local_0_start;
738: ptrdiff_t local_n1, local_1_start;
739: IS list2;
740: #if !defined(PETSC_USE_COMPLEX)
741: PetscInt i, j, k, partial_dim;
742: PetscInt *indx1, *indx2, tempindx, tempindx1;
743: PetscInt NM;
744: ptrdiff_t temp;
745: #endif
747: switch (ndim) {
748: case 1:
749: #if defined(PETSC_USE_COMPLEX)
750: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
752: PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
753: PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
754: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
755: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
756: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
757: PetscCall(VecScatterDestroy(&vecscat));
758: PetscCall(ISDestroy(&list1));
759: PetscCall(ISDestroy(&list2));
760: #else
761: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
762: #endif
763: break;
764: case 2:
765: #if defined(PETSC_USE_COMPLEX)
766: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
768: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
769: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
770: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
771: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
773: PetscCall(VecScatterDestroy(&vecscat));
774: PetscCall(ISDestroy(&list1));
775: PetscCall(ISDestroy(&list2));
776: #else
777: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
779: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
780: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
782: if (dim[1] % 2 == 0) {
783: NM = dim[1] + 2;
784: } else {
785: NM = dim[1] + 1;
786: }
787: for (i = 0; i < local_n0; i++) {
788: for (j = 0; j < dim[1]; j++) {
789: tempindx = i * dim[1] + j;
790: tempindx1 = i * NM + j;
792: indx1[tempindx] = local_0_start * dim[1] + tempindx;
793: indx2[tempindx] = low + tempindx1;
794: }
795: }
797: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
798: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
800: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
801: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
802: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
803: PetscCall(VecScatterDestroy(&vecscat));
804: PetscCall(ISDestroy(&list1));
805: PetscCall(ISDestroy(&list2));
806: PetscCall(PetscFree(indx1));
807: PetscCall(PetscFree(indx2));
808: #endif
809: break;
811: case 3:
812: #if defined(PETSC_USE_COMPLEX)
813: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
815: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
816: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
817: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
818: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
819: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
820: PetscCall(VecScatterDestroy(&vecscat));
821: PetscCall(ISDestroy(&list1));
822: PetscCall(ISDestroy(&list2));
823: #else
824: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
825: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
826: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
828: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
829: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
831: if (dim[2] % 2 == 0) NM = dim[2] + 2;
832: else NM = dim[2] + 1;
834: for (i = 0; i < local_n0; i++) {
835: for (j = 0; j < dim[1]; j++) {
836: for (k = 0; k < dim[2]; k++) {
837: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
838: tempindx1 = i * dim[1] * NM + j * NM + k;
840: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
841: indx2[tempindx] = low + tempindx1;
842: }
843: }
844: }
846: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
847: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
848: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
849: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
850: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
851: PetscCall(VecScatterDestroy(&vecscat));
852: PetscCall(ISDestroy(&list1));
853: PetscCall(ISDestroy(&list2));
854: PetscCall(PetscFree(indx1));
855: PetscCall(PetscFree(indx2));
856: #endif
857: break;
859: default:
860: #if defined(PETSC_USE_COMPLEX)
861: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
863: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
864: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
865: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
866: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
867: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
868: PetscCall(VecScatterDestroy(&vecscat));
869: PetscCall(ISDestroy(&list1));
870: PetscCall(ISDestroy(&list2));
871: #else
872: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
873: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
874: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
876: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
878: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
880: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
882: partial_dim = fftw->partial_dim;
884: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
885: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
887: if (dim[ndim - 1] % 2 == 0) NM = 2;
888: else NM = 1;
890: j = low;
891: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
892: indx1[i] = local_0_start * partial_dim + i;
893: indx2[i] = j;
894: if (k % dim[ndim - 1] == 0) j += NM;
895: j++;
896: }
897: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
898: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
899: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
900: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
901: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902: PetscCall(VecScatterDestroy(&vecscat));
903: PetscCall(ISDestroy(&list1));
904: PetscCall(ISDestroy(&list2));
905: PetscCall(PetscFree(indx1));
906: PetscCall(PetscFree(indx2));
907: #endif
908: break;
909: }
910: #endif
911: }
912: PetscFunctionReturn(PETSC_SUCCESS);
913: }
915: /*@
916: VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
918: Collective
920: Input Parameters:
921: + A - `MATFFTW` matrix
922: - x - FFTW vector
924: Output Parameter:
925: . y - PETSc vector
927: Level: intermediate
929: Note:
930: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
931: `VecScatterFFTWToPetsc()` removes those extra zeros.
933: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
934: @*/
935: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
936: {
937: PetscFunctionBegin;
938: PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
939: PetscFunctionReturn(PETSC_SUCCESS);
940: }
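/*
   Illustrative sketch (assumptions: F is an existing MATFFTW matrix, yfftw/zfftw were obtained
   from MatCreateVecsFFTW(), and zpetsc has the natural, unpadded layout): strip the padding
   zeros from the backward-transform output.

     PetscCall(MatMultTranspose(F, yfftw, zfftw));       // backward FFT, output still padded
     PetscCall(VecScatterFFTWToPetsc(F, zfftw, zpetsc)); // remove the extra zeros
*/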
942: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
943: {
944: MPI_Comm comm;
945: Mat_FFT *fft = (Mat_FFT *)A->data;
946: PetscInt low;
947: PetscMPIInt rank, size;
948: VecScatter vecscat;
949: IS list1;
951: PetscFunctionBegin;
952: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
953: PetscCallMPI(MPI_Comm_size(comm, &size));
954: PetscCallMPI(MPI_Comm_rank(comm, &rank));
955: PetscCall(VecGetOwnershipRange(x, &low, NULL));
957: if (size == 1) {
958: PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
959: PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
960: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
961: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
962: PetscCall(VecScatterDestroy(&vecscat));
963: PetscCall(ISDestroy(&list1));
965: #if !PetscDefined(HAVE_MPIUNI)
966: } else {
967: Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
968: PetscInt ndim = fft->ndim, *dim = fft->dim;
969: ptrdiff_t local_n0, local_0_start;
970: ptrdiff_t local_n1, local_1_start;
971: IS list2;
972: #if !defined(PETSC_USE_COMPLEX)
973: PetscInt i, j, k, partial_dim;
974: PetscInt *indx1, *indx2, tempindx, tempindx1;
975: PetscInt NM;
976: ptrdiff_t temp;
977: #endif
978: switch (ndim) {
979: case 1:
980: #if defined(PETSC_USE_COMPLEX)
981: fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
983: PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
984: PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
985: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
986: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
987: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
988: PetscCall(VecScatterDestroy(&vecscat));
989: PetscCall(ISDestroy(&list1));
990: PetscCall(ISDestroy(&list2));
991: #else
992: SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
993: #endif
994: break;
995: case 2:
996: #if defined(PETSC_USE_COMPLEX)
997: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
999: PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1000: PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1001: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1002: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1003: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1004: PetscCall(VecScatterDestroy(&vecscat));
1005: PetscCall(ISDestroy(&list1));
1006: PetscCall(ISDestroy(&list2));
1007: #else
1008: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1010: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
1011: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
1013: if (dim[1] % 2 == 0) NM = dim[1] + 2;
1014: else NM = dim[1] + 1;
1016: for (i = 0; i < local_n0; i++) {
1017: for (j = 0; j < dim[1]; j++) {
1018: tempindx = i * dim[1] + j;
1019: tempindx1 = i * NM + j;
1021: indx1[tempindx] = local_0_start * dim[1] + tempindx;
1022: indx2[tempindx] = low + tempindx1;
1023: }
1024: }
1026: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1027: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1029: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1030: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1031: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1032: PetscCall(VecScatterDestroy(&vecscat));
1033: PetscCall(ISDestroy(&list1));
1034: PetscCall(ISDestroy(&list2));
1035: PetscCall(PetscFree(indx1));
1036: PetscCall(PetscFree(indx2));
1037: #endif
1038: break;
1039: case 3:
1040: #if defined(PETSC_USE_COMPLEX)
1041: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1043: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1044: PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1045: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1046: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1047: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1048: PetscCall(VecScatterDestroy(&vecscat));
1049: PetscCall(ISDestroy(&list1));
1050: PetscCall(ISDestroy(&list2));
1051: #else
1052: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1054: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
1055: PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
1057: if (dim[2] % 2 == 0) NM = dim[2] + 2;
1058: else NM = dim[2] + 1;
1060: for (i = 0; i < local_n0; i++) {
1061: for (j = 0; j < dim[1]; j++) {
1062: for (k = 0; k < dim[2]; k++) {
1063: tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
1064: tempindx1 = i * dim[1] * NM + j * NM + k;
1066: indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1067: indx2[tempindx] = low + tempindx1;
1068: }
1069: }
1070: }
1072: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1073: PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1075: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1076: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1077: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1078: PetscCall(VecScatterDestroy(&vecscat));
1079: PetscCall(ISDestroy(&list1));
1080: PetscCall(ISDestroy(&list2));
1081: PetscCall(PetscFree(indx1));
1082: PetscCall(PetscFree(indx2));
1083: #endif
1084: break;
1085: default:
1086: #if defined(PETSC_USE_COMPLEX)
1087: fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1089: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1090: PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1091: PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1092: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1093: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1094: PetscCall(VecScatterDestroy(&vecscat));
1095: PetscCall(ISDestroy(&list1));
1096: PetscCall(ISDestroy(&list2));
1097: #else
1098: temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1100: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1102: fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1104: (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1106: partial_dim = fftw->partial_dim;
1108: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
1109: PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1111: if (dim[ndim - 1] % 2 == 0) NM = 2;
1112: else NM = 1;
1114: j = low;
1115: for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1116: indx1[i] = local_0_start * partial_dim + i;
1117: indx2[i] = j;
1118: if (k % dim[ndim - 1] == 0) j += NM;
1119: j++;
1120: }
1121: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1122: PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1124: PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1125: PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1126: PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1127: PetscCall(VecScatterDestroy(&vecscat));
1128: PetscCall(ISDestroy(&list1));
1129: PetscCall(ISDestroy(&list2));
1130: PetscCall(PetscFree(indx1));
1131: PetscCall(PetscFree(indx2));
1132: #endif
1133: break;
1134: }
1135: #endif
1136: }
1137: PetscFunctionReturn(PETSC_SUCCESS);
1138: }
1140: /*MC
1141: MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1143: Level: intermediate
1145: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1146: M*/
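/*
   Example (illustrative): this type is normally selected through MatCreateFFT(), and the FFTW
   planner can be tuned at run time with the option registered in MatCreate_FFTW() below, e.g.

     ./app -mat_fftw_plannerflags FFTW_MEASURE
*/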
1147: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1148: {
1149: MPI_Comm comm;
1150: Mat_FFT *fft = (Mat_FFT *)A->data;
1151: Mat_FFTW *fftw;
1152: PetscInt ndim = fft->ndim, *dim = fft->dim;
1153: const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1154: unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1155: PetscBool flg;
1156: PetscInt p_flag, partial_dim = 1, ctr;
1157: PetscMPIInt size, rank;
1158: ptrdiff_t *pdim;
1159: #if !defined(PETSC_USE_COMPLEX)
1160: PetscInt tot_dim;
1161: #endif
1163: PetscFunctionBegin;
1164: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1165: PetscCallMPI(MPI_Comm_size(comm, &size));
1166: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1168: #if !PetscDefined(HAVE_MPIUNI)
1169: fftw_mpi_init();
1170: #endif
1171: pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1172: pdim[0] = dim[0];
1173: #if !defined(PETSC_USE_COMPLEX)
1174: tot_dim = 2 * dim[0];
1175: #endif
1176: for (ctr = 1; ctr < ndim; ctr++) {
1177: partial_dim *= dim[ctr];
1178: pdim[ctr] = dim[ctr];
1179: #if !defined(PETSC_USE_COMPLEX)
1180: if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1181: else tot_dim *= dim[ctr];
1182: #endif
1183: }
1185: if (size == 1) {
1186: #if defined(PETSC_USE_COMPLEX)
1187: PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1188: fft->n = fft->N;
1189: #else
1190: PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1191: fft->n = tot_dim;
1192: #endif
1193: #if !PetscDefined(HAVE_MPIUNI)
1194: } else {
1195: ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1196: #if !defined(PETSC_USE_COMPLEX)
1197: ptrdiff_t temp;
1198: PetscInt N1;
1199: #endif
1201: switch (ndim) {
1202: case 1:
1203: #if !defined(PETSC_USE_COMPLEX)
1204: SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1205: #else
1206: fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1207: fft->n = (PetscInt)local_n0;
1208: PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1209: #endif
1210: break;
1211: case 2:
1212: #if defined(PETSC_USE_COMPLEX)
1213: fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1214: fft->n = (PetscInt)local_n0 * dim[1];
1215: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1216: #else
1217: fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1219: fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1220: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1221: #endif
1222: break;
1223: case 3:
1224: #if defined(PETSC_USE_COMPLEX)
1225: fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1227: fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1228: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1229: #else
1230: fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1232: fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1233: PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1234: #endif
1235: break;
1236: default:
1237: #if defined(PETSC_USE_COMPLEX)
1238: fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1240: fft->n = (PetscInt)local_n0 * partial_dim;
1241: PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1242: #else
1243: temp = pdim[ndim - 1];
1245: pdim[ndim - 1] = temp / 2 + 1;
1247: fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1249: fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1250: N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1252: pdim[ndim - 1] = temp;
1254: PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1255: #endif
1256: break;
1257: }
1258: #endif
1259: }
1260: free(pdim);
1261: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1262: PetscCall(PetscNew(&fftw));
1263: fft->data = (void *)fftw;
1265: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1266: fftw->partial_dim = partial_dim;
1268: PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1269: if (size == 1) {
1270: #if defined(PETSC_USE_64BIT_INDICES)
1271: fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1272: #else
1273: fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1274: #endif
1275: }
1277: for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1279: fftw->p_forward = NULL;
1280: fftw->p_backward = NULL;
1281: fftw->p_flag = FFTW_ESTIMATE;
1283: if (size == 1) {
1284: A->ops->mult = MatMult_SeqFFTW;
1285: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1286: #if !PetscDefined(HAVE_MPIUNI)
1287: } else {
1288: A->ops->mult = MatMult_MPIFFTW;
1289: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1290: #endif
1291: }
1292: fft->matdestroy = MatDestroy_FFTW;
1293: A->assembled = PETSC_TRUE;
1294: A->preallocated = PETSC_TRUE;
1296: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1297: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1298: PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1300: /* get runtime options */
1301: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1302: PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1303: if (flg) fftw->p_flag = iplans[p_flag];
1304: PetscOptionsEnd();
1305: PetscFunctionReturn(PETSC_SUCCESS);
1306: }