Actual source code: petsclandau.h
1: #ifndef PETSCLANDAU_H
2: #define PETSCLANDAU_H
4: #include <petscdmplex.h>
5: #include <petscts.h>
7: PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
8: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm, PetscInt, const char[], Vec *, Mat *, DM *);
9: PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM *);
10: PETSC_EXTERN PetscErrorCode DMPlexLandauAccess(DM, Vec, PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void *), void *);
11: PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *);
12: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
13: PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal, Vec, Vec, Vec, void *);
14: PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal, Vec, Vec, PetscReal, Mat, Mat, void *);
16: typedef int LandauIdx;
18: /* the Fokker-Planck-Landau context */
19: #if !defined(LANDAU_MAX_SPECIES)
20: #if defined(PETSC_USE_DMLANDAU_2D)
21: #define LANDAU_MAX_SPECIES 10
22: #define LANDAU_MAX_GRIDS 3
23: #else
24: #define LANDAU_MAX_SPECIES 10
25: #define LANDAU_MAX_GRIDS 3
26: #endif
27: #else
28: #define LANDAU_MAX_GRIDS 3
29: #endif
31: #if !defined(LANDAU_MAX_Q)
32: #if defined(LANDAU_MAX_NQ)
33: #error "LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
34: #endif
35: #if defined(PETSC_USE_DMLANDAU_2D)
36: #define LANDAU_MAX_Q 6
37: #else
38: // 3D CUDA fails with > 3 (KK-CUDA is OK)
39: #define LANDAU_MAX_Q 4
40: #endif
41: #else
42: #undef LANDAU_MAX_NQ
43: #endif
45: #if defined(PETSC_USE_DMLANDAU_2D)
46: #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q
47: #define LANDAU_MAX_NQ (LANDAU_MAX_Q * LANDAU_MAX_Q)
48: #define LANDAU_MAX_BATCH_SZ 1024
49: #define LANDAU_DIM 2
50: #else
51: #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q * LANDAU_MAX_Q)
52: #define LANDAU_MAX_NQ (LANDAU_MAX_Q * LANDAU_MAX_Q * LANDAU_MAX_Q)
53: #define LANDAU_MAX_BATCH_SZ 64
54: #define LANDAU_DIM 3
55: #endif
57: typedef enum {
58: LANDAU_CUDA,
59: LANDAU_KOKKOS,
60: LANDAU_CPU
61: } LandauDeviceType;
63: // static data - will be "device" data
64: typedef struct {
65: void *invJ; // nip*dim*dim
66: void *D; // nq*nb*dim
67: void *B; // nq*nb
68: void *alpha; // ns
69: void *beta; // ns
70: void *invMass; // ns
71: void *w; // nip
72: void *x; // nip
73: void *y; // nip
74: void *z; // nip
75: void *Eq_m; // ns - dynamic
76: void *f; // nip*Nf - dynamic (IP)
77: void *dfdx; // nip*Nf - dynamic (IP)
78: void *dfdy; // nip*Nf - dynamic (IP)
79: void *dfdz; // nip*Nf - dynamic (IP)
80: int dim_, ns_, nip_, nq_, nb_;
81: void *NCells; // remove and use elem_offset - TODO
82: void *species_offset; // for each grid, but same for all batched vertices
83: void *mat_offset; // for each grid, but same for all batched vertices
84: void *elem_offset; // for each grid, but same for all batched vertices
85: void *ip_offset; // for each grid, but same for all batched vertices
86: void *ipf_offset; // for each grid, but same for all batched vertices
87: void *ipfdf_data; // for each grid, but same for all batched vertices
88: void *maps; // for each grid, but same for all batched vertices
89: // COO
90: void *coo_elem_offsets;
91: void *coo_elem_point_offsets;
92: void *coo_elem_fullNb;
93: void *coo_vals;
94: void *lambdas;
95: LandauIdx coo_n_cellsTot;
96: LandauIdx coo_size;
97: LandauIdx coo_max_fullnb;
98: } LandauStaticData;
100: typedef enum {
101: LANDAU_EX2_TSSOLVE,
102: LANDAU_MATRIX_TOTAL,
103: LANDAU_OPERATOR,
104: LANDAU_JACOBIAN_COUNT,
105: LANDAU_JACOBIAN,
106: LANDAU_MASS,
107: LANDAU_F_DF,
108: LANDAU_KERNEL,
109: KSP_FACTOR,
110: KSP_SOLVE,
111: LANDAU_NUM_TIMERS
112: } LandauOMPTimers;
114: typedef struct {
115: PetscBool interpolate; /* Generate intermediate mesh elements */
116: PetscBool gpu_assembly;
117: MPI_Comm comm; /* global communicator to use for errors and diagnostics */
118: double times[LANDAU_NUM_TIMERS];
119: PetscBool use_matrix_mass;
120: /* FE */
121: PetscFE fe[LANDAU_MAX_SPECIES];
122: /* geometry */
123: PetscReal radius[LANDAU_MAX_GRIDS];
124: PetscReal radius_par[LANDAU_MAX_GRIDS];
125: PetscReal radius_perp[LANDAU_MAX_GRIDS];
126: PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */
127: PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */
128: PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
129: PetscBool sphere;
130: PetscReal sphere_inner_radius_90degree;
131: PetscReal sphere_inner_radius_45degree;
132: PetscInt cells0[3];
133: /* AMR */
134: PetscBool use_p4est;
135: PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */
136: PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */
137: PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */
138: PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */
139: PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
140: /* relativistic */
141: PetscBool use_energy_tensor_trick;
142: PetscBool use_relativistic_corrections;
143: /* physics */
144: PetscReal thermal_temps[LANDAU_MAX_SPECIES];
145: PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */
146: PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */
147: PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */
148: PetscReal m_0; /* reference mass */
149: PetscReal v_0; /* reference velocity */
150: PetscReal n_0; /* reference number density */
151: PetscReal t_0; /* reference time */
152: PetscReal Ez;
153: PetscReal epsilon0;
154: PetscReal k;
155: PetscReal lambdas[LANDAU_MAX_GRIDS][LANDAU_MAX_GRIDS];
156: PetscReal electronShift;
157: PetscInt num_species;
158: PetscInt num_grids;
159: PetscInt species_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
160: PetscInt mat_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
161: // batching
162: PetscBool jacobian_field_major_order; // this could be a type but lets not get pedantic
163: VecScatter plex_batch;
164: Vec work_vec;
165: IS batch_is;
166: PetscErrorCode (*seqaij_mult)(Mat, Vec, Vec);
167: PetscErrorCode (*seqaij_multtranspose)(Mat, Vec, Vec);
168: PetscErrorCode (*seqaij_solve)(Mat, Vec, Vec);
169: PetscErrorCode (*seqaij_getdiagonal)(Mat, Vec);
170: /* COO */
171: PetscBool coo_assembly;
172: /* cache */
173: Mat J;
174: Mat M;
175: Vec X;
176: /* derived type */
177: void *data;
178: /* computing */
179: LandauDeviceType deviceType;
180: DM pack;
181: DM plex[LANDAU_MAX_GRIDS];
182: LandauStaticData SData_d; /* static geometric data on device */
183: /* diagnostics */
184: PetscInt verbose;
185: PetscLogEvent events[20];
186: PetscLogStage stage;
187: PetscObjectState norm_state;
188: PetscInt batch_sz;
189: PetscInt batch_view_idx;
190: } LandauCtx;
192: #define LANDAU_SPECIES_MAJOR
193: #if !defined(LANDAU_SPECIES_MAJOR)
194: #define LAND_PACK_IDX(_b, _g) (_b * ctx->num_grids + _g)
195: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_b * _mat_off[_ngrid] + _mat_off[_g])
196: #else
197: #define LAND_PACK_IDX(_b, _g) (_g * ctx->batch_sz + _b)
198: #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_nbch * _mat_off[_g] + _b * (_mat_off[_g + 1] - _mat_off[_g]))
199: #endif
201: typedef struct {
202: PetscReal scale;
203: LandauIdx gid; // Landau matrix index (<10,000)
204: } pointInterpolationP4est;
205: typedef struct _lP4estVertexMaps {
206: LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems * LANDAU_MAX_NQ (spoof for max , Nb) on device,
207: LandauIdx num_elements;
208: LandauIdx num_reduced;
209: LandauIdx num_face; // (Q or Q^2 for 3D)
210: LandauDeviceType deviceType;
211: PetscInt Nf;
212: pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
213: struct _lP4estVertexMaps *d_self;
214: void *vp1, *vp2, *vp3;
215: PetscInt numgrids;
216: } P4estVertexMaps;
218: PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *);
219: #if defined(PETSC_HAVE_CUDA)
220: PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
221: PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
222: PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt);
223: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
224: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *);
225: #endif
226: #if defined(PETSC_HAVE_KOKKOS)
227: PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
228: PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
229: PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
230: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
231: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData *);
232: #endif
234: #endif /* PETSCLANDAU_H */