Actual source code: petsclandau.h

  1: #ifndef PETSCLANDAU_H
  2: #define PETSCLANDAU_H

  4: #include <petscdmplex.h>
  5: #include <petscts.h>

  7: PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt);
  8: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm, PetscInt, const char[], Vec *, Mat *, DM *);
  9: PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM *);
 10: PETSC_EXTERN PetscErrorCode DMPlexLandauAccess(DM, Vec, PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void *), void *);
 11: PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *);
 12: PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat);
 13: PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal, Vec, Vec, Vec, void *);
 14: PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal, Vec, Vec, PetscReal, Mat, Mat, void *);

 16: typedef int LandauIdx;

 18: /* the Fokker-Planck-Landau context */
 19: #if !defined(LANDAU_MAX_SPECIES)
 20:   #if defined(PETSC_USE_DMLANDAU_2D)
 21:     #define LANDAU_MAX_SPECIES 10
 22:     #define LANDAU_MAX_GRIDS   3
 23:   #else
 24:     #define LANDAU_MAX_SPECIES 10
 25:     #define LANDAU_MAX_GRIDS   3
 26:   #endif
 27: #else
 28:   #define LANDAU_MAX_GRIDS 3
 29: #endif

 31: #if !defined(LANDAU_MAX_Q)
 32:   #if defined(LANDAU_MAX_NQ)
 33:     #error "LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
 34:   #endif
 35:   #if defined(PETSC_USE_DMLANDAU_2D)
 36:     #define LANDAU_MAX_Q 6
 37:   #else
 38:     // 3D CUDA fails with > 3 (KK-CUDA is OK)
 39:     #define LANDAU_MAX_Q 4
 40:   #endif
 41: #else
 42:   #undef LANDAU_MAX_NQ
 43: #endif

 45: #if defined(PETSC_USE_DMLANDAU_2D)
 46:   #define LANDAU_MAX_Q_FACE   LANDAU_MAX_Q
 47:   #define LANDAU_MAX_NQ       (LANDAU_MAX_Q * LANDAU_MAX_Q)
 48:   #define LANDAU_MAX_BATCH_SZ 1024
 49:   #define LANDAU_DIM          2
 50: #else
 51:   #define LANDAU_MAX_Q_FACE   (LANDAU_MAX_Q * LANDAU_MAX_Q)
 52:   #define LANDAU_MAX_NQ       (LANDAU_MAX_Q * LANDAU_MAX_Q * LANDAU_MAX_Q)
 53:   #define LANDAU_MAX_BATCH_SZ 64
 54:   #define LANDAU_DIM          3
 55: #endif

 57: typedef enum {
 58:   LANDAU_CUDA,
 59:   LANDAU_KOKKOS,
 60:   LANDAU_CPU
 61: } LandauDeviceType;

 63: // static data - will be "device" data
 64: typedef struct {
 65:   void *invJ;    // nip*dim*dim
 66:   void *D;       // nq*nb*dim
 67:   void *B;       // nq*nb
 68:   void *alpha;   // ns
 69:   void *beta;    // ns
 70:   void *invMass; // ns
 71:   void *w;       // nip
 72:   void *x;       // nip
 73:   void *y;       // nip
 74:   void *z;       // nip
 75:   void *Eq_m;    // ns - dynamic
 76:   void *f;       //  nip*Nf - dynamic (IP)
 77:   void *dfdx;    // nip*Nf - dynamic (IP)
 78:   void *dfdy;    // nip*Nf - dynamic (IP)
 79:   void *dfdz;    // nip*Nf - dynamic (IP)
 80:   int   dim_, ns_, nip_, nq_, nb_;
 81:   void *NCells;         // remove and use elem_offset - TODO
 82:   void *species_offset; // for each grid, but same for all batched vertices
 83:   void *mat_offset;     // for each grid, but same for all batched vertices
 84:   void *elem_offset;    // for each grid, but same for all batched vertices
 85:   void *ip_offset;      // for each grid, but same for all batched vertices
 86:   void *ipf_offset;     // for each grid, but same for all batched vertices
 87:   void *ipfdf_data;     // for each grid, but same for all batched vertices
 88:   void *maps;           // for each grid, but same for all batched vertices
 89:   // COO
 90:   void     *coo_elem_offsets;
 91:   void     *coo_elem_point_offsets;
 92:   void     *coo_elem_fullNb;
 93:   void     *coo_vals;
 94:   void     *lambdas;
 95:   LandauIdx coo_n_cellsTot;
 96:   LandauIdx coo_size;
 97:   LandauIdx coo_max_fullnb;
 98: } LandauStaticData;

100: typedef enum {
101:   LANDAU_EX2_TSSOLVE,
102:   LANDAU_MATRIX_TOTAL,
103:   LANDAU_OPERATOR,
104:   LANDAU_JACOBIAN_COUNT,
105:   LANDAU_JACOBIAN,
106:   LANDAU_MASS,
107:   LANDAU_F_DF,
108:   LANDAU_KERNEL,
109:   KSP_FACTOR,
110:   KSP_SOLVE,
111:   LANDAU_NUM_TIMERS
112: } LandauOMPTimers;

114: typedef struct {
115:   PetscBool interpolate; /* Generate intermediate mesh elements */
116:   PetscBool gpu_assembly;
117:   MPI_Comm  comm; /* global communicator to use for errors and diagnostics */
118:   double    times[LANDAU_NUM_TIMERS];
119:   PetscBool use_matrix_mass;
120:   /* FE */
121:   PetscFE fe[LANDAU_MAX_SPECIES];
122:   /* geometry  */
123:   PetscReal radius[LANDAU_MAX_GRIDS];
124:   PetscReal radius_par[LANDAU_MAX_GRIDS];
125:   PetscReal radius_perp[LANDAU_MAX_GRIDS];
126:   PetscReal re_radius;      /* RE: radius of refinement along v_perp=0, z>0 */
127:   PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */
128:   PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
129:   PetscBool sphere;
130:   PetscReal sphere_inner_radius_90degree;
131:   PetscReal sphere_inner_radius_45degree;
132:   PetscInt  cells0[3];
133:   /* AMR */
134:   PetscBool use_p4est;
135:   PetscInt  numRERefine;                     /* RE: refinement along v_perp=0, z > 0 */
136:   PetscInt  nZRefine1;                       /* RE: origin refinement after v_perp=0 refinement */
137:   PetscInt  nZRefine2;                       /* RE: origin refinement after origin AMR refinement */
138:   PetscInt  numAMRRefine[LANDAU_MAX_GRIDS];  /* normal AMR - refine from origin */
139:   PetscInt  postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
140:   /* relativistic */
141:   PetscBool use_energy_tensor_trick;
142:   PetscBool use_relativistic_corrections;
143:   /* physics */
144:   PetscReal thermal_temps[LANDAU_MAX_SPECIES];
145:   PetscReal masses[LANDAU_MAX_SPECIES];  /* mass of each species  */
146:   PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species  */
147:   PetscReal n[LANDAU_MAX_SPECIES];       /* number density of each species  */
148:   PetscReal m_0;                         /* reference mass */
149:   PetscReal v_0;                         /* reference velocity */
150:   PetscReal n_0;                         /* reference number density */
151:   PetscReal t_0;                         /* reference time */
152:   PetscReal Ez;
153:   PetscReal epsilon0;
154:   PetscReal k;
155:   PetscReal lambdas[LANDAU_MAX_GRIDS][LANDAU_MAX_GRIDS];
156:   PetscReal electronShift;
157:   PetscInt  num_species;
158:   PetscInt  num_grids;
159:   PetscInt  species_offset[LANDAU_MAX_GRIDS + 1]; // for each grid, but same for all batched vertices
160:   PetscInt  mat_offset[LANDAU_MAX_GRIDS + 1];     // for each grid, but same for all batched vertices
161:   // batching
162:   PetscBool  jacobian_field_major_order; // this could be a type but lets not get pedantic
163:   VecScatter plex_batch;
164:   Vec        work_vec;
165:   IS         batch_is;
166:   PetscErrorCode (*seqaij_mult)(Mat, Vec, Vec);
167:   PetscErrorCode (*seqaij_multtranspose)(Mat, Vec, Vec);
168:   PetscErrorCode (*seqaij_solve)(Mat, Vec, Vec);
169:   PetscErrorCode (*seqaij_getdiagonal)(Mat, Vec);
170:   /* COO */
171:   PetscBool coo_assembly;
172:   /* cache */
173:   Mat J;
174:   Mat M;
175:   Vec X;
176:   /* derived type */
177:   void *data;
178:   /* computing */
179:   LandauDeviceType deviceType;
180:   DM               pack;
181:   DM               plex[LANDAU_MAX_GRIDS];
182:   LandauStaticData SData_d; /* static geometric data on device */
183:   /* diagnostics */
184:   PetscInt         verbose;
185:   PetscLogEvent    events[20];
186:   PetscLogStage    stage;
187:   PetscObjectState norm_state;
188:   PetscInt         batch_sz;
189:   PetscInt         batch_view_idx;
190: } LandauCtx;

192: #define LANDAU_SPECIES_MAJOR
193: #if !defined(LANDAU_SPECIES_MAJOR)
194:   #define LAND_PACK_IDX(_b, _g)                         (_b * ctx->num_grids + _g)
195:   #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_b * _mat_off[_ngrid] + _mat_off[_g])
196: #else
197:   #define LAND_PACK_IDX(_b, _g)                         (_g * ctx->batch_sz + _b)
198:   #define LAND_MOFFSET(_b, _g, _nbch, _ngrid, _mat_off) (_nbch * _mat_off[_g] + _b * (_mat_off[_g + 1] - _mat_off[_g]))
199: #endif

201: typedef struct {
202:   PetscReal scale;
203:   LandauIdx gid; // Landau matrix index (<10,000)
204: } pointInterpolationP4est;
205: typedef struct _lP4estVertexMaps {
206:   LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems *  LANDAU_MAX_NQ (spoof for max , Nb) on device,
207:   LandauIdx        num_elements;
208:   LandauIdx        num_reduced;
209:   LandauIdx        num_face; // (Q or Q^2 for 3D)
210:   LandauDeviceType deviceType;
211:   PetscInt         Nf;
212:   pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
213:   struct _lP4estVertexMaps *d_self;
214:   void                     *vp1, *vp2, *vp3;
215:   PetscInt                  numgrids;
216: } P4estVertexMaps;

218: PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *);
219: #if defined(PETSC_HAVE_CUDA)
220: PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
221: PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
222: PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt);
223: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
224: PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *);
225: #endif
226: #if defined(PETSC_HAVE_KOKKOS)
227: PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
228: PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
229: PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
230: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], LandauStaticData *);
231: PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData *);
232: #endif

234: #endif /* PETSCLANDAU_H */