Actual source code: mkl_pardiso.c
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <../src/mat/impls/dense/seq/dense.h>

#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
#define MKL_ILP64
#endif
#include <mkl_pardiso.h>

PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int);

/*
 * Possible mkl_pardiso phases that control the execution of the solver.
 * For more information check the mkl_pardiso manual.
 */
#define JOB_ANALYSIS 11
#define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
#define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
#define JOB_NUMERICAL_FACTORIZATION 22
#define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
#define JOB_SOLVE_ITERATIVE_REFINEMENT 33
#define JOB_SOLVE_FORWARD_SUBSTITUTION 331
#define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
#define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
#define JOB_RELEASE_OF_LU_MEMORY 0
#define JOB_RELEASE_OF_ALL_MEMORY -1
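
/* Example (a sketch of how the phase codes above are used in this file): a typical
   factor-and-solve run issues phase 11 (analysis), then 22 (numerical factorization),
   then 33 (solve with iterative refinement); phase -1 releases all of the solver's
   internal memory, as done in MatDestroy_MKL_PARDISO() below. */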

#define IPARM_SIZE 64

#if defined(PETSC_USE_64BIT_INDICES)
#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
#define INT_TYPE long long int
#define MKL_PARDISO pardiso
#define MKL_PARDISO_INIT pardisoinit
#else
/* this is the case where the MKL BLAS/LAPACK use 32-bit integers but the 64-bit integer version
   of the Pardiso code is used; hence the need for the _64 below */
#define INT_TYPE long long int
#define MKL_PARDISO pardiso_64
#define MKL_PARDISO_INIT pardiso_64init
void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm[])
{
  int iparm_copy[IPARM_SIZE], mtype_copy, i;

  mtype_copy = *mtype;
  pardisoinit(pt, &mtype_copy, iparm_copy);
  for (i = 0; i < IPARM_SIZE; i++) iparm[i] = iparm_copy[i];
}
#endif
#else
#define INT_TYPE int
#define MKL_PARDISO pardiso
#define MKL_PARDISO_INIT pardisoinit
#endif

/*
 * Internal data structure.
 * For more information check the mkl_pardiso manual.
 */
typedef struct {
  /* Configuration vector */
  INT_TYPE iparm[IPARM_SIZE];

  /*
   * Internal mkl_pardiso memory location.
   * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak.
   */
  void *pt[IPARM_SIZE];

  /* Basic mkl_pardiso info */
  INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;

  /* Matrix structure */
  void     *a;
  INT_TYPE *ia, *ja;

  /* Number of non-zero elements */
  INT_TYPE nz;

  /* Row permutation vector */
  INT_TYPE *perm;

  /* Indicates whether the matrix preserves its sparse structure. */
  MatStructure matstruc;

  PetscBool needsym;
  PetscBool freeaij;

  /* Schur complement */
  PetscScalar *schur;
  PetscInt     schur_size;
  PetscInt    *schur_idxs;
  PetscScalar *schur_work;
  PetscBLASInt schur_work_size;
  PetscBool    solve_interior;

  /* True if mkl_pardiso functions have been used. */
  PetscBool CleanUp;

  /* Conversion to a format suitable for MKL */
  PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool *, INT_TYPE *, INT_TYPE **, INT_TYPE **, PetscScalar **);
} Mat_MKL_PARDISO;
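
/* Note on indexing (the convention used throughout this file): the MKL_PARDISO manual numbers
   iparm entries 1..64, while C arrays are 0-based, so iparm[k - 1] below corresponds to the
   manual's iparm(k); e.g. iparm[12 - 1] is the transpose/conjugate-transpose solve flag. */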

PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
{
  Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
  PetscInt      bs = A->rmap->bs, i;

  PetscFunctionBegin;
  PetscCheck(sym, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen");
  *v = aa->a;
  if (bs == 1) { /* already in the correct format */
    /* though PetscInt and INT_TYPE are of the same size, since they are defined differently the Intel compiler requires a cast */
    *r    = (INT_TYPE *)aa->i;
    *c    = (INT_TYPE *)aa->j;
    *nnz  = (INT_TYPE)aa->nz;
    *free = PETSC_FALSE;
  } else if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt  m = A->rmap->n, nz = aa->nz;
    PetscInt *row, *col;
    PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
    for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1;
    for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1;
    *r    = (INT_TYPE *)row;
    *c    = (INT_TYPE *)col;
    *nnz  = (INT_TYPE)nz;
    *free = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
{
  Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
  PetscInt     bs = A->rmap->bs, i;

  PetscFunctionBegin;
  if (!sym) {
    *v = aa->a;
    if (bs == 1) { /* already in the correct format */
      /* though PetscInt and INT_TYPE are of the same size, since they are defined differently the Intel compiler requires a cast */
      *r    = (INT_TYPE *)aa->i;
      *c    = (INT_TYPE *)aa->j;
      *nnz  = (INT_TYPE)aa->nz;
      *free = PETSC_FALSE;
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (reuse == MAT_INITIAL_MATRIX) {
      PetscInt  m = A->rmap->n, nz = aa->nz;
      PetscInt *row, *col;
      PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
      for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1;
      for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1;
      *r   = (INT_TYPE *)row;
      *c   = (INT_TYPE *)col;
      *nnz = (INT_TYPE)nz;
    }
    *free = PETSC_TRUE;
  } else {
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
{
  Mat_SeqAIJ  *aa = (Mat_SeqAIJ *)A->data;
  PetscScalar *aav;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, (const PetscScalar **)&aav));
  if (!sym) { /* already in the correct format */
    *v    = aav;
    *r    = (INT_TYPE *)aa->i;
    *c    = (INT_TYPE *)aa->j;
    *nnz  = (INT_TYPE)aa->nz;
    *free = PETSC_FALSE;
  } else if (reuse == MAT_INITIAL_MATRIX) { /* need to get the triangular part */
    PetscScalar *vals, *vv;
    PetscInt    *row, *col, *jj;
    PetscInt     m = A->rmap->n, nz, i;

    nz = 0;
    for (i = 0; i < m; i++) nz += aa->i[i + 1] - aa->diag[i];
    PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &vals));
    jj = col;
    vv = vals;

    row[0] = 0;
    for (i = 0; i < m; i++) {
      PetscInt    *aj = aa->j + aa->diag[i];
      PetscScalar *av = aav + aa->diag[i];
      PetscInt     rl = aa->i[i + 1] - aa->diag[i], j;

      for (j = 0; j < rl; j++) {
        *jj = *aj;
        jj++;
        aj++;
        *vv = *av;
        vv++;
        av++;
      }
      row[i + 1] = row[i] + rl;
    }
    *v    = vals;
    *r    = (INT_TYPE *)row;
    *c    = (INT_TYPE *)col;
    *nnz  = (INT_TYPE)nz;
    *free = PETSC_TRUE;
  } else {
    PetscScalar *vv;
    PetscInt     m = A->rmap->n, i;

    vv = *v;
    for (i = 0; i < m; i++) {
      PetscScalar *av = aav + aa->diag[i];
      PetscInt     rl = aa->i[i + 1] - aa->diag[i], j;
      for (j = 0; j < rl; j++) {
        *vv = *av;
        vv++;
        av++;
      }
    }
    *free = PETSC_TRUE;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, (const PetscScalar **)&aav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat F, PetscScalar *B, PetscScalar *X)
{
  Mat_MKL_PARDISO     *mpardiso = (Mat_MKL_PARDISO *)F->data;
  Mat                  S, Xmat, Bmat;
  MatFactorSchurStatus schurstatus;

  PetscFunctionBegin;
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCheck(X != B || schurstatus != MAT_FACTOR_SCHUR_INVERTED, PETSC_COMM_SELF, PETSC_ERR_SUP, "X and B cannot point to the same address");
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, B, &Bmat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, X, &Xmat));
  PetscCall(MatSetType(Bmat, ((PetscObject)S)->type_name));
  PetscCall(MatSetType(Xmat, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(Xmat, S->boundtocpu));
  PetscCall(MatBindToCPU(Bmat, S->boundtocpu));
#endif

#if defined(PETSC_USE_COMPLEX)
  PetscCheck(mpardiso->iparm[12 - 1] != 1, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Hermitian solve not implemented yet");
#endif

  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    if (!mpardiso->iparm[12 - 1]) {
      PetscCall(MatMatSolve(S, Bmat, Xmat));
    } else { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, Bmat, Xmat));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    PetscCall(MatProductCreateWithMat(S, Bmat, NULL, Xmat));
    if (!mpardiso->iparm[12 - 1]) {
      PetscCall(MatProductSetType(Xmat, MATPRODUCT_AB));
    } else { /* transpose solve */
      PetscCall(MatProductSetType(Xmat, MATPRODUCT_AtB));
    }
    PetscCall(MatProductSetFromOptions(Xmat));
    PetscCall(MatProductSymbolic(Xmat));
    PetscCall(MatProductNumeric(Xmat));
    PetscCall(MatProductClear(Xmat));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %" PetscInt_FMT, (PetscInt)F->schur_status);
    break;
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&Bmat));
  PetscCall(MatDestroy(&Xmat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is)
{
  Mat_MKL_PARDISO   *mpardiso = (Mat_MKL_PARDISO *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;
  PetscMPIInt        csize;
  PetscBool          sorted;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &csize));
  PetscCheck(csize <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "MKL_PARDISO parallel Schur complements not yet supported from PETSc");
  PetscCall(ISSorted(is, &sorted));
  PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS for MKL_PARDISO Schur complements needs to be sorted");
  PetscCall(ISGetLocalSize(is, &size));
  PetscCall(PetscFree(mpardiso->schur_work));
  PetscCall(PetscBLASIntCast(PetscMax(mpardiso->n, 2 * size), &mpardiso->schur_work_size));
  PetscCall(PetscMalloc1(mpardiso->schur_work_size, &mpardiso->schur_work));
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mpardiso->schur      = (PetscScalar *)arr;
  mpardiso->schur_size = size;
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mpardiso->mtype == 2) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  PetscCall(PetscFree(mpardiso->schur_idxs));
  PetscCall(PetscMalloc1(size, &mpardiso->schur_idxs));
  PetscCall(PetscArrayzero(mpardiso->perm, mpardiso->n));
  PetscCall(ISGetIndices(is, &idxs));
  PetscCall(PetscArraycpy(mpardiso->schur_idxs, idxs, size));
  for (i = 0; i < size; i++) mpardiso->perm[idxs[i]] = 1;
  PetscCall(ISRestoreIndices(is, &idxs));
  if (size) { /* turn on Schur switch if the set of indices is not empty */
    mpardiso->iparm[36 - 1] = 2;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
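
/* Caller-side usage sketch (illustrative only; the calls are standard PETSc API, and the
   important constraint is the ordering -- the index set must be supplied before the symbolic
   factorization):

     PetscCall(MatGetFactor(A, MATSOLVERMKL_PARDISO, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));
     ... symbolic and numeric factorization of F ...
     PetscCall(MatFactorGetSchurComplement(F, &S, &status));

   where `is` is a sorted IS holding the indices of the Schur block. */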

PetscErrorCode MatDestroy_MKL_PARDISO(Mat A)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;

  PetscFunctionBegin;
  if (mat_mkl_pardiso->CleanUp) {
    mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;

    MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, NULL, NULL, NULL, NULL, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, NULL,
                &mat_mkl_pardiso->err);
  }
  PetscCall(PetscFree(mat_mkl_pardiso->perm));
  PetscCall(PetscFree(mat_mkl_pardiso->schur_work));
  PetscCall(PetscFree(mat_mkl_pardiso->schur_idxs));
  if (mat_mkl_pardiso->freeaij) {
    PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja));
    if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a));
  }
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMkl_PardisoSetCntl_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
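
/* Gathers (reduce = PETSC_TRUE) or scatters back (reduce = PETSC_FALSE) the Schur rows of a set
   of right-hand sides: `whole` is an n-by-nrhs array stored column by column, and `schur` is the
   corresponding schur_size-by-nrhs block addressed through schur_idxs. */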
static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce)
{
  PetscFunctionBegin;
  if (reduce) { /* data given for the whole matrix */
    PetscInt i, m = 0, p = 0;
    for (i = 0; i < mpardiso->nrhs; i++) {
      PetscInt j;
      for (j = 0; j < mpardiso->schur_size; j++) schur[p + j] = whole[m + mpardiso->schur_idxs[j]];
      m += mpardiso->n;
      p += mpardiso->schur_size;
    }
  } else { /* from Schur to whole */
    PetscInt i, m = 0, p = 0;
    for (i = 0; i < mpardiso->nrhs; i++) {
      PetscInt j;
      for (j = 0; j < mpardiso->schur_size; j++) whole[m + mpardiso->schur_idxs[j]] = schur[p + j];
      m += mpardiso->n;
      p += mpardiso->schur_size;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolve_MKL_PARDISO(Mat A, Vec b, Vec x)
{
  Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
  PetscScalar       *xarray;
  const PetscScalar *barray;

  PetscFunctionBegin;
  mat_mkl_pardiso->nrhs = 1;
  PetscCall(VecGetArrayWrite(x, &xarray));
  PetscCall(VecGetArrayRead(b, &barray));

  if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
  else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;

  if (barray == xarray) { /* if the two vectors share the same memory */
    PetscScalar *work;
    if (!mat_mkl_pardiso->schur_work) {
      PetscCall(PetscMalloc1(mat_mkl_pardiso->n, &work));
    } else {
      work = mat_mkl_pardiso->schur_work;
    }
    mat_mkl_pardiso->iparm[6 - 1] = 1;
    MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, NULL, &mat_mkl_pardiso->nrhs,
                mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)work, &mat_mkl_pardiso->err);
    if (!mat_mkl_pardiso->schur_work) PetscCall(PetscFree(work));
  } else {
    mat_mkl_pardiso->iparm[6 - 1] = 0;
    MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err);
  }
  PetscCall(VecRestoreArrayRead(b, &barray));

  PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);

  if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
    if (!mat_mkl_pardiso->solve_interior) {
      PetscInt shift = mat_mkl_pardiso->schur_size;

      PetscCall(MatFactorFactorizeSchurComplement(A));
      /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
      if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;

      /* solve Schur complement */
      PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE));
      PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift));
      PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE));
    } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substituted to xarray[schur] will be neglected */
      PetscInt i;
      for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.;
    }

    /* expansion phase */
    mat_mkl_pardiso->iparm[6 - 1] = 1;
    mat_mkl_pardiso->phase        = JOB_SOLVE_BACKWARD_SUBSTITUTION;
    MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
                &mat_mkl_pardiso->err);

    PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);
    mat_mkl_pardiso->iparm[6 - 1] = 0;
  }
  PetscCall(VecRestoreArrayWrite(x, &xarray));
  mat_mkl_pardiso->CleanUp = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A, Vec b, Vec x)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
  PetscInt         oiparm12;

  PetscFunctionBegin;
  oiparm12                       = mat_mkl_pardiso->iparm[12 - 1];
  mat_mkl_pardiso->iparm[12 - 1] = 2;
  PetscCall(MatSolve_MKL_PARDISO(A, b, x));
  mat_mkl_pardiso->iparm[12 - 1] = oiparm12;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A, Mat B, Mat X)
{
  Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
  const PetscScalar *barray;
  PetscScalar       *xarray;
  PetscBool          flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)B, MATSEQDENSE, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATSEQDENSE matrix");
  if (X != B) {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)X, MATSEQDENSE, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATSEQDENSE matrix");
  }

  PetscCall(MatGetSize(B, NULL, (PetscInt *)&mat_mkl_pardiso->nrhs));

  if (mat_mkl_pardiso->nrhs > 0) {
    PetscCall(MatDenseGetArrayRead(B, &barray));
    PetscCall(MatDenseGetArrayWrite(X, &xarray));

    PetscCheck(barray != xarray, PETSC_COMM_SELF, PETSC_ERR_SUP, "B and X cannot share the same memory location");
    if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
    else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;

    MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err);
    PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);

    PetscCall(MatDenseRestoreArrayRead(B, &barray));
    if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
      PetscScalar *o_schur_work = NULL;

      /* solve Schur complement */
      if (!mat_mkl_pardiso->solve_interior) {
        PetscInt shift = mat_mkl_pardiso->schur_size * mat_mkl_pardiso->nrhs, scale;
        PetscInt mem   = mat_mkl_pardiso->n * mat_mkl_pardiso->nrhs;

        PetscCall(MatFactorFactorizeSchurComplement(A));
        /* allocate extra memory if it is needed */
        scale = 1;
        if (A->schur_status == MAT_FACTOR_SCHUR_INVERTED) scale = 2;
        mem *= scale;
        if (mem > mat_mkl_pardiso->schur_work_size) {
          o_schur_work = mat_mkl_pardiso->schur_work;
          PetscCall(PetscMalloc1(mem, &mat_mkl_pardiso->schur_work));
        }
        /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
        if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;
        PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE));
        PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift));
        PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE));
      } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substituted to xarray[schur,n] will be neglected */
        PetscInt i, n, m = 0;
        for (n = 0; n < mat_mkl_pardiso->nrhs; n++) {
          for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i] + m] = 0.;
          m += mat_mkl_pardiso->n;
        }
      }

      /* expansion phase */
      mat_mkl_pardiso->iparm[6 - 1] = 1;
      mat_mkl_pardiso->phase        = JOB_SOLVE_BACKWARD_SUBSTITUTION;
      MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
                  &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
                  &mat_mkl_pardiso->err);
      if (o_schur_work) { /* restore original schur_work (minimal size) */
        PetscCall(PetscFree(mat_mkl_pardiso->schur_work));
        mat_mkl_pardiso->schur_work = o_schur_work;
      }
      PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);
      mat_mkl_pardiso->iparm[6 - 1] = 0;
    }
    PetscCall(MatDenseRestoreArrayWrite(X, &xarray));
  }
  mat_mkl_pardiso->CleanUp = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;

  PetscFunctionBegin;
  mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
  PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_REUSE_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a));

  mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION;
  MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
              &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, (void *)mat_mkl_pardiso->schur, &mat_mkl_pardiso->err);
  PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);

  /* report flops */
  if (mat_mkl_pardiso->iparm[18] > 0) PetscCall(PetscLogFlops(PetscPowRealInt(10., 6) * mat_mkl_pardiso->iparm[18]));

  if (F->schur) { /* Schur output from pardiso is in row-major format */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
    PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
  }
  mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
  mat_mkl_pardiso->CleanUp  = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetFromOptions_MKL_PARDISO(Mat F, Mat A)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
  PetscInt         icntl, bs, threads = 1;
  PetscBool        flg;

  PetscFunctionBegin;
  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL_PARDISO Options", "Mat");

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_65", "Suggested number of threads to use within PARDISO", "None", threads, &threads, &flg));
  if (flg) PetscSetMKL_PARDISOThreads((int)threads);

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_pardiso->maxfct, &icntl, &flg));
  if (flg) mat_mkl_pardiso->maxfct = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_pardiso->mnum, &icntl, &flg));
  if (flg) mat_mkl_pardiso->mnum = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_68", "Message level information", "None", mat_mkl_pardiso->msglvl, &icntl, &flg));
  if (flg) mat_mkl_pardiso->msglvl = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_69", "Defines the matrix type", "None", mat_mkl_pardiso->mtype, &icntl, &flg));
  if (flg) {
    void *pt[IPARM_SIZE];
    mat_mkl_pardiso->mtype = icntl;
    icntl                  = mat_mkl_pardiso->iparm[34];
    bs                     = mat_mkl_pardiso->iparm[36];
    MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
#if defined(PETSC_USE_REAL_SINGLE)
    mat_mkl_pardiso->iparm[27] = 1;
#else
    mat_mkl_pardiso->iparm[27] = 0;
#endif
    mat_mkl_pardiso->iparm[34] = icntl;
    mat_mkl_pardiso->iparm[36] = bs;
  }

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_1", "Use default values (if 0)", "None", mat_mkl_pardiso->iparm[0], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[0] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_pardiso->iparm[1], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[1] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_pardiso->iparm[3], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[3] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_5", "User permutation", "None", mat_mkl_pardiso->iparm[4], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[4] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_6", "Write solution on x", "None", mat_mkl_pardiso->iparm[5], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[5] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_8", "Iterative refinement step", "None", mat_mkl_pardiso->iparm[7], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[7] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_10", "Pivoting perturbation", "None", mat_mkl_pardiso->iparm[9], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[9] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_11", "Scaling vectors", "None", mat_mkl_pardiso->iparm[10], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[10] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_pardiso->iparm[11], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[11] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_pardiso->iparm[12], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[12] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_18", "Numbers of non-zero elements", "None", mat_mkl_pardiso->iparm[17], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[17] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_19", "Report number of floating point operations (0 to disable)", "None", mat_mkl_pardiso->iparm[18], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[18] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_pardiso->iparm[20], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[20] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_24", "Parallel factorization control", "None", mat_mkl_pardiso->iparm[23], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[23] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_pardiso->iparm[24], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[24] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_27", "Matrix checker", "None", mat_mkl_pardiso->iparm[26], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[26] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_pardiso->iparm[30], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[30] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_pardiso->iparm[33], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[33] = icntl;

  PetscCall(PetscOptionsInt("-mat_mkl_pardiso_60", "Intel MKL_PARDISO mode", "None", mat_mkl_pardiso->iparm[59], &icntl, &flg));
  if (flg) mat_mkl_pardiso->iparm[59] = icntl;
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso)
{
  PetscInt  i, bs;
  PetscBool match;

  PetscFunctionBegin;
  for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->iparm[i] = 0;
  for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->pt[i] = 0;
#if defined(PETSC_USE_REAL_SINGLE)
  mat_mkl_pardiso->iparm[27] = 1;
#else
  mat_mkl_pardiso->iparm[27] = 0;
#endif
  /* Default options for both sym and unsym */
  mat_mkl_pardiso->iparm[0]  = 1;  /* Solver default parameters overridden by the values provided in iparm */
  mat_mkl_pardiso->iparm[1]  = 2;  /* METIS reordering */
  mat_mkl_pardiso->iparm[5]  = 0;  /* Write solution into x */
  mat_mkl_pardiso->iparm[7]  = 0;  /* Max number of iterative refinement steps */
  mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
  mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
#if 0
  mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control */
#endif
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &match, MATSEQBAIJ, MATSEQSBAIJ, ""));
  PetscCall(MatGetBlockSize(A, &bs));
  if (!match || bs == 1) {
    mat_mkl_pardiso->iparm[34] = 1; /* Use C-style (0-based) indexing for the ia and ja arrays */
    mat_mkl_pardiso->n         = A->rmap->N;
  } else {
    mat_mkl_pardiso->iparm[34] = 0; /* Use Fortran-style (1-based) indexing for the ia and ja arrays */
    mat_mkl_pardiso->iparm[36] = bs;
    mat_mkl_pardiso->n         = A->rmap->N / bs;
  }
  mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on rank 0 */

  mat_mkl_pardiso->CleanUp = PETSC_FALSE;
  mat_mkl_pardiso->maxfct  = 1; /* Maximum number of numerical factorizations. */
  mat_mkl_pardiso->mnum    = 1; /* Which factorization to use. */
  mat_mkl_pardiso->msglvl  = 0; /* 0: do not print; 1: print statistical information to file */
  mat_mkl_pardiso->phase   = -1;
  mat_mkl_pardiso->err     = 0;

  mat_mkl_pardiso->nrhs  = 1;
  mat_mkl_pardiso->err   = 0;
  mat_mkl_pardiso->phase = -1;

  if (ftype == MAT_FACTOR_LU) {
    mat_mkl_pardiso->iparm[9]  = 13; /* Perturb the pivot elements with 1E-13 */
    mat_mkl_pardiso->iparm[10] = 1;  /* Use nonsymmetric permutation and scaling MPS */
    mat_mkl_pardiso->iparm[12] = 1;  /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
  } else {
    mat_mkl_pardiso->iparm[9]  = 8; /* Perturb the pivot elements with 1E-8 */
    mat_mkl_pardiso->iparm[10] = 0; /* Disable nonsymmetric permutation and scaling MPS */
    mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
#if defined(PETSC_USE_DEBUG)
    mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */
#endif
  }
  PetscCall(PetscCalloc1(A->rmap->N * sizeof(INT_TYPE), &mat_mkl_pardiso->perm));
  mat_mkl_pardiso->schur_size = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;

  PetscFunctionBegin;
  mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscCall(MatSetFromOptions_MKL_PARDISO(F, A));
  /* throw away any previously computed structure */
  if (mat_mkl_pardiso->freeaij) {
    PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja));
    if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a));
  }
  PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_INITIAL_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a));
  if (mat_mkl_pardiso->iparm[34] == 1) mat_mkl_pardiso->n = A->rmap->N;
  else mat_mkl_pardiso->n = A->rmap->N / A->rmap->bs;

  mat_mkl_pardiso->phase = JOB_ANALYSIS;

  /* reset flops counting if requested */
  if (mat_mkl_pardiso->iparm[18]) mat_mkl_pardiso->iparm[18] = -1;

  MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
              &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, NULL, &mat_mkl_pardiso->err);
  PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL_PARDISO: err=%d. Please check manual", (int)mat_mkl_pardiso->err);

  mat_mkl_pardiso->CleanUp = PETSC_TRUE;

  if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO;
  else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO;

  F->ops->solve          = MatSolve_MKL_PARDISO;
  F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO;
  F->ops->matsolve       = MatMatSolve_MKL_PARDISO;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
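/* MKL_PARDISO reports the inertia of a symmetric indefinite factorization through iparm(22)
   (number of positive eigenvalues) and iparm(23) (number of negative eigenvalues); the zero
   eigenvalue count below is inferred from the matrix size. */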
PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;

  PetscFunctionBegin;
  if (nneg) *nneg = mat_mkl_pardiso->iparm[22];
  if (npos) *npos = mat_mkl_pardiso->iparm[21];
  if (nzero) *nzero = F->rmap->N - (mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]);
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info));
  F->ops->getinertia = NULL;
#if !defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = MatGetInertia_MKL_PARDISO;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MKL_PARDISO  *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
  PetscInt          i;

  PetscFunctionBegin;
  if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO run parameters:\n"));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO phase: %d \n", (int)mat_mkl_pardiso->phase));
      for (i = 1; i <= 64; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO iparm[%d]: %d \n", (int)i, (int)mat_mkl_pardiso->iparm[i - 1]));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO maxfct: %d \n", (int)mat_mkl_pardiso->maxfct));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO mnum: %d \n", (int)mat_mkl_pardiso->mnum));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO mtype: %d \n", (int)mat_mkl_pardiso->mtype));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO n: %d \n", (int)mat_mkl_pardiso->n));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO nrhs: %d \n", (int)mat_mkl_pardiso->nrhs));
      PetscCall(PetscViewerASCIIPrintf(viewer, "MKL_PARDISO msglvl: %d \n", (int)mat_mkl_pardiso->msglvl));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_used           = mat_mkl_pardiso->iparm[17];
  info->nz_allocated      = mat_mkl_pardiso->iparm[17];
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscInt         backup, bs;
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;

  PetscFunctionBegin;
  if (icntl <= 64) {
    mat_mkl_pardiso->iparm[icntl - 1] = ival;
  } else {
    if (icntl == 65) PetscSetMKL_PARDISOThreads((int)ival);
    else if (icntl == 66) mat_mkl_pardiso->maxfct = ival;
    else if (icntl == 67) mat_mkl_pardiso->mnum = ival;
    else if (icntl == 68) mat_mkl_pardiso->msglvl = ival;
    else if (icntl == 69) {
      void *pt[IPARM_SIZE];
      backup                 = mat_mkl_pardiso->iparm[34];
      bs                     = mat_mkl_pardiso->iparm[36];
      mat_mkl_pardiso->mtype = ival;
      MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
#if defined(PETSC_USE_REAL_SINGLE)
      mat_mkl_pardiso->iparm[27] = 1;
#else
      mat_mkl_pardiso->iparm[27] = 0;
#endif
      mat_mkl_pardiso->iparm[34] = backup;
      mat_mkl_pardiso->iparm[36] = bs;
    } else if (icntl == 70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()`
. icntl - index of Mkl_Pardiso parameter
- ival  - value of Mkl_Pardiso parameter

  Options Database Key:
. -mat_mkl_pardiso_<icntl> <ival> - change the option numbered icntl to the value ival

  Level: beginner

  References:
. * - Mkl_Pardiso Users' Guide

.seealso: [](ch_matrices), `Mat`, `MATSOLVERMKL_PARDISO`, `MatGetFactor()`
@*/
PetscErrorCode MatMkl_PardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscTryMethod(F, "MatMkl_PardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
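
/* Usage sketch (illustrative; the icntl/ival values below are examples -- consult the
   MKL_PARDISO manual before changing iparm entries):

     PetscCall(MatGetFactor(A, MATSOLVERMKL_PARDISO, MAT_FACTOR_LU, &F));
     PetscCall(MatMkl_PardisoSetCntl(F, 65, 4));   suggest 4 threads (option 65 above)
     PetscCall(MatMkl_PardisoSetCntl(F, 8, 2));    two iterative refinement steps (iparm(8))
*/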

/*MC
  MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU and Cholesky) for
  `MATSEQAIJ`, `MATSEQBAIJ`, and `MATSEQSBAIJ` matrices via the external package MKL_PARDISO.

  Use `-pc_type lu` `-pc_factor_mat_solver_type mkl_pardiso` to use this direct solver

  Options Database Keys:
+ -mat_mkl_pardiso_65 - Suggested number of threads to use within MKL_PARDISO
. -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
. -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase
. -mat_mkl_pardiso_68 - Message level information, use 1 to get detailed information on the solver options
. -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
. -mat_mkl_pardiso_1 - Use default values
. -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix
. -mat_mkl_pardiso_4 - Preconditioned CGS/CG
. -mat_mkl_pardiso_5 - User permutation
. -mat_mkl_pardiso_6 - Write solution on x
. -mat_mkl_pardiso_8 - Iterative refinement step
. -mat_mkl_pardiso_10 - Pivoting perturbation
. -mat_mkl_pardiso_11 - Scaling vectors
. -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A
. -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching
. -mat_mkl_pardiso_18 - Numbers of non-zero elements
. -mat_mkl_pardiso_19 - Report number of floating point operations
. -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices
. -mat_mkl_pardiso_24 - Parallel factorization control
. -mat_mkl_pardiso_25 - Parallel forward/backward solve control
. -mat_mkl_pardiso_27 - Matrix checker
. -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors
. -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
- -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode

  Level: beginner

  Notes:
  Use `-mat_mkl_pardiso_68 1` to display the number of threads the solver is using. MKL does not provide a way to directly access this
  information.

  For more information on the options check the MKL_Pardiso manual

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_PardisoSetCntl()`
M*/
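
/* Command-line sketch (assumes a hypothetical application `./app` that sets up a KSP; the option
   names are the ones documented above):

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mkl_pardiso -mat_mkl_pardiso_65 4
*/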
static PetscErrorCode MatFactorGetSolverType_mkl_pardiso(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMKL_PARDISO;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A, MatFactorType ftype, Mat *F)
{
  Mat              B;
  Mat_MKL_PARDISO *mat_mkl_pardiso;
  PetscBool        isSeqAIJ, isSeqBAIJ, isSeqSBAIJ;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy("mkl_pardiso", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mat_mkl_pardiso));
  B->data = mat_mkl_pardiso;

  PetscCall(MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso));
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO;
    B->factortype            = MAT_FACTOR_LU;
    mat_mkl_pardiso->needsym = PETSC_FALSE;
    if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
    else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
    else {
      PetscCheck(!isSeqSBAIJ, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead");
      SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO LU with %s format", ((PetscObject)A)->type_name);
    }
#if defined(PETSC_USE_COMPLEX)
    mat_mkl_pardiso->mtype = 13;
#else
    mat_mkl_pardiso->mtype = 11;
#endif
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
    else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
    else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij;
    else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with %s format", ((PetscObject)A)->type_name);

    mat_mkl_pardiso->needsym = PETSC_TRUE;
#if !defined(PETSC_USE_COMPLEX)
    if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_pardiso->mtype = 2;
    else mat_mkl_pardiso->mtype = -2;
#else
    mat_mkl_pardiso->mtype = 6;
    PetscCheck(A->hermitian != PETSC_BOOL3_TRUE, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with Hermitian matrices! Use MAT_FACTOR_LU instead");
#endif
  }
  B->ops->destroy = MatDestroy_MKL_PARDISO;
  B->ops->view    = MatView_MKL_PARDISO;
  B->ops->getinfo = MatGetInfo_MKL_PARDISO;
  B->factortype   = ftype;
  B->assembled    = PETSC_TRUE;

  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMKL_PARDISO, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_pardiso));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MKL_PARDISO));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMkl_PardisoSetCntl_C", MatMkl_PardisoSetCntl_MKL_PARDISO));

  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_Pardiso(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso));
  PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso));
  PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso));
  PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso));
  PetscFunctionReturn(PETSC_SUCCESS);
}