Actual source code: mumps.c
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_NULL 0
27: #define JOB_FACTSYMBOLIC 1
28: #define JOB_FACTNUMERIC 2
29: #define JOB_SOLVE 3
30: #define JOB_END -2
32: /* calls to MUMPS */
33: #if defined(PETSC_USE_COMPLEX)
34: #if defined(PETSC_USE_REAL_SINGLE)
35: #define MUMPS_c cmumps_c
36: #else
37: #define MUMPS_c zmumps_c
38: #endif
39: #else
40: #if defined(PETSC_USE_REAL_SINGLE)
41: #define MUMPS_c smumps_c
42: #else
43: #define MUMPS_c dmumps_c
44: #endif
45: #endif
47: /* MUMPS uses MUMPS_INT for the indices of nonzeros, such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48:    the number of nonzeros, such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49:    naming convention of PetscMPIInt, PetscBLASInt, etc.
50: */
51: typedef MUMPS_INT PetscMUMPSInt;
53: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54:   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
55:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56: #endif
57: #else
58:   #if defined(INTSIZE64) /* INTSIZE64 is the command-line macro once used to build MUMPS in full 64-bit mode */
59:     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60: #endif
61: #endif
63: #define MPIU_MUMPSINT MPI_INT
64: #define PETSC_MUMPS_INT_MAX 2147483647
65: #define PETSC_MUMPS_INT_MIN -2147483648
67: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/column index or some other small integer */
68: static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69: {
70: PetscFunctionBegin;
71: #if PetscDefined(USE_64BIT_INDICES)
72:   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too large for PetscMUMPSInt");
73: #endif
74: *b = (PetscMUMPSInt)(a);
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
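/*
   A minimal sketch (not compiled) of how PetscMUMPSIntCast() is meant to be used when handing a
   PETSc-sized quantity to MUMPS; the variables nrow, nrow_mumps and the mumps pointer below are hypothetical.
*/
#if 0
{
  PetscInt      nrow = 100; /* a PETSc integer, possibly 64-bit */
  PetscMUMPSInt nrow_mumps; /* what MUMPS expects (a 32-bit MUMPS_INT) */

  PetscCall(PetscMUMPSIntCast(nrow, &nrow_mumps)); /* errors out if nrow does not fit in a MUMPS_INT */
  mumps->id.n = nrow_mumps;                        /* now safe to pass to MUMPS */
}
#endif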
78: /* Put these utility routines here since they are only used in this file */
79: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80: {
81: PetscInt myval;
82: PetscBool myset;
83: PetscFunctionBegin;
84:   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
85: PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
86: if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87: if (set) *set = myset;
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
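/*
   Sketch (not compiled) of how the wrapper above is typically used inside a PetscOptionsBegin()/PetscOptionsEnd()
   block; the option name, the Mat F and the mumps pointer are illustrative only.
*/
#if 0
{
  PetscMUMPSInt icntl;
  PetscBool     flg;

  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
  if (flg) mumps->id.ICNTL(4) = icntl;
  PetscOptionsEnd();
}
#endif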
92: /* if using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
93: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94: #define PetscMUMPS_c(mumps) \
95: do { \
96: if (mumps->use_petsc_omp_support) { \
97: if (mumps->is_omp_master) { \
98: PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
100: PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
101: PetscCall(PetscFPTrapPop()); \
102: PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
103: } \
104: PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
105:       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
106:          to each process; we Bcast it mainly for info[1], an error code, since the other entries do not   \
107:          have an easy translation between omp_comm and petsc_comm. See the MUMPS 5.1.2 manual, p. 82.     \
108:          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
109: */ \
110: PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
111: PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
112: PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
113: PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
114: } else { \
115: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
116: PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
117: PetscCall(PetscFPTrapPop()); \
118: } \
119: } while (0)
120: #else
121: #define PetscMUMPS_c(mumps) \
122: do { \
123: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
124: PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
125: PetscCall(PetscFPTrapPop()); \
126: } while (0)
127: #endif
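/*
   Sketch (not compiled) of the calling pattern used with this macro throughout the file:
   select the MUMPS phase via id.job, invoke PetscMUMPS_c(), then check INFOG(1) for errors.
   The mumps pointer is hypothetical here.
*/
#if 0
{
  mumps->id.job = JOB_FACTNUMERIC; /* -1 init, 1 analysis, 2 factorization, 3 solve, -2 end */
  PetscMUMPS_c(mumps);             /* with OpenMP support, only the master ranks actually call MUMPS */
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
}
#endif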
129: /* define MumpsScalar */
130: #if defined(PETSC_USE_COMPLEX)
131: #if defined(PETSC_USE_REAL_SINGLE)
132: #define MumpsScalar mumps_complex
133: #else
134: #define MumpsScalar mumps_double_complex
135: #endif
136: #else
137: #define MumpsScalar PetscScalar
138: #endif
140: /* macros so that indices match the MUMPS documentation */
141: #define ICNTL(I) icntl[(I)-1]
142: #define CNTL(I) cntl[(I)-1]
143: #define INFOG(I) infog[(I)-1]
144: #define INFO(I) info[(I)-1]
145: #define RINFOG(I) rinfog[(I)-1]
146: #define RINFO(I) rinfo[(I)-1]
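/*
   These 1-based accessors let the code read like the MUMPS documentation; for example the two
   statements below are equivalent (sketch only, not compiled; the mumps pointer is hypothetical).
*/
#if 0
mumps->id.ICNTL(7) = 2; /* as written in this file and in the MUMPS manual (1-based) */
mumps->id.icntl[6] = 2; /* what the macro expands to (0-based C array) */
#endif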
148: typedef struct Mat_MUMPS Mat_MUMPS;
149: struct Mat_MUMPS {
150: #if defined(PETSC_USE_COMPLEX)
151: #if defined(PETSC_USE_REAL_SINGLE)
152: CMUMPS_STRUC_C id;
153: #else
154: ZMUMPS_STRUC_C id;
155: #endif
156: #else
157: #if defined(PETSC_USE_REAL_SINGLE)
158: SMUMPS_STRUC_C id;
159: #else
160: DMUMPS_STRUC_C id;
161: #endif
162: #endif
164: MatStructure matstruc;
165: PetscMPIInt myid, petsc_size;
166:   PetscMUMPSInt *irn, *jcn;     /* the (i,j,v) triplets passed to MUMPS */
167:   PetscScalar *val, *val_alloc; /* For some matrices we can access their data array directly without a buffer; for others we need a buffer, hence val_alloc */
168:   PetscInt64 nnz;               /* number of nonzeros; the type is what MUMPS calls selective 64-bit */
169: PetscMUMPSInt sym;
170: MPI_Comm mumps_comm;
171: PetscMUMPSInt *ICNTL_pre;
172: PetscReal *CNTL_pre;
173: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
174: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
175: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
176: PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
177: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
178: PetscInt *rhs_nrow, max_nrhs;
179: PetscMPIInt *rhs_recvcounts, *rhs_disps;
180: PetscScalar *rhs_loc, *rhs_recvbuf;
181: #endif
182: Vec b_seq, x_seq;
183: PetscInt ninfo, *info; /* which INFO to display */
184: PetscInt sizeredrhs;
185: PetscScalar *schur_sol;
186: PetscInt schur_sizesol;
187: PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
188: PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
189: PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
191: /* stuff used by petsc/mumps OpenMP support*/
192: PetscBool use_petsc_omp_support;
193:   PetscOmpCtrl omp_ctrl;         /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
194:   MPI_Comm petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
195:   PetscInt64 *recvcount;         /* the nnz counts gathered on omp_master */
196: PetscMPIInt tag, omp_comm_size;
197: PetscBool is_omp_master; /* is this rank the master of omp_comm */
198: MPI_Request *reqs;
199: };
201: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
202:    Here, nrow is the number of rows, ia[] is the row-pointer array, and ja[] holds the column indices.
203: */
204: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
205: {
206:   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. We use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the rhs */
208: PetscFunctionBegin;
209: #if defined(PETSC_USE_64BIT_INDICES)
210: {
211: PetscInt i;
212: if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
213: PetscCall(PetscFree(mumps->ia_alloc));
214: PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
215: mumps->cur_ilen = nrow + 1;
216: }
217: if (nnz > mumps->cur_jlen) {
218: PetscCall(PetscFree(mumps->ja_alloc));
219: PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
220: mumps->cur_jlen = nnz;
221: }
222: for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
223: for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
224: *ia_mumps = mumps->ia_alloc;
225: *ja_mumps = mumps->ja_alloc;
226: }
227: #else
228: *ia_mumps = ia;
229: *ja_mumps = ja;
230: #endif
231: PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
232: PetscFunctionReturn(PETSC_SUCCESS);
233: }
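/*
   Sketch (not compiled) of the expected 1-based CSR input, using a dense 2x2 sparsity pattern;
   the mumps pointer and the arrays below are illustrative only.
*/
#if 0
{
  PetscInt       ia[] = {1, 3, 5};    /* 1-based row pointers: row i spans entries ia[i]..ia[i+1]-1 */
  PetscInt       ja[] = {1, 2, 1, 2}; /* 1-based column indices */
  PetscMUMPSInt *ia_m, *ja_m, nz_m;

  PetscCall(PetscMUMPSIntCSRCast(mumps, 2, ia, ja, &ia_m, &ja_m, &nz_m)); /* nz_m becomes 4 */
}
#endif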
235: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
236: {
237: PetscFunctionBegin;
238: PetscCall(PetscFree(mumps->id.listvar_schur));
239: PetscCall(PetscFree(mumps->id.redrhs));
240: PetscCall(PetscFree(mumps->schur_sol));
241: mumps->id.size_schur = 0;
242: mumps->id.schur_lld = 0;
243: mumps->id.ICNTL(19) = 0;
244: PetscFunctionReturn(PETSC_SUCCESS);
245: }
247: /* solve with rhs in mumps->id.redrhs and return in the same location */
248: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
249: {
250: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
251: Mat S, B, X;
252: MatFactorSchurStatus schurstatus;
253: PetscInt sizesol;
255: PetscFunctionBegin;
256: PetscCall(MatFactorFactorizeSchurComplement(F));
257: PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
258: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
259: PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
260: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261: PetscCall(MatBindToCPU(B, S->boundtocpu));
262: #endif
263: switch (schurstatus) {
264: case MAT_FACTOR_SCHUR_FACTORED:
265: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
266: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
267: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
268: PetscCall(MatBindToCPU(X, S->boundtocpu));
269: #endif
270: if (!mumps->id.ICNTL(9)) { /* transpose solve */
271: PetscCall(MatMatSolveTranspose(S, B, X));
272: } else {
273: PetscCall(MatMatSolve(S, B, X));
274: }
275: break;
276: case MAT_FACTOR_SCHUR_INVERTED:
277: sizesol = mumps->id.nrhs * mumps->id.size_schur;
278: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
279: PetscCall(PetscFree(mumps->schur_sol));
280: PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
281: mumps->schur_sizesol = sizesol;
282: }
283: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
284: PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
285: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
286: PetscCall(MatBindToCPU(X, S->boundtocpu));
287: #endif
288: PetscCall(MatProductCreateWithMat(S, B, NULL, X));
289: if (!mumps->id.ICNTL(9)) { /* transpose solve */
290: PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
291: } else {
292: PetscCall(MatProductSetType(X, MATPRODUCT_AB));
293: }
294: PetscCall(MatProductSetFromOptions(X));
295: PetscCall(MatProductSymbolic(X));
296: PetscCall(MatProductNumeric(X));
298: PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
299: break;
300: default:
301: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
302: }
303: PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
304: PetscCall(MatDestroy(&B));
305: PetscCall(MatDestroy(&X));
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
310: {
311: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
313: PetscFunctionBegin;
314: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
315: PetscFunctionReturn(PETSC_SUCCESS);
316: }
317: if (!expansion) { /* prepare for the condensation step */
318: PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
319: /* allocate MUMPS internal array to store reduced right-hand sides */
320: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
321: PetscCall(PetscFree(mumps->id.redrhs));
322: mumps->id.lredrhs = mumps->id.size_schur;
323: PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
324: mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
325: }
326: mumps->id.ICNTL(26) = 1; /* condensation phase */
327: } else { /* prepare for the expansion step */
328: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
329: PetscCall(MatMumpsSolveSchur_Private(F));
330: mumps->id.ICNTL(26) = 2; /* expansion phase */
331: PetscMUMPS_c(mumps);
332: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
333: /* restore defaults */
334: mumps->id.ICNTL(26) = -1;
335: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
336: if (mumps->id.nrhs > 1) {
337: PetscCall(PetscFree(mumps->id.redrhs));
338: mumps->id.lredrhs = 0;
339: mumps->sizeredrhs = 0;
340: }
341: }
342: PetscFunctionReturn(PETSC_SUCCESS);
343: }
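/*
   The two branches above are used as a pair around a regular MUMPS solve, mirroring what
   MatSolve_MUMPS() does later in this file (sketch only, not compiled; F and mumps are hypothetical).
*/
#if 0
{
  PetscCall(MatMumpsHandleSchur_Private(F, PETSC_FALSE)); /* condensation: ICNTL(26)=1, allocate id.redrhs */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);                                    /* MUMPS reduces the rhs onto the Schur variables */
  PetscCall(MatMumpsHandleSchur_Private(F, PETSC_TRUE));  /* solve the Schur system, then expansion: ICNTL(26)=2 */
}
#endif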
345: /*
346:   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
348:   input:
349:     A       - matrix in aij, baij or sbaij format
350:     shift   - 0: C-style (0-based) output triples; 1: Fortran-style (1-based) output triples
351:     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
352:               MAT_REUSE_MATRIX:   only the values in the v array are updated
353:   output:
354:     nnz     - dimension of r, c, and v (number of local nonzero entries of A)
355:     r, c, v - row and column indices and matrix values (the matrix triples)
357:   The arrays r and c, and sometimes v, are obtained in a single PetscMalloc() and are later freed in MatDestroy_MUMPS()
358:   with PetscFree(mumps->irn). This is not ideal code: because v is only sometimes part of the same allocation as
359:   mumps->irn, the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
361: */
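/*
   Worked example (illustrative): for the 2x2 SeqAIJ matrix [1 2; 0 3], stored in CSR form as
     ai = {0, 2, 3},  aj = {0, 1, 1},  av = {1, 2, 3},
   a conversion with shift = 1 (Fortran-style indexing) produces the triples
     (irn, jcn, val) = (1,1,1), (1,2,2), (2,2,3),
   which is exactly what MatConvertToTriples_seqaij_seqaij() below computes.
*/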
363: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
364: {
365: const PetscScalar *av;
366: const PetscInt *ai, *aj, *ajj, M = A->rmap->n;
367: PetscInt64 nz, rnz, i, j, k;
368: PetscMUMPSInt *row, *col;
369: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
371: PetscFunctionBegin;
372: PetscCall(MatSeqAIJGetArrayRead(A, &av));
373: mumps->val = (PetscScalar *)av;
374: if (reuse == MAT_INITIAL_MATRIX) {
375: nz = aa->nz;
376: ai = aa->i;
377: aj = aa->j;
378: PetscCall(PetscMalloc2(nz, &row, nz, &col));
379: for (i = k = 0; i < M; i++) {
380: rnz = ai[i + 1] - ai[i];
381: ajj = aj + ai[i];
382: for (j = 0; j < rnz; j++) {
383: PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
384: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
385: k++;
386: }
387: }
388: mumps->irn = row;
389: mumps->jcn = col;
390: mumps->nnz = nz;
391: }
392: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
396: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
397: {
398: PetscInt64 nz, i, j, k, r;
399: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
400: PetscMUMPSInt *row, *col;
402: PetscFunctionBegin;
403: mumps->val = a->val;
404: if (reuse == MAT_INITIAL_MATRIX) {
405: nz = a->sliidx[a->totalslices];
406: PetscCall(PetscMalloc2(nz, &row, nz, &col));
407: for (i = k = 0; i < a->totalslices; i++) {
408: for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
409: }
410: for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
411: mumps->irn = row;
412: mumps->jcn = col;
413: mumps->nnz = nz;
414: }
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
419: {
420: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
421: const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
422: PetscInt64 M, nz, idx = 0, rnz, i, j, k, m;
423: PetscInt bs;
424: PetscMUMPSInt *row, *col;
426: PetscFunctionBegin;
427: PetscCall(MatGetBlockSize(A, &bs));
428: M = A->rmap->N / bs;
429: mumps->val = aa->a;
430: if (reuse == MAT_INITIAL_MATRIX) {
431: ai = aa->i;
432: aj = aa->j;
433: nz = bs2 * aa->nz;
434: PetscCall(PetscMalloc2(nz, &row, nz, &col));
435: for (i = 0; i < M; i++) {
436: ajj = aj + ai[i];
437: rnz = ai[i + 1] - ai[i];
438: for (k = 0; k < rnz; k++) {
439: for (j = 0; j < bs; j++) {
440: for (m = 0; m < bs; m++) {
441: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
442: PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
443: idx++;
444: }
445: }
446: }
447: }
448: mumps->irn = row;
449: mumps->jcn = col;
450: mumps->nnz = nz;
451: }
452: PetscFunctionReturn(PETSC_SUCCESS);
453: }
455: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
456: {
457: const PetscInt *ai, *aj, *ajj;
458: PetscInt bs;
459: PetscInt64 nz, rnz, i, j, k, m;
460: PetscMUMPSInt *row, *col;
461: PetscScalar *val;
462: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
463: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
464: #if defined(PETSC_USE_COMPLEX)
465: PetscBool isset, hermitian;
466: #endif
468: PetscFunctionBegin;
469: #if defined(PETSC_USE_COMPLEX)
470: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
471:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
472: #endif
473: ai = aa->i;
474: aj = aa->j;
475: PetscCall(MatGetBlockSize(A, &bs));
476: if (reuse == MAT_INITIAL_MATRIX) {
477: const PetscInt64 alloc_size = aa->nz * bs2;
479: PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
480: if (bs > 1) {
481: PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
482: mumps->val = mumps->val_alloc;
483: } else {
484: mumps->val = aa->a;
485: }
486: mumps->irn = row;
487: mumps->jcn = col;
488: } else {
489: if (bs == 1) mumps->val = aa->a;
490: row = mumps->irn;
491: col = mumps->jcn;
492: }
493: val = mumps->val;
495: nz = 0;
496: if (bs > 1) {
497: for (i = 0; i < mbs; i++) {
498: rnz = ai[i + 1] - ai[i];
499: ajj = aj + ai[i];
500: for (j = 0; j < rnz; j++) {
501: for (k = 0; k < bs; k++) {
502: for (m = 0; m < bs; m++) {
503: if (ajj[j] > i || k >= m) {
504: if (reuse == MAT_INITIAL_MATRIX) {
505: PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
506: PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
507: }
508: val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
509: }
510: }
511: }
512: }
513: }
514: } else if (reuse == MAT_INITIAL_MATRIX) {
515: for (i = 0; i < mbs; i++) {
516: rnz = ai[i + 1] - ai[i];
517: ajj = aj + ai[i];
518: for (j = 0; j < rnz; j++) {
519: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
520: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
521: nz++;
522: }
523: }
524: PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
525: }
526: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
531: {
532: const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n;
533: PetscInt64 nz, rnz, i, j;
534: const PetscScalar *av, *v1;
535: PetscScalar *val;
536: PetscMUMPSInt *row, *col;
537: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
538: PetscBool missing;
539: #if defined(PETSC_USE_COMPLEX)
540: PetscBool hermitian, isset;
541: #endif
543: PetscFunctionBegin;
544: #if defined(PETSC_USE_COMPLEX)
545: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
546:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
547: #endif
548: PetscCall(MatSeqAIJGetArrayRead(A, &av));
549: ai = aa->i;
550: aj = aa->j;
551: adiag = aa->diag;
552: PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
553: if (reuse == MAT_INITIAL_MATRIX) {
554: /* count nz in the upper triangular part of A */
555: nz = 0;
556: if (missing) {
557: for (i = 0; i < M; i++) {
558: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
559: for (j = ai[i]; j < ai[i + 1]; j++) {
560: if (aj[j] < i) continue;
561: nz++;
562: }
563: } else {
564: nz += ai[i + 1] - adiag[i];
565: }
566: }
567: } else {
568: for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
569: }
570: PetscCall(PetscMalloc2(nz, &row, nz, &col));
571: PetscCall(PetscMalloc1(nz, &val));
572: mumps->nnz = nz;
573: mumps->irn = row;
574: mumps->jcn = col;
575: mumps->val = mumps->val_alloc = val;
577: nz = 0;
578: if (missing) {
579: for (i = 0; i < M; i++) {
580: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
581: for (j = ai[i]; j < ai[i + 1]; j++) {
582: if (aj[j] < i) continue;
583: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
584: PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
585: val[nz] = av[j];
586: nz++;
587: }
588: } else {
589: rnz = ai[i + 1] - adiag[i];
590: ajj = aj + adiag[i];
591: v1 = av + adiag[i];
592: for (j = 0; j < rnz; j++) {
593: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
594: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
595: val[nz++] = v1[j];
596: }
597: }
598: }
599: } else {
600: for (i = 0; i < M; i++) {
601: rnz = ai[i + 1] - adiag[i];
602: ajj = aj + adiag[i];
603: v1 = av + adiag[i];
604: for (j = 0; j < rnz; j++) {
605: PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
606: PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
607: val[nz++] = v1[j];
608: }
609: }
610: }
611: } else {
612: nz = 0;
613: val = mumps->val;
614: if (missing) {
615: for (i = 0; i < M; i++) {
616: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
617: for (j = ai[i]; j < ai[i + 1]; j++) {
618: if (aj[j] < i) continue;
619: val[nz++] = av[j];
620: }
621: } else {
622: rnz = ai[i + 1] - adiag[i];
623: v1 = av + adiag[i];
624: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
625: }
626: }
627: } else {
628: for (i = 0; i < M; i++) {
629: rnz = ai[i + 1] - adiag[i];
630: v1 = av + adiag[i];
631: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
632: }
633: }
634: }
635: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
636: PetscFunctionReturn(PETSC_SUCCESS);
637: }
639: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
640: {
641: const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
642: PetscInt bs;
643: PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB;
644: PetscMUMPSInt *row, *col;
645: const PetscScalar *av, *bv, *v1, *v2;
646: PetscScalar *val;
647: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
648: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data;
649: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
650: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
651: #if defined(PETSC_USE_COMPLEX)
652: PetscBool hermitian, isset;
653: #endif
655: PetscFunctionBegin;
656: #if defined(PETSC_USE_COMPLEX)
657: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
658:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
659: #endif
660: PetscCall(MatGetBlockSize(A, &bs));
661: rstart = A->rmap->rstart;
662: ai = aa->i;
663: aj = aa->j;
664: bi = bb->i;
665: bj = bb->j;
666: av = aa->a;
667: bv = bb->a;
669: garray = mat->garray;
671: if (reuse == MAT_INITIAL_MATRIX) {
672: nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
673: PetscCall(PetscMalloc2(nz, &row, nz, &col));
674: PetscCall(PetscMalloc1(nz, &val));
675:     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
676: mumps->irn = row;
677: mumps->jcn = col;
678: mumps->val = mumps->val_alloc = val;
679: } else {
680: val = mumps->val;
681: }
683: jj = 0;
684: irow = rstart;
685: for (i = 0; i < mbs; i++) {
686: ajj = aj + ai[i]; /* ptr to the beginning of this row */
687: countA = ai[i + 1] - ai[i];
688: countB = bi[i + 1] - bi[i];
689: bjj = bj + bi[i];
690: v1 = av + ai[i] * bs2;
691: v2 = bv + bi[i] * bs2;
693: if (bs > 1) {
694: /* A-part */
695: for (j = 0; j < countA; j++) {
696: for (k = 0; k < bs; k++) {
697: for (m = 0; m < bs; m++) {
698: if (rstart + ajj[j] * bs > irow || k >= m) {
699: if (reuse == MAT_INITIAL_MATRIX) {
700: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
701: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
702: }
703: val[jj++] = v1[j * bs2 + m + k * bs];
704: }
705: }
706: }
707: }
709: /* B-part */
710: for (j = 0; j < countB; j++) {
711: for (k = 0; k < bs; k++) {
712: for (m = 0; m < bs; m++) {
713: if (reuse == MAT_INITIAL_MATRIX) {
714: PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
715: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
716: }
717: val[jj++] = v2[j * bs2 + m + k * bs];
718: }
719: }
720: }
721: } else {
722: /* A-part */
723: for (j = 0; j < countA; j++) {
724: if (reuse == MAT_INITIAL_MATRIX) {
725: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
726: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
727: }
728: val[jj++] = v1[j];
729: }
731: /* B-part */
732: for (j = 0; j < countB; j++) {
733: if (reuse == MAT_INITIAL_MATRIX) {
734: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
735: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
736: }
737: val[jj++] = v2[j];
738: }
739: }
740: irow += bs;
741: }
742: mumps->nnz = jj;
743: PetscFunctionReturn(PETSC_SUCCESS);
744: }
746: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
747: {
748: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
749: PetscInt64 rstart, nz, i, j, jj, irow, countA, countB;
750: PetscMUMPSInt *row, *col;
751: const PetscScalar *av, *bv, *v1, *v2;
752: PetscScalar *val;
753: Mat Ad, Ao;
754: Mat_SeqAIJ *aa;
755: Mat_SeqAIJ *bb;
757: PetscFunctionBegin;
758: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
759: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
760: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
762: aa = (Mat_SeqAIJ *)(Ad)->data;
763: bb = (Mat_SeqAIJ *)(Ao)->data;
764: ai = aa->i;
765: aj = aa->j;
766: bi = bb->i;
767: bj = bb->j;
769: rstart = A->rmap->rstart;
771: if (reuse == MAT_INITIAL_MATRIX) {
772: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
773: PetscCall(PetscMalloc2(nz, &row, nz, &col));
774: PetscCall(PetscMalloc1(nz, &val));
775: mumps->nnz = nz;
776: mumps->irn = row;
777: mumps->jcn = col;
778: mumps->val = mumps->val_alloc = val;
779: } else {
780: val = mumps->val;
781: }
783: jj = 0;
784: irow = rstart;
785: for (i = 0; i < m; i++) {
786: ajj = aj + ai[i]; /* ptr to the beginning of this row */
787: countA = ai[i + 1] - ai[i];
788: countB = bi[i + 1] - bi[i];
789: bjj = bj + bi[i];
790: v1 = av + ai[i];
791: v2 = bv + bi[i];
793: /* A-part */
794: for (j = 0; j < countA; j++) {
795: if (reuse == MAT_INITIAL_MATRIX) {
796: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
797: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
798: }
799: val[jj++] = v1[j];
800: }
802: /* B-part */
803: for (j = 0; j < countB; j++) {
804: if (reuse == MAT_INITIAL_MATRIX) {
805: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
806: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
807: }
808: val[jj++] = v2[j];
809: }
810: irow++;
811: }
812: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
813: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
814: PetscFunctionReturn(PETSC_SUCCESS);
815: }
817: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
818: {
819: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
820: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data;
821: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
822: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
823: const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
824: const PetscInt bs2 = mat->bs2;
825: PetscInt bs;
826: PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx;
827: PetscMUMPSInt *row, *col;
828: const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
829: PetscScalar *val;
831: PetscFunctionBegin;
832: PetscCall(MatGetBlockSize(A, &bs));
833: if (reuse == MAT_INITIAL_MATRIX) {
834: nz = bs2 * (aa->nz + bb->nz);
835: PetscCall(PetscMalloc2(nz, &row, nz, &col));
836: PetscCall(PetscMalloc1(nz, &val));
837: mumps->nnz = nz;
838: mumps->irn = row;
839: mumps->jcn = col;
840: mumps->val = mumps->val_alloc = val;
841: } else {
842: val = mumps->val;
843: }
845: jj = 0;
846: irow = rstart;
847: for (i = 0; i < mbs; i++) {
848: countA = ai[i + 1] - ai[i];
849: countB = bi[i + 1] - bi[i];
850: ajj = aj + ai[i];
851: bjj = bj + bi[i];
852: v1 = av + bs2 * ai[i];
853: v2 = bv + bs2 * bi[i];
855: idx = 0;
856: /* A-part */
857: for (k = 0; k < countA; k++) {
858: for (j = 0; j < bs; j++) {
859: for (n = 0; n < bs; n++) {
860: if (reuse == MAT_INITIAL_MATRIX) {
861: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
862: PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
863: }
864: val[jj++] = v1[idx++];
865: }
866: }
867: }
869: idx = 0;
870: /* B-part */
871: for (k = 0; k < countB; k++) {
872: for (j = 0; j < bs; j++) {
873: for (n = 0; n < bs; n++) {
874: if (reuse == MAT_INITIAL_MATRIX) {
875: PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
876: PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
877: }
878: val[jj++] = v2[idx++];
879: }
880: }
881: }
882: irow += bs;
883: }
884: PetscFunctionReturn(PETSC_SUCCESS);
885: }
887: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
888: {
889: const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
890: PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
891: PetscMUMPSInt *row, *col;
892: const PetscScalar *av, *bv, *v1, *v2;
893: PetscScalar *val;
894: Mat Ad, Ao;
895: Mat_SeqAIJ *aa;
896: Mat_SeqAIJ *bb;
897: #if defined(PETSC_USE_COMPLEX)
898: PetscBool hermitian, isset;
899: #endif
901: PetscFunctionBegin;
902: #if defined(PETSC_USE_COMPLEX)
903: PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
904:   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
905: #endif
906: PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
907: PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
908: PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
910: aa = (Mat_SeqAIJ *)(Ad)->data;
911: bb = (Mat_SeqAIJ *)(Ao)->data;
912: ai = aa->i;
913: aj = aa->j;
914: adiag = aa->diag;
915: bi = bb->i;
916: bj = bb->j;
918: rstart = A->rmap->rstart;
920: if (reuse == MAT_INITIAL_MATRIX) {
921: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
922: nzb = 0; /* num of upper triangular entries in mat->B */
923: for (i = 0; i < m; i++) {
924: nza += (ai[i + 1] - adiag[i]);
925: countB = bi[i + 1] - bi[i];
926: bjj = bj + bi[i];
927: for (j = 0; j < countB; j++) {
928: if (garray[bjj[j]] > rstart) nzb++;
929: }
930: }
932: nz = nza + nzb; /* total nz of upper triangular part of mat */
933: PetscCall(PetscMalloc2(nz, &row, nz, &col));
934: PetscCall(PetscMalloc1(nz, &val));
935: mumps->nnz = nz;
936: mumps->irn = row;
937: mumps->jcn = col;
938: mumps->val = mumps->val_alloc = val;
939: } else {
940: val = mumps->val;
941: }
943: jj = 0;
944: irow = rstart;
945: for (i = 0; i < m; i++) {
946: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
947: v1 = av + adiag[i];
948: countA = ai[i + 1] - adiag[i];
949: countB = bi[i + 1] - bi[i];
950: bjj = bj + bi[i];
951: v2 = bv + bi[i];
953: /* A-part */
954: for (j = 0; j < countA; j++) {
955: if (reuse == MAT_INITIAL_MATRIX) {
956: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
957: PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
958: }
959: val[jj++] = v1[j];
960: }
962: /* B-part */
963: for (j = 0; j < countB; j++) {
964: if (garray[bjj[j]] > rstart) {
965: if (reuse == MAT_INITIAL_MATRIX) {
966: PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
967: PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
968: }
969: val[jj++] = v2[j];
970: }
971: }
972: irow++;
973: }
974: PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
975: PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
976: PetscFunctionReturn(PETSC_SUCCESS);
977: }
979: PetscErrorCode MatDestroy_MUMPS(Mat A)
980: {
981: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
983: PetscFunctionBegin;
984: PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
985: PetscCall(VecScatterDestroy(&mumps->scat_rhs));
986: PetscCall(VecScatterDestroy(&mumps->scat_sol));
987: PetscCall(VecDestroy(&mumps->b_seq));
988: PetscCall(VecDestroy(&mumps->x_seq));
989: PetscCall(PetscFree(mumps->id.perm_in));
990: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
991: PetscCall(PetscFree(mumps->val_alloc));
992: PetscCall(PetscFree(mumps->info));
993: PetscCall(PetscFree(mumps->ICNTL_pre));
994: PetscCall(PetscFree(mumps->CNTL_pre));
995: PetscCall(MatMumpsResetSchur_Private(mumps));
996: if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
997: mumps->id.job = JOB_END;
998: PetscMUMPS_c(mumps);
999: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1000: if (mumps->mumps_comm != MPI_COMM_NULL) {
1001: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1002: else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1003: }
1004: }
1005: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1006: if (mumps->use_petsc_omp_support) {
1007: PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1008: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1009: PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1010: }
1011: #endif
1012: PetscCall(PetscFree(mumps->ia_alloc));
1013: PetscCall(PetscFree(mumps->ja_alloc));
1014: PetscCall(PetscFree(mumps->recvcount));
1015: PetscCall(PetscFree(mumps->reqs));
1016: PetscCall(PetscFree(mumps->irhs_loc));
1017: PetscCall(PetscFree(A->data));
1019: /* clear composed functions */
1020: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1021: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1022: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1023: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1024: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1025: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1026: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1027: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1028: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1029: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1030: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1031: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1032: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1033: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1034: PetscFunctionReturn(PETSC_SUCCESS);
1035: }
1037: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1038: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1039: {
1040: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1041: const PetscMPIInt ompsize = mumps->omp_comm_size;
1042: PetscInt i, m, M, rstart;
1044: PetscFunctionBegin;
1045: PetscCall(MatGetSize(A, &M, NULL));
1046: PetscCall(MatGetLocalSize(A, &m, NULL));
1047:   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too large for PetscMUMPSInt");
1048: if (ompsize == 1) {
1049: if (!mumps->irhs_loc) {
1050: mumps->nloc_rhs = m;
1051: PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1052: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1053: for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1054: }
1055: mumps->id.rhs_loc = (MumpsScalar *)array;
1056: } else {
1057: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1058: const PetscInt *ranges;
1059: PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks;
1060: MPI_Group petsc_group, omp_group;
1061: PetscScalar *recvbuf = NULL;
1063: if (mumps->is_omp_master) {
1064: /* Lazily initialize the omp stuff for distributed rhs */
1065: if (!mumps->irhs_loc) {
1066: PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1067: PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1068: PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1069: PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1070: for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1071: PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1073: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1074: mumps->nloc_rhs = 0;
1075: PetscCall(MatGetOwnershipRanges(A, &ranges));
1076: for (j = 0; j < ompsize; j++) {
1077: mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1078: mumps->nloc_rhs += mumps->rhs_nrow[j];
1079: }
1080: PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1081: for (j = k = 0; j < ompsize; j++) {
1082: for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1083: }
1085: PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1086: PetscCallMPI(MPI_Group_free(&petsc_group));
1087: PetscCallMPI(MPI_Group_free(&omp_group));
1088: }
1090: /* Realloc buffers when current nrhs is bigger than what we have met */
1091: if (nrhs > mumps->max_nrhs) {
1092: PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1093: PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1094: mumps->max_nrhs = nrhs;
1095: }
1097:       /* Set up recvcounts[], disps[] and recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1098: for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1099: mumps->rhs_disps[0] = 0;
1100: for (j = 1; j < ompsize; j++) {
1101: mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1102: PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1103: }
1104: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1105: }
1107: PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1108: PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1110: if (mumps->is_omp_master) {
1111: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1112: PetscScalar *dst, *dstbase = mumps->rhs_loc;
1113: for (j = 0; j < ompsize; j++) {
1114: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1115: dst = dstbase;
1116: for (i = 0; i < nrhs; i++) {
1117: PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1118: src += mumps->rhs_nrow[j];
1119: dst += mumps->nloc_rhs;
1120: }
1121: dstbase += mumps->rhs_nrow[j];
1122: }
1123: }
1124: mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1125: }
1126: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1127: }
1128: mumps->id.nrhs = nrhs;
1129: mumps->id.nloc_rhs = mumps->nloc_rhs;
1130: mumps->id.lrhs_loc = mumps->nloc_rhs;
1131: mumps->id.irhs_loc = mumps->irhs_loc;
1132: PetscFunctionReturn(PETSC_SUCCESS);
1133: }
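/*
   Illustrative example: with two MPI ranks each owning two rows of A, the ompsize == 1 branch above
   produces the 1-based global row indices MUMPS expects,
     rank 0: irhs_loc = {1, 2},   rank 1: irhs_loc = {3, 4},
   and points id.rhs_loc directly at the caller's array, so the dense rhs is never copied.
*/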
1135: PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1136: {
1137: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1138: const PetscScalar *rarray = NULL;
1139: PetscScalar *array;
1140: IS is_iden, is_petsc;
1141: PetscInt i;
1142: PetscBool second_solve = PETSC_FALSE;
1143: static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1145: PetscFunctionBegin;
1146: PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
1147: "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
1148: &cite1));
1149: PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
1150: "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
1151: &cite2));
1153: if (A->factorerrortype) {
1154: PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1155: PetscCall(VecSetInf(x));
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
1159: mumps->id.nrhs = 1;
1160: if (mumps->petsc_size > 1) {
1161: if (mumps->ICNTL20 == 10) {
1162: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1163: PetscCall(VecGetArrayRead(b, &rarray));
1164: PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1165: } else {
1166: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1167: PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1168: PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1169: if (!mumps->myid) {
1170: PetscCall(VecGetArray(mumps->b_seq, &array));
1171: mumps->id.rhs = (MumpsScalar *)array;
1172: }
1173: }
1174: } else { /* petsc_size == 1 */
1175: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1176: PetscCall(VecCopy(b, x));
1177: PetscCall(VecGetArray(x, &array));
1178: mumps->id.rhs = (MumpsScalar *)array;
1179: }
1181: /*
1182: handle condensation step of Schur complement (if any)
1183:      By default we set ICNTL(26) == -1 when Schur indices have been provided by the user.
1184:      According to the MUMPS (5.0.0) manual, any other value could be harmful during the factorization phase.
1185:      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1186:      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1187: */
1188: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1189: PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1190: second_solve = PETSC_TRUE;
1191: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1192: }
1193: /* solve phase */
1194: mumps->id.job = JOB_SOLVE;
1195: PetscMUMPS_c(mumps);
1196: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1198: /* handle expansion step of Schur complement (if any) */
1199: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1201: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1202: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1203:       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though its size lsol_loc does not), so we recreate scat_sol */
1204: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1205: }
1206: if (!mumps->scat_sol) { /* create scatter scat_sol */
1207: PetscInt *isol2_loc = NULL;
1208: PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1209: PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1210: for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
1211: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1212: PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1213: PetscCall(ISDestroy(&is_iden));
1214: PetscCall(ISDestroy(&is_petsc));
1215: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1216: }
1218: PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1219: PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1220: }
1222: if (mumps->petsc_size > 1) {
1223: if (mumps->ICNTL20 == 10) {
1224: PetscCall(VecRestoreArrayRead(b, &rarray));
1225: } else if (!mumps->myid) {
1226: PetscCall(VecRestoreArray(mumps->b_seq, &array));
1227: }
1228: } else PetscCall(VecRestoreArray(x, &array));
1230: PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1231: PetscFunctionReturn(PETSC_SUCCESS);
1232: }
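/*
   Sketch (not compiled) of how a user typically reaches MatSolve_MUMPS() through the factor interface;
   the matrices A, F and vectors b, x are hypothetical. The same path is selected from the command line
   with -pc_type lu (or cholesky) -pc_factor_mat_solver_type mumps.
*/
#if 0
{
  Mat           F;
  MatFactorInfo info;

  PetscCall(MatFactorInfoInitialize(&info));
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
  PetscCall(MatLUFactorNumeric(F, A, &info));
  PetscCall(MatSolve(F, b, x)); /* dispatches to MatSolve_MUMPS() */
}
#endif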
1234: PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1235: {
1236: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1237: const PetscMUMPSInt value = mumps->id.ICNTL(9);
1239: PetscFunctionBegin;
1240: mumps->id.ICNTL(9) = 0;
1241: PetscCall(MatSolve_MUMPS(A, b, x));
1242: mumps->id.ICNTL(9) = value;
1243: PetscFunctionReturn(PETSC_SUCCESS);
1244: }
1246: PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1247: {
1248: Mat Bt = NULL;
1249: PetscBool denseX, denseB, flg, flgT;
1250: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1251: PetscInt i, nrhs, M;
1252: PetscScalar *array;
1253: const PetscScalar *rbray;
1254: PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0;
1255: PetscMUMPSInt *isol_loc, *isol_loc_save;
1256: PetscScalar *bray, *sol_loc, *sol_loc_save;
1257: IS is_to, is_from;
1258: PetscInt k, proc, j, m, myrstart;
1259: const PetscInt *rstart;
1260: Vec v_mpi, msol_loc;
1261: VecScatter scat_sol;
1262: Vec b_seq;
1263: VecScatter scat_rhs;
1264: PetscScalar *aa;
1265: PetscInt spnr, *ia, *ja;
1266: Mat_MPIAIJ *b = NULL;
1268: PetscFunctionBegin;
1269: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1270: PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1272: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1273: if (denseB) {
1274: PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1275: mumps->id.ICNTL(20) = 0; /* dense RHS */
1276: } else { /* sparse B */
1277: PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1278: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1279:     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1280:          because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1281: PetscCall(MatTransposeGetMat(B, &Bt));
1282: } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1283: mumps->id.ICNTL(20) = 1; /* sparse RHS */
1284: }
1286: PetscCall(MatGetSize(B, &M, &nrhs));
1287: mumps->id.nrhs = nrhs;
1288: mumps->id.lrhs = M;
1289: mumps->id.rhs = NULL;
1291: if (mumps->petsc_size == 1) {
1292: PetscScalar *aa;
1293: PetscInt spnr, *ia, *ja;
1294: PetscBool second_solve = PETSC_FALSE;
1296: PetscCall(MatDenseGetArray(X, &array));
1297: mumps->id.rhs = (MumpsScalar *)array;
1299: if (denseB) {
1300: /* copy B to X */
1301: PetscCall(MatDenseGetArrayRead(B, &rbray));
1302: PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1303: PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1304: } else { /* sparse B */
1305: PetscCall(MatSeqAIJGetArray(Bt, &aa));
1306: PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1307: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1308: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1309: mumps->id.rhs_sparse = (MumpsScalar *)aa;
1310: }
1311: /* handle condensation step of Schur complement (if any) */
1312: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1313: second_solve = PETSC_TRUE;
1314: PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1315: }
1316: /* solve phase */
1317: mumps->id.job = JOB_SOLVE;
1318: PetscMUMPS_c(mumps);
1319: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1321: /* handle expansion step of Schur complement (if any) */
1322: if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1323: if (!denseB) { /* sparse B */
1324: PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1325: PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1326: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1327: }
1328: PetscCall(MatDenseRestoreArray(X, &array));
1329: PetscFunctionReturn(PETSC_SUCCESS);
1330: }
1332: /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1333: PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1335: /* create msol_loc to hold mumps local solution */
1336: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1337: sol_loc_save = (PetscScalar *)mumps->id.sol_loc;
1339: lsol_loc = mumps->id.lsol_loc;
1340: nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1341: PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1342: mumps->id.sol_loc = (MumpsScalar *)sol_loc;
1343: mumps->id.isol_loc = isol_loc;
1345: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1347: if (denseB) {
1348: if (mumps->ICNTL20 == 10) {
1349: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1350: PetscCall(MatDenseGetArrayRead(B, &rbray));
1351: PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1352: PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1353: PetscCall(MatGetLocalSize(B, &m, NULL));
1354: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1355: } else {
1356: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1357:       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here cannot be implemented as an MPI_Gather,
1358:          resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero() to gather B onto rank 0
1359:          and then, on rank 0, rearrange B into the desired order, which is a local operation.
1360: */
1362: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1363: /* wrap dense rhs matrix B into a vector v_mpi */
1364: PetscCall(MatGetLocalSize(B, &m, NULL));
1365: PetscCall(MatDenseGetArray(B, &bray));
1366: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1367: PetscCall(MatDenseRestoreArray(B, &bray));
1369: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1370: if (!mumps->myid) {
1371: PetscInt *idx;
1372: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1373: PetscCall(PetscMalloc1(nrhs * M, &idx));
1374: PetscCall(MatGetOwnershipRanges(B, &rstart));
1375: k = 0;
1376: for (proc = 0; proc < mumps->petsc_size; proc++) {
1377: for (j = 0; j < nrhs; j++) {
1378: for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1379: }
1380: }
1382: PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1383: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1384: PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1385: } else {
1386: PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1387: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1388: PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1389: }
1390: PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1391: PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1392: PetscCall(ISDestroy(&is_to));
1393: PetscCall(ISDestroy(&is_from));
1394: PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1396: if (!mumps->myid) { /* define rhs on the host */
1397: PetscCall(VecGetArray(b_seq, &bray));
1398: mumps->id.rhs = (MumpsScalar *)bray;
1399: PetscCall(VecRestoreArray(b_seq, &bray));
1400: }
1401: }
1402: } else { /* sparse B */
1403: b = (Mat_MPIAIJ *)Bt->data;
1405: /* wrap dense X into a vector v_mpi */
1406: PetscCall(MatGetLocalSize(X, &m, NULL));
1407: PetscCall(MatDenseGetArray(X, &bray));
1408: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1409: PetscCall(MatDenseRestoreArray(X, &bray));
1411: if (!mumps->myid) {
1412: PetscCall(MatSeqAIJGetArray(b->A, &aa));
1413: PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1414: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1415: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1416: mumps->id.rhs_sparse = (MumpsScalar *)aa;
1417: } else {
1418: mumps->id.irhs_ptr = NULL;
1419: mumps->id.irhs_sparse = NULL;
1420: mumps->id.nz_rhs = 0;
1421: mumps->id.rhs_sparse = NULL;
1422: }
1423: }
1425: /* solve phase */
1426: mumps->id.job = JOB_SOLVE;
1427: PetscMUMPS_c(mumps);
1428: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1430: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1431: PetscCall(MatDenseGetArray(X, &array));
1432: PetscCall(VecPlaceArray(v_mpi, array));
1434: /* create scatter scat_sol */
1435: PetscCall(MatGetOwnershipRanges(X, &rstart));
1436: /* iidx: index for scatter mumps solution to petsc X */
1438: PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1439: PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1440: for (i = 0; i < lsol_loc; i++) {
1441: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1443: for (proc = 0; proc < mumps->petsc_size; proc++) {
1444: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1445: myrstart = rstart[proc];
1446: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1447: iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1448: m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1449: break;
1450: }
1451: }
1453: for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1454: }
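/* Why this formula works: the block of X owned by PETSc rank p occupies global v_mpi indices
   [rstart[p]*nrhs, rstart[p+1]*nrhs) in column-major order with m = rstart[p+1]-rstart[p] rows per column,
   so entry (isol_loc[i], j) of X lives at rstart[p]*nrhs + j*m + (isol_loc[i]-rstart[p]), i.e. iidx + j*m as computed above */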
1455: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1456: PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1457: PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1458: PetscCall(ISDestroy(&is_from));
1459: PetscCall(ISDestroy(&is_to));
1460: PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1461: PetscCall(MatDenseRestoreArray(X, &array));
1463: /* free spaces */
1464: mumps->id.sol_loc = (MumpsScalar *)sol_loc_save;
1465: mumps->id.isol_loc = isol_loc_save;
1467: PetscCall(PetscFree2(sol_loc, isol_loc));
1468: PetscCall(PetscFree(idxx));
1469: PetscCall(VecDestroy(&msol_loc));
1470: PetscCall(VecDestroy(&v_mpi));
1471: if (!denseB) {
1472: if (!mumps->myid) {
1473: b = (Mat_MPIAIJ *)Bt->data;
1474: PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1475: PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1476: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1477: }
1478: } else {
1479: if (mumps->ICNTL20 == 0) {
1480: PetscCall(VecDestroy(&b_seq));
1481: PetscCall(VecScatterDestroy(&scat_rhs));
1482: }
1483: }
1484: PetscCall(VecScatterDestroy(&scat_sol));
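/* Log an estimated flop count for the solves: the expression below is the usual 2*nnz(factors) - n cost of a
   forward/backward substitution per right-hand side, with INFO(28) supplying the entry count; a negative INFO(28)
   encodes the count in millions (the same MUMPS convention handled in MatGetInfo_MUMPS below), hence the -1000000 factor */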
1485: PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1486: PetscFunctionReturn(PETSC_SUCCESS);
1487: }
1489: PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1490: {
1491: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1492: const PetscMUMPSInt value = mumps->id.ICNTL(9);
1494: PetscFunctionBegin;
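/* MUMPS ICNTL(9) selects whether A x = b (ICNTL(9)=1, the default) or A^T x = b is solved, so temporarily set it
   to a value != 1 and restore the cached value after the call */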
1495: mumps->id.ICNTL(9) = 0;
1496: PetscCall(MatMatSolve_MUMPS(A, B, X));
1497: mumps->id.ICNTL(9) = value;
1498: PetscFunctionReturn(PETSC_SUCCESS);
1499: }
1501: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1502: {
1503: PetscBool flg;
1504: Mat B;
1506: PetscFunctionBegin;
1507: PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1508: PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1510: /* Create B=Bt^T that uses Bt's data structure */
1511: PetscCall(MatCreateTranspose(Bt, &B));
1513: PetscCall(MatMatSolve_MUMPS(A, B, X));
1514: PetscCall(MatDestroy(&B));
1515: PetscFunctionReturn(PETSC_SUCCESS);
1516: }
1518: #if !defined(PETSC_USE_COMPLEX)
1519: /*
1520: input:
1521: F: numeric factor
1522: output:
1523: nneg: total number of negative pivots
1524: nzero: total number of zero pivots
1525: npos: (global dimension of F) - nneg - nzero
1526: */
1527: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1528: {
1529: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1530: PetscMPIInt size;
1532: PetscFunctionBegin;
1533: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1534: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1535: PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
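/* For a symmetric factorization, INFOG(12) is the number of negative pivots and, when null pivot detection is on
   (ICNTL(24)=1), INFOG(28) is the number of null pivots; npos then follows as N - nneg - nzero */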
1537: if (nneg) *nneg = mumps->id.INFOG(12);
1538: if (nzero || npos) {
1539: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1540: if (nzero) *nzero = mumps->id.INFOG(28);
1541: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1542: }
1543: PetscFunctionReturn(PETSC_SUCCESS);
1544: }
1545: #endif
1547: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1548: {
1549: PetscInt i, nreqs;
1550: PetscMUMPSInt *irn, *jcn;
1551: PetscMPIInt count;
1552: PetscInt64 totnnz, remain;
1553: const PetscInt osize = mumps->omp_comm_size;
1554: PetscScalar *val;
1556: PetscFunctionBegin;
1557: if (osize > 1) {
1558: if (reuse == MAT_INITIAL_MATRIX) {
1559: /* master first gathers counts of nonzeros to receive */
1560: if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1561: PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1563: /* Then each computes number of send/recvs */
1564: if (mumps->is_omp_master) {
1565: /* Start from 1 since self communication is not done in MPI */
1566: nreqs = 0;
1567: for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1568: } else {
1569: nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1570: }
1571: PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1573: /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1574: MPI_Gatherv would suffice if it supported counts > 2^31-1. Since it does not, and mumps->nnz might exceed
1575: 2^31-1 (it could even be a prime number, so it cannot be split into equal smaller chunks), we have to slice
1576: the messages. Since omp_comm_size is very small, the current approach should add no extra overhead compared
1577: to MPI_Gatherv. */
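/* Example of the slicing: a rank holding nnz = 5e9 triples with PETSC_MPI_INT_MAX = 2^31-1 posts three
   Isend/Irecv slices per array (two full slices of 2^31-1 entries plus the remainder), matching the ceiling
   division used for nreqs above */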
1578: nreqs = 0; /* counter for actual send/recvs */
1579: if (mumps->is_omp_master) {
1580: for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1581: PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1582: PetscCall(PetscMalloc1(totnnz, &val));
1584: /* Self communication */
1585: PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1586: PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1587: PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1589: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1590: PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1591: PetscCall(PetscFree(mumps->val_alloc));
1592: mumps->nnz = totnnz;
1593: mumps->irn = irn;
1594: mumps->jcn = jcn;
1595: mumps->val = mumps->val_alloc = val;
1597: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1598: jcn += mumps->recvcount[0];
1599: val += mumps->recvcount[0];
1601: /* Remote communication */
1602: for (i = 1; i < osize; i++) {
1603: count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1604: remain = mumps->recvcount[i] - count;
1605: while (count > 0) {
1606: PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1607: PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1608: PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1609: irn += count;
1610: jcn += count;
1611: val += count;
1612: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1613: remain -= count;
1614: }
1615: }
1616: } else {
1617: irn = mumps->irn;
1618: jcn = mumps->jcn;
1619: val = mumps->val;
1620: count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1621: remain = mumps->nnz - count;
1622: while (count > 0) {
1623: PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1624: PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1625: PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1626: irn += count;
1627: jcn += count;
1628: val += count;
1629: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1630: remain -= count;
1631: }
1632: }
1633: } else {
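/* MAT_REUSE_MATRIX: the row/column indices gathered at MAT_INITIAL_MATRIX are still valid, so only the numerical
   values in val are re-gathered onto the omp master */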
1634: nreqs = 0;
1635: if (mumps->is_omp_master) {
1636: val = mumps->val + mumps->recvcount[0];
1637: for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1638: count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1639: remain = mumps->recvcount[i] - count;
1640: while (count > 0) {
1641: PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1642: val += count;
1643: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1644: remain -= count;
1645: }
1646: }
1647: } else {
1648: val = mumps->val;
1649: count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1650: remain = mumps->nnz - count;
1651: while (count > 0) {
1652: PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1653: val += count;
1654: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1655: remain -= count;
1656: }
1657: }
1658: }
1659: PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1660: mumps->tag++; /* it is fine for the above sends/recvs to share one MPI tag */
1661: }
1662: PetscFunctionReturn(PETSC_SUCCESS);
1663: }
1665: PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1666: {
1667: Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1668: PetscBool isMPIAIJ;
1670: PetscFunctionBegin;
1671: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* the analysis phase failed (the empty-matrix case INFOG(1)=-16 with INFO(2)=N=0 is not treated as a failure): skip numeric factorization */
1672: if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1673: PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1674: PetscFunctionReturn(PETSC_SUCCESS);
1675: }
1677: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1678: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1680: /* numerical factorization phase */
1681: mumps->id.job = JOB_FACTNUMERIC;
1682: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1683: if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1684: } else {
1685: mumps->id.a_loc = (MumpsScalar *)mumps->val;
1686: }
1687: PetscMUMPS_c(mumps);
1688: if (mumps->id.INFOG(1) < 0) {
1689: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1690: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1691: PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1693: } else if (mumps->id.INFOG(1) == -13) {
1694: PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1695: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1696: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1697: PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1698: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1699: } else {
1700: PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1701: F->factorerrortype = MAT_FACTOR_OTHER;
1702: }
1703: }
1704: PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16) = %d", mumps->id.ICNTL(16));
1706: F->assembled = PETSC_TRUE;
1708: if (F->schur) { /* reset Schur status to unfactored */
1709: #if defined(PETSC_HAVE_CUDA)
1710: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711: #endif
1712: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713: mumps->id.ICNTL(19) = 2;
1714: PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1715: }
1716: PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1717: }
1719: /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1720: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1722: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723: if (mumps->petsc_size > 1) {
1724: PetscInt lsol_loc;
1725: PetscScalar *sol_loc;
1727: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1729: /* distributed solution; Create x_seq=sol_loc for repeated use */
1730: if (mumps->x_seq) {
1731: PetscCall(VecScatterDestroy(&mumps->scat_sol));
1732: PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1733: PetscCall(VecDestroy(&mumps->x_seq));
1734: }
1735: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1736: PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1737: mumps->id.lsol_loc = lsol_loc;
1738: mumps->id.sol_loc = (MumpsScalar *)sol_loc;
1739: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1740: }
1741: PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1742: PetscFunctionReturn(PETSC_SUCCESS);
1743: }
1745: /* Sets MUMPS options from the options database */
1746: PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1747: {
1748: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1749: PetscMUMPSInt icntl = 0, size, *listvar_schur;
1750: PetscInt info[80], i, ninfo = 80, rbs, cbs;
1751: PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1752: MumpsScalar *arr;
1754: PetscFunctionBegin;
1755: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1756: if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1757: PetscInt nthreads = 0;
1758: PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1759: PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1761: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1762: PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1763: PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1765: PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1766: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user does not supply one */
1767: /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1768: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1769: if (mumps->use_petsc_omp_support) {
1770: PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1771: ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1772: PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1773: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1774: PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1775: PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1776: #endif
1777: } else {
1778: mumps->omp_comm = PETSC_COMM_SELF;
1779: mumps->mumps_comm = mumps->petsc_comm;
1780: mumps->is_omp_master = PETSC_TRUE;
1781: }
1782: PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1783: mumps->reqs = NULL;
1784: mumps->tag = 0;
1786: if (mumps->mumps_comm != MPI_COMM_NULL) {
1787: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1788: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1789: MPI_Comm comm;
1790: PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1791: mumps->mumps_comm = comm;
1792: } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1793: }
1795: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1796: mumps->id.job = JOB_INIT;
1797: mumps->id.par = 1; /* host participates in factorization and solve */
1798: mumps->id.sym = mumps->sym;
1800: size = mumps->id.size_schur;
1801: arr = mumps->id.schur;
1802: listvar_schur = mumps->id.listvar_schur;
1803: PetscMUMPS_c(mumps);
1804: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1805: /* restore cached ICNTL and CNTL values */
1806: for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1807: for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1808: PetscCall(PetscFree(mumps->ICNTL_pre));
1809: PetscCall(PetscFree(mumps->CNTL_pre));
1811: if (schur) {
1812: mumps->id.size_schur = size;
1813: mumps->id.schur_lld = size;
1814: mumps->id.schur = arr;
1815: mumps->id.listvar_schur = listvar_schur;
1816: if (mumps->petsc_size > 1) {
1817: PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1819: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
1820: gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1821: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1822: PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1823: } else {
1824: if (F->factortype == MAT_FACTOR_LU) {
1825: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1826: } else {
1827: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1828: }
1829: }
1830: mumps->id.ICNTL(26) = -1;
1831: }
1833: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1834: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1835: */
1836: PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1837: PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1839: mumps->scat_rhs = NULL;
1840: mumps->scat_sol = NULL;
1842: /* set PETSc-MUMPS default options - override MUMPS default */
1843: mumps->id.ICNTL(3) = 0;
1844: mumps->id.ICNTL(4) = 0;
1845: if (mumps->petsc_size == 1) {
1846: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1847: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1848: } else {
1849: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1850: mumps->id.ICNTL(21) = 1; /* distributed solution */
1851: }
1852: }
1853: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1854: if (flg) mumps->id.ICNTL(1) = icntl;
1855: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1856: if (flg) mumps->id.ICNTL(2) = icntl;
1857: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1858: if (flg) mumps->id.ICNTL(3) = icntl;
1860: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1861: if (flg) mumps->id.ICNTL(4) = icntl;
1862: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default ICNTL(3) = 6 */
1864: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1865: if (flg) mumps->id.ICNTL(6) = icntl;
1867: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1868: if (flg) {
1869: PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1870: mumps->id.ICNTL(7) = icntl;
1871: }
1873: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1874: /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1875: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1876: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1877: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1878: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1879: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1880: PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1881: if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1882: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1883: if (flg) {
1884: PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1885: PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The absolute value of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1886: }
1887: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1888: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1889: PetscCall(MatDestroy(&F->schur));
1890: PetscCall(MatMumpsResetSchur_Private(mumps));
1891: }
1893: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1894: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1895: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1896: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1897: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1898: In short, we could not use distributed RHS with MPICH until v4.0b1.
1899: */
1900: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1901: mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1902: #else
1903: mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1904: #endif
1905: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1906: PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1907: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1908: PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1909: #endif
1910: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1912: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1913: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1914: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1915: if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1917: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1918: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1919: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1920: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1921: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1922: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1923: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1924: /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1925: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1926: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1927: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1928: PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1930: PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1931: PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1932: PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1933: PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1934: PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1935: PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1937: PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1939: PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1940: if (ninfo) {
1941: PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1942: PetscCall(PetscMalloc1(ninfo, &mumps->info));
1943: mumps->ninfo = ninfo;
1944: for (i = 0; i < ninfo; i++) {
1945: PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1946: mumps->info[i] = info[i];
1947: }
1948: }
1949: PetscOptionsEnd();
1950: PetscFunctionReturn(PETSC_SUCCESS);
1951: }
1953: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1954: {
1955: PetscFunctionBegin;
1956: if (mumps->id.INFOG(1) < 0) {
1957: PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1958: if (mumps->id.INFOG(1) == -6) {
1959: PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1960: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1961: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1962: PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1964: } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFO(2) holds N when INFOG(1)=-16, so this is the empty-matrix case */
1965: PetscCall(PetscInfo(F, "Empty matrix\n"));
1966: } else {
1967: PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1968: F->factorerrortype = MAT_FACTOR_OTHER;
1969: }
1970: }
1971: PetscFunctionReturn(PETSC_SUCCESS);
1972: }
1974: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1975: {
1976: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1977: Vec b;
1978: const PetscInt M = A->rmap->N;
1980: PetscFunctionBegin;
1981: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1982: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1983: PetscFunctionReturn(PETSC_SUCCESS);
1984: }
1986: /* Set MUMPS options from the options database */
1987: PetscCall(MatSetFromOptions_MUMPS(F, A));
1989: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1990: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1992: /* analysis phase */
1993: mumps->id.job = JOB_FACTSYMBOLIC;
1994: mumps->id.n = M;
1995: switch (mumps->id.ICNTL(18)) {
1996: case 0: /* centralized assembled matrix input */
1997: if (!mumps->myid) {
1998: mumps->id.nnz = mumps->nnz;
1999: mumps->id.irn = mumps->irn;
2000: mumps->id.jcn = mumps->jcn;
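/* ICNTL(6) > 1 asks MUMPS to compute the zero-free-diagonal permutation (and possibly scaling) from the numerical
   values, so the entries must already be available at analysis time; otherwise only irn/jcn are needed here */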
2001: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2002: if (r) {
2003: mumps->id.ICNTL(7) = 1;
2004: if (!mumps->myid) {
2005: const PetscInt *idx;
2006: PetscInt i;
2008: PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2009: PetscCall(ISGetIndices(r, &idx));
2010: for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2011: PetscCall(ISRestoreIndices(r, &idx));
2012: }
2013: }
2014: }
2015: break;
2016: case 3: /* distributed assembled matrix input (size>1) */
2017: mumps->id.nnz_loc = mumps->nnz;
2018: mumps->id.irn_loc = mumps->irn;
2019: mumps->id.jcn_loc = mumps->jcn;
2020: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2021: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2022: PetscCall(MatCreateVecs(A, NULL, &b));
2023: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2024: PetscCall(VecDestroy(&b));
2025: }
2026: break;
2027: }
2028: PetscMUMPS_c(mumps);
2029: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2031: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2032: F->ops->solve = MatSolve_MUMPS;
2033: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2034: F->ops->matsolve = MatMatSolve_MUMPS;
2035: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2036: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2038: mumps->matstruc = SAME_NONZERO_PATTERN;
2039: PetscFunctionReturn(PETSC_SUCCESS);
2040: }
2042: /* Note the Petsc r and c permutations are ignored */
2043: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2044: {
2045: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2046: Vec b;
2047: const PetscInt M = A->rmap->N;
2049: PetscFunctionBegin;
2050: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2051: /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2052: PetscFunctionReturn(PETSC_SUCCESS);
2053: }
2055: /* Set MUMPS options from the options database */
2056: PetscCall(MatSetFromOptions_MUMPS(F, A));
2058: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2059: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2061: /* analysis phase */
2062: mumps->id.job = JOB_FACTSYMBOLIC;
2063: mumps->id.n = M;
2064: switch (mumps->id.ICNTL(18)) {
2065: case 0: /* centralized assembled matrix input */
2066: if (!mumps->myid) {
2067: mumps->id.nnz = mumps->nnz;
2068: mumps->id.irn = mumps->irn;
2069: mumps->id.jcn = mumps->jcn;
2070: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2071: }
2072: break;
2073: case 3: /* distributed assembled matrix input (size>1) */
2074: mumps->id.nnz_loc = mumps->nnz;
2075: mumps->id.irn_loc = mumps->irn;
2076: mumps->id.jcn_loc = mumps->jcn;
2077: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2078: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2079: PetscCall(MatCreateVecs(A, NULL, &b));
2080: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2081: PetscCall(VecDestroy(&b));
2082: }
2083: break;
2084: }
2085: PetscMUMPS_c(mumps);
2086: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2088: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2089: F->ops->solve = MatSolve_MUMPS;
2090: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2091: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2093: mumps->matstruc = SAME_NONZERO_PATTERN;
2094: PetscFunctionReturn(PETSC_SUCCESS);
2095: }
2097: /* Note the Petsc r permutation and factor info are ignored */
2098: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2099: {
2100: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2101: Vec b;
2102: const PetscInt M = A->rmap->N;
2104: PetscFunctionBegin;
2105: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2106: /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2107: PetscFunctionReturn(PETSC_SUCCESS);
2108: }
2110: /* Set MUMPS options from the options database */
2111: PetscCall(MatSetFromOptions_MUMPS(F, A));
2113: PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2114: PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2116: /* analysis phase */
2117: mumps->id.job = JOB_FACTSYMBOLIC;
2118: mumps->id.n = M;
2119: switch (mumps->id.ICNTL(18)) {
2120: case 0: /* centralized assembled matrix input */
2121: if (!mumps->myid) {
2122: mumps->id.nnz = mumps->nnz;
2123: mumps->id.irn = mumps->irn;
2124: mumps->id.jcn = mumps->jcn;
2125: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2126: }
2127: break;
2128: case 3: /* distributed assembled matrix input (size>1) */
2129: mumps->id.nnz_loc = mumps->nnz;
2130: mumps->id.irn_loc = mumps->irn;
2131: mumps->id.jcn_loc = mumps->jcn;
2132: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2133: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2134: PetscCall(MatCreateVecs(A, NULL, &b));
2135: PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2136: PetscCall(VecDestroy(&b));
2137: }
2138: break;
2139: }
2140: PetscMUMPS_c(mumps);
2141: PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2143: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2144: F->ops->solve = MatSolve_MUMPS;
2145: F->ops->solvetranspose = MatSolve_MUMPS;
2146: F->ops->matsolve = MatMatSolve_MUMPS;
2147: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2148: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2149: #if defined(PETSC_USE_COMPLEX)
2150: F->ops->getinertia = NULL;
2151: #else
2152: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2153: #endif
2155: mumps->matstruc = SAME_NONZERO_PATTERN;
2156: PetscFunctionReturn(PETSC_SUCCESS);
2157: }
2159: PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2160: {
2161: PetscBool iascii;
2162: PetscViewerFormat format;
2163: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2165: PetscFunctionBegin;
2166: /* check if matrix is mumps type */
2167: if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2169: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2170: if (iascii) {
2171: PetscCall(PetscViewerGetFormat(viewer, &format));
2172: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2173: PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2174: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2175: PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym));
2176: PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par));
2177: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1)));
2178: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2179: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3)));
2180: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4)));
2181: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5)));
2182: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6)));
2183: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2184: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8)));
2185: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10)));
2186: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11)));
2187: if (mumps->id.ICNTL(11) > 0) {
2188: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4)));
2189: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5)));
2190: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6)));
2191: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2192: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9)));
2193: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2194: }
2195: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
2196: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
2197: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2198: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
2199: /* ICNTL(16), ICNTL(17) not used */
2200: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
2201: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
2202: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
2203: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
2204: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
2205: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2207: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
2208: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
2209: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
2210: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
2211: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
2212: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));
2214: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
2215: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
2216: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
2217: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
2218: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
2219: PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
2221: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1)));
2222: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2223: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3)));
2224: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4)));
2225: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5)));
2226: PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7)));
2228: /* information local to each processor */
2229: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2230: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2231: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2232: PetscCall(PetscViewerFlush(viewer));
2233: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2234: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2235: PetscCall(PetscViewerFlush(viewer));
2236: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2237: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2238: PetscCall(PetscViewerFlush(viewer));
2240: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2241: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2242: PetscCall(PetscViewerFlush(viewer));
2244: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2245: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2246: PetscCall(PetscViewerFlush(viewer));
2248: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2249: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2250: PetscCall(PetscViewerFlush(viewer));
2252: if (mumps->ninfo && mumps->ninfo <= 80) {
2253: PetscInt i;
2254: for (i = 0; i < mumps->ninfo; i++) {
2255: PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2256: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2257: PetscCall(PetscViewerFlush(viewer));
2258: }
2259: }
2260: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2261: } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2263: if (mumps->myid == 0) { /* information from the host */
2264: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2265: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2266: PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2267: PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2269: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2270: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2271: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2272: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2273: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2274: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2275: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2276: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2277: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2278: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2279: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2280: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2281: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2282: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2283: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2284: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2285: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2286: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2287: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2288: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2289: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2290: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2291: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2292: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2293: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2294: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2295: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2296: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2297: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2298: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2299: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2300: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2301: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2302: PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2303: }
2304: }
2305: }
2306: PetscFunctionReturn(PETSC_SUCCESS);
2307: }
2309: PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2310: {
2311: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2313: PetscFunctionBegin;
2314: info->block_size = 1.0;
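/* INFOG(20) is MUMPS's estimated number of entries in the factors; when the count does not fit in a 32-bit
   integer MUMPS returns it negated in units of millions, hence the -1000000 conversion below */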
2315: info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2316: info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2317: info->nz_unneeded = 0.0;
2318: info->assemblies = 0.0;
2319: info->mallocs = 0.0;
2320: info->memory = 0.0;
2321: info->fill_ratio_given = 0;
2322: info->fill_ratio_needed = 0;
2323: info->factor_mallocs = 0;
2324: PetscFunctionReturn(PETSC_SUCCESS);
2325: }
2327: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2328: {
2329: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2330: const PetscScalar *arr;
2331: const PetscInt *idxs;
2332: PetscInt size, i;
2334: PetscFunctionBegin;
2335: PetscCall(ISGetLocalSize(is, &size));
2336: /* Schur complement matrix */
2337: PetscCall(MatDestroy(&F->schur));
2338: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2339: PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2340: mumps->id.schur = (MumpsScalar *)arr;
2341: mumps->id.size_schur = size;
2342: mumps->id.schur_lld = size;
2343: PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2344: if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2346: /* MUMPS expects Fortran style indices */
2347: PetscCall(PetscFree(mumps->id.listvar_schur));
2348: PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2349: PetscCall(ISGetIndices(is, &idxs));
2350: for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2351: PetscCall(ISRestoreIndices(is, &idxs));
2352: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2353: mumps->id.ICNTL(26) = -1;
2354: PetscFunctionReturn(PETSC_SUCCESS);
2355: }
2357: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2358: {
2359: Mat St;
2360: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2361: PetscScalar *array;
2362: #if defined(PETSC_USE_COMPLEX)
2363: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2364: #endif
2366: PetscFunctionBegin;
2367: PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2368: PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2369: PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2370: PetscCall(MatSetType(St, MATDENSE));
2371: PetscCall(MatSetUp(St));
2372: PetscCall(MatDenseGetArray(St, &array));
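/* Depending on ICNTL(19) and the symmetry of the factorization, MUMPS returns the Schur complement either as a
   full matrix or as a lower triangle, stored by rows (ICNTL(19)=1) or by columns; the loops below copy and, where
   needed, transpose/symmetrize it into PETSc's column-major dense storage */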
2373: if (!mumps->sym) { /* MUMPS always returns a full matrix */
2374: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2375: PetscInt i, j, N = mumps->id.size_schur;
2376: for (i = 0; i < N; i++) {
2377: for (j = 0; j < N; j++) {
2378: #if !defined(PETSC_USE_COMPLEX)
2379: PetscScalar val = mumps->id.schur[i * N + j];
2380: #else
2381: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2382: #endif
2383: array[j * N + i] = val;
2384: }
2385: }
2386: } else { /* stored by columns */
2387: PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2388: }
2389: } else { /* either full or lower-triangular (not packed) */
2390: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2391: PetscInt i, j, N = mumps->id.size_schur;
2392: for (i = 0; i < N; i++) {
2393: for (j = i; j < N; j++) {
2394: #if !defined(PETSC_USE_COMPLEX)
2395: PetscScalar val = mumps->id.schur[i * N + j];
2396: #else
2397: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2398: #endif
2399: array[i * N + j] = val;
2400: array[j * N + i] = val;
2401: }
2402: }
2403: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2404: PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2405: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2406: PetscInt i, j, N = mumps->id.size_schur;
2407: for (i = 0; i < N; i++) {
2408: for (j = 0; j < i + 1; j++) {
2409: #if !defined(PETSC_USE_COMPLEX)
2410: PetscScalar val = mumps->id.schur[i * N + j];
2411: #else
2412: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2413: #endif
2414: array[i * N + j] = val;
2415: array[j * N + i] = val;
2416: }
2417: }
2418: }
2419: }
2420: PetscCall(MatDenseRestoreArray(St, &array));
2421: *S = St;
2422: PetscFunctionReturn(PETSC_SUCCESS);
2423: }
2425: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2426: {
2427: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2429: PetscFunctionBegin;
2430: if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2431: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2432: for (i = 0; i < nICNTL_pre; ++i)
2433: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2434: if (i == nICNTL_pre) { /* not already cached */
2435: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2436: else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2437: mumps->ICNTL_pre[0]++;
2438: }
2439: mumps->ICNTL_pre[1 + 2 * i] = icntl;
2440: PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2441: } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2442: PetscFunctionReturn(PETSC_SUCCESS);
2443: }
2445: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2446: {
2447: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2449: PetscFunctionBegin;
2450: if (mumps->id.job == JOB_NULL) {
2451: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2452: *ival = 0;
2453: for (i = 0; i < nICNTL_pre; ++i) {
2454: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2455: }
2456: } else *ival = mumps->id.ICNTL(icntl);
2457: PetscFunctionReturn(PETSC_SUCCESS);
2458: }
2460: /*@
2461: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2463: Logically Collective
2465: Input Parameters:
2466: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2467: . icntl - index of MUMPS parameter array ICNTL()
2468: - ival - value of MUMPS ICNTL(icntl)
2470: Options Database Key:
2471: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2473: Level: beginner
2475: References:
2476: . * - MUMPS Users' Guide
2478: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2479: @*/
2480: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2481: {
2482: PetscFunctionBegin;
2484: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2487: PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2488: PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2489: PetscFunctionReturn(PETSC_SUCCESS);
2490: }
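/*
   A minimal usage sketch (A, F, and the chosen value are illustrative assumptions): set ICNTL(14), the
   percentage increase in the estimated working space, before the factorization. The same effect can be
   obtained from the options database with -mat_mumps_icntl_14 40 (using the appropriate options prefix).

     Mat           A, F;
     IS            rowperm, colperm;
     MatFactorInfo info;
     PetscInt      icntl14;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 40));                  // ICNTL(14): raise the working-space increase to 40%
     PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rowperm, &colperm));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, rowperm, colperm, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatMumpsGetIcntl(F, 14, &icntl14));            // read the value back with MatMumpsGetIcntl()
     PetscCall(ISDestroy(&rowperm));
     PetscCall(ISDestroy(&colperm));
*/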
2492: /*@
2493: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2495: Logically Collective
2497: Input Parameters:
2498: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2499: - icntl - index of MUMPS parameter array ICNTL()
2501: Output Parameter:
2502: . ival - value of MUMPS ICNTL(icntl)
2504: Level: beginner
2506: References:
2507: . * - MUMPS Users' Guide
2509: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2510: @*/
2511: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2512: {
2513: PetscFunctionBegin;
2515: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2518: PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2519: PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2520: PetscFunctionReturn(PETSC_SUCCESS);
2521: }
2523: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2524: {
2525: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2527: PetscFunctionBegin;
2528: if (mumps->id.job == JOB_NULL) {
2529: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2530: for (i = 0; i < nCNTL_pre; ++i)
2531: if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2532: if (i == nCNTL_pre) {
2533: if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2534: else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2535: mumps->CNTL_pre[0]++;
2536: }
2537: mumps->CNTL_pre[1 + 2 * i] = icntl;
2538: mumps->CNTL_pre[2 + 2 * i] = val;
2539: } else mumps->id.CNTL(icntl) = val;
2540: PetscFunctionReturn(PETSC_SUCCESS);
2541: }
2543: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2544: {
2545: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2547: PetscFunctionBegin;
2548: if (mumps->id.job == JOB_NULL) {
2549: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2550: *val = 0.0;
2551: for (i = 0; i < nCNTL_pre; ++i) {
2552: if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2553: }
2554: } else *val = mumps->id.CNTL(icntl);
2555: PetscFunctionReturn(PETSC_SUCCESS);
2556: }
2558: /*@
2559: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2561: Logically Collective
2563: Input Parameters:
2564: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2565: . icntl - index of MUMPS parameter array CNTL()
2566: - val - value of MUMPS CNTL(icntl)
2568: Options Database Key:
2569: . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
2571: Level: beginner
2573: References:
2574: . * - MUMPS Users' Guide
2576: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2577: @*/
2578: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2579: {
2580: PetscFunctionBegin;
2582: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2585: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2586: PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2587: PetscFunctionReturn(PETSC_SUCCESS);
2588: }
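/*
   A minimal sketch (F and the value 0.01 are illustrative assumptions): adjust the relative pivoting
   threshold CNTL(1) on an existing MUMPS factor matrix F before the numeric factorization; the options
   database equivalent is -mat_mumps_cntl_1 0.01.

     PetscCall(MatMumpsSetCntl(F, 1, 0.01));   // CNTL(1): relative pivoting threshold
*/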
2590: /*@
2591: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2593: Logically Collective
2595: Input Parameters:
2596: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2597: - icntl - index of MUMPS parameter array CNTL()
2599: Output Parameter:
2600: . val - value of MUMPS CNTL(icntl)
2602: Level: beginner
2604: References:
2605: . * - MUMPS Users' Guide
2607: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2608: @*/
2609: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2610: {
2611: PetscFunctionBegin;
2613: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2616: PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2617: PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2618: PetscFunctionReturn(PETSC_SUCCESS);
2619: }
2621: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2622: {
2623: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2625: PetscFunctionBegin;
2626: *info = mumps->id.INFO(icntl);
2627: PetscFunctionReturn(PETSC_SUCCESS);
2628: }
2630: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2631: {
2632: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2634: PetscFunctionBegin;
2635: *infog = mumps->id.INFOG(icntl);
2636: PetscFunctionReturn(PETSC_SUCCESS);
2637: }
2639: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2640: {
2641: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2643: PetscFunctionBegin;
2644: *rinfo = mumps->id.RINFO(icntl);
2645: PetscFunctionReturn(PETSC_SUCCESS);
2646: }
2648: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2649: {
2650: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2652: PetscFunctionBegin;
2653: *rinfog = mumps->id.RINFOG(icntl);
2654: PetscFunctionReturn(PETSC_SUCCESS);
2655: }
2657: PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2658: {
2659: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2661: PetscFunctionBegin;
2662: PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2663: *size = 0;
2664: *array = NULL;
2665: if (!mumps->myid) {
2666: *size = mumps->id.INFOG(28);
2667: PetscCall(PetscMalloc1(*size, array));
2668: for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2669: }
2670: PetscFunctionReturn(PETSC_SUCCESS);
2671: }
2673: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2674: {
2675: Mat Bt = NULL, Btseq = NULL;
2676: PetscBool flg;
2677: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2678: PetscScalar *aa;
2679: PetscInt spnr, *ia, *ja, M, nrhs;
2681: PetscFunctionBegin;
2683: PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2684: if (flg) {
2685: PetscCall(MatTransposeGetMat(spRHS, &Bt));
2686: } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2688: PetscCall(MatMumpsSetIcntl(F, 30, 1));
2690: if (mumps->petsc_size > 1) {
2691: Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2692: Btseq = b->A;
2693: } else {
2694: Btseq = Bt;
2695: }
2697: PetscCall(MatGetSize(spRHS, &M, &nrhs));
2698: mumps->id.nrhs = nrhs;
2699: mumps->id.lrhs = M;
2700: mumps->id.rhs = NULL;
2702: if (!mumps->myid) {
2703: PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2704: PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2705: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2706: PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2707: mumps->id.rhs_sparse = (MumpsScalar *)aa;
2708: } else {
2709: mumps->id.irhs_ptr = NULL;
2710: mumps->id.irhs_sparse = NULL;
2711: mumps->id.nz_rhs = 0;
2712: mumps->id.rhs_sparse = NULL;
2713: }
2714: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2715: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2717: /* solve phase */
2718: mumps->id.job = JOB_SOLVE;
2719: PetscMUMPS_c(mumps);
2720: PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2722: if (!mumps->myid) {
2723: PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2724: PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2725: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2726: }
2727: PetscFunctionReturn(PETSC_SUCCESS);
2728: }
2730: /*@
2731: MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2733: Logically Collective
2735: Input Parameter:
2736: . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2738: Output Parameter:
2739: . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2741: Level: beginner
2743: References:
2744: . * - MUMPS Users' Guide
2746: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2747: @*/
2748: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2749: {
2750: PetscFunctionBegin;
2752: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2753: PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2754: PetscFunctionReturn(PETSC_SUCCESS);
2755: }
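/*
   A minimal usage sketch (F is a MUMPS factor of A; spRHST is a user-assembled sequential MATAIJ whose
   nonzero pattern marks the requested entries of inv(A)^T; all names are illustrative assumptions):

     Mat spRHS;

     PetscCall(MatCreateTranspose(spRHST, &spRHS));   // wrap as MATTRANSPOSEVIRTUAL, as required by MatMumpsGetInverse()
     PetscCall(MatMumpsGetInverse(F, spRHS));         // the requested entries are returned in the sparse matrix spRHST
     PetscCall(MatDestroy(&spRHS));

   Alternatively, MatMumpsGetInverseTranspose(F, spRHST) performs this wrapping internally.
*/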
2757: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2758: {
2759: Mat spRHS;
2761: PetscFunctionBegin;
2762: PetscCall(MatCreateTranspose(spRHST, &spRHS));
2763: PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2764: PetscCall(MatDestroy(&spRHS));
2765: PetscFunctionReturn(PETSC_SUCCESS);
2766: }
2768: /*@
2769: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
2771: Logically Collective
2773: Input Parameter:
2774: . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2776: Output Parameter:
2777: . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
2779: Level: beginner
2781: References:
2782: . * - MUMPS Users' Guide
2784: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2785: @*/
2786: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2787: {
2788: PetscBool flg;
2790: PetscFunctionBegin;
2792: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2793: PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2794: PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2796: PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2797: PetscFunctionReturn(PETSC_SUCCESS);
2798: }
2800: /*@
2801: MatMumpsGetInfo - Get MUMPS parameter INFO()
2803: Logically Collective
2805: Input Parameters:
2806: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2807: - icntl - index of MUMPS parameter array INFO()
2809: Output Parameter:
2810: . ival - value of MUMPS INFO(icntl)
2812: Level: beginner
2814: References:
2815: . * - MUMPS Users' Guide
2817: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2818: @*/
2819: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2820: {
2821: PetscFunctionBegin;
2823: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2825: PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2826: PetscFunctionReturn(PETSC_SUCCESS);
2827: }
2829: /*@
2830: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2832: Logically Collective
2834: Input Parameters:
2835: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2836: - icntl - index of MUMPS parameter array INFOG()
2838: Output Parameter:
2839: . ival - value of MUMPS INFOG(icntl)
2841: Level: beginner
2843: References:
2844: . * - MUMPS Users' Guide
2846: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2847: @*/
2848: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2849: {
2850: PetscFunctionBegin;
2852: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2854: PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2855: PetscFunctionReturn(PETSC_SUCCESS);
2856: }
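/*
   A minimal sketch of querying MUMPS diagnostics after a factorization performed inside a KSP solve
   (ksp, pc, and F are illustrative assumptions; see also the MATSOLVERMUMPS manual page below):

     PC       pc;
     Mat      F;
     PetscInt infog1;

     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));   // INFOG(1) < 0 indicates an error reported by MUMPS
*/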
2858: /*@
2859: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2861: Logically Collective
2863: Input Parameters:
2864: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2865: - icntl - index of MUMPS parameter array RINFO()
2867: Output Parameter:
2868: . val - value of MUMPS RINFO(icntl)
2870: Level: beginner
2872: References:
2873: . * - MUMPS Users' Guide
2875: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2876: @*/
2877: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2878: {
2879: PetscFunctionBegin;
2881: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2883: PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2884: PetscFunctionReturn(PETSC_SUCCESS);
2885: }
2887: /*@
2888: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2890: Logically Collective
2892: Input Parameters:
2893: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2894: - icntl - index of MUMPS parameter array RINFOG()
2896: Output Parameter:
2897: . val - value of MUMPS RINFOG(icntl)
2899: Level: beginner
2901: References:
2902: . * - MUMPS Users' Guide
2904: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2905: @*/
2906: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2907: {
2908: PetscFunctionBegin;
2910: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2912: PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2913: PetscFunctionReturn(PETSC_SUCCESS);
2914: }
2916: /*@
2917: MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
2919: Logically Collective
2921: Input Parameter:
2922: . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2924: Output Parameters:
2925: + size - local size of the array; it is non-zero only on the host rank
2926: - array - array of the rows with a null pivot, using 0-based indexing. The array is allocated within the function and the user is responsible
2927: for freeing it with `PetscFree()`.
2929: Level: beginner
2931: References:
2932: . * - MUMPS Users' Guide
2934: .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2935: @*/
2936: PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
2937: {
2938: PetscFunctionBegin;
2940: PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2943: PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
2944: PetscFunctionReturn(PETSC_SUCCESS);
2945: }
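/*
   A minimal usage sketch (F is a MUMPS factor obtained with -mat_mumps_icntl_24 1, or after calling
   MatMumpsSetIcntl(F, 24, 1); the variable names are illustrative assumptions):

     PetscInt nnull, *rows;

     PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));   // nnull is nonzero only on the host rank
     for (PetscInt i = 0; i < nnull; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "null pivot row %" PetscInt_FMT "\n", rows[i]));
     PetscCall(PetscFree(rows));                           // the caller frees the array
*/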
2947: /*MC
2948: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2949: distributed and sequential matrices via the external package MUMPS.
2951: Works with `MATAIJ` and `MATSBAIJ` matrices; LU factorization is also registered for `MATBAIJ` and sequential `MATSELL` matrices
2953: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2955: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
2956: See details below.
2958: Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
2960: Options Database Keys:
2961: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2962: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2963: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2964: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2965: . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2966: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2967: Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2968: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2969: . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
2970: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2971: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2972: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2973: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2974: . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
2975: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2976: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2977: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2978: . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2979: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2980: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2981: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2982: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2983: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2984: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2985: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2986: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2987: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2988: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2989: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2990: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2991: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2992: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2993: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2994: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2995: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2996: - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2997: If m is not given, the default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2999: Level: beginner
3001: Notes:
3002: MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3003: error if the matrix is Hermitian.
3005: When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3006: `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3008: When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3009: the failure with
3010: .vb
3011: KSPGetPC(ksp,&pc);
3012: PCFactorGetMatrix(pc,&mat);
3013: MatMumpsGetInfo(mat,....);
3014: MatMumpsGetInfog(mat,....); etc.
3015: .ve
3016: Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3018: MUMPS provides 64-bit integer support in two build modes:
3019: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3020: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3022: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3023: MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3024: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3025: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3027: With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3029: There are two modes for running MUMPS/PETSc with OpenMP:
3030: .vb
3031: Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3032: threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3033: .ve
3035: .vb
3036: -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3037: if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3038: .ve
3040: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3041: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3042: (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3043: libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3044: (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3046: If you run your code through a job submission system, there are caveats regarding MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3047: processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
3048: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
3049: are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
3050: by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them.
3051: In a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets:
3052: if you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
3053: MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
3054: cores on socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
3055: problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
3056: For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3057: examine the mapping result.
3059: PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3060: for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3061: calls `omp_set_num_threads`(m) internally before calling MUMPS.
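   A job-script sketch combining the settings above (the scheduler, the executable name, and the rank/thread counts are illustrative assumptions)
.vb
   export OMP_PROC_BIND=spread
   export OMP_PLACES=threads
   # map consecutive MPI ranks to sockets and bind them to cores (Slurm)
   srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16
.ve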
3063: References:
3064: + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3065: - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3067: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3068: M*/
3070: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3071: {
3072: PetscFunctionBegin;
3073: *type = MATSOLVERMUMPS;
3074: PetscFunctionReturn(PETSC_SUCCESS);
3075: }
3077: /* MatGetFactor for Seq and MPI AIJ matrices */
3078: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3079: {
3080: Mat B;
3081: Mat_MUMPS *mumps;
3082: PetscBool isSeqAIJ;
3083: PetscMPIInt size;
3085: PetscFunctionBegin;
3086: #if defined(PETSC_USE_COMPLEX)
3087: PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3088: #endif
3089: /* Create the factorization matrix */
3090: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3091: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3092: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3093: PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3094: PetscCall(MatSetUp(B));
3096: PetscCall(PetscNew(&mumps));
3098: B->ops->view = MatView_MUMPS;
3099: B->ops->getinfo = MatGetInfo_MUMPS;
3101: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3102: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3103: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3104: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3105: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3106: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3107: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3108: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3109: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3110: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3111: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3112: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3113: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3114: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3116: if (ftype == MAT_FACTOR_LU) {
3117: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3118: B->factortype = MAT_FACTOR_LU;
3119: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3120: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3121: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3122: mumps->sym = 0;
3123: } else {
3124: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3125: B->factortype = MAT_FACTOR_CHOLESKY;
3126: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3127: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3128: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3129: #if defined(PETSC_USE_COMPLEX)
3130: mumps->sym = 2;
3131: #else
3132: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3133: else mumps->sym = 2;
3134: #endif
3135: }
3137: /* set solvertype */
3138: PetscCall(PetscFree(B->solvertype));
3139: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3140: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3141: if (size == 1) {
3142: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3143: B->canuseordering = PETSC_TRUE;
3144: }
3145: B->ops->destroy = MatDestroy_MUMPS;
3146: B->data = (void *)mumps;
3148: *F = B;
3149: mumps->id.job = JOB_NULL;
3150: mumps->ICNTL_pre = NULL;
3151: mumps->CNTL_pre = NULL;
3152: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3153: PetscFunctionReturn(PETSC_SUCCESS);
3154: }
3156: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3157: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3158: {
3159: Mat B;
3160: Mat_MUMPS *mumps;
3161: PetscBool isSeqSBAIJ;
3162: PetscMPIInt size;
3164: PetscFunctionBegin;
3165: #if defined(PETSC_USE_COMPLEX)
3166: PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3167: #endif
3168: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3169: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3170: PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3171: PetscCall(MatSetUp(B));
3173: PetscCall(PetscNew(&mumps));
3174: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3175: if (isSeqSBAIJ) {
3176: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3177: } else {
3178: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3179: }
3181: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3182: B->ops->view = MatView_MUMPS;
3183: B->ops->getinfo = MatGetInfo_MUMPS;
3185: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3186: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3187: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3188: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3189: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3190: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3191: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3192: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3193: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3194: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3195: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3196: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3197: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3198: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3200: B->factortype = MAT_FACTOR_CHOLESKY;
3201: #if defined(PETSC_USE_COMPLEX)
3202: mumps->sym = 2;
3203: #else
3204: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3205: else mumps->sym = 2;
3206: #endif
3208: /* set solvertype */
3209: PetscCall(PetscFree(B->solvertype));
3210: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3211: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3212: if (size == 1) {
3213: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3214: B->canuseordering = PETSC_TRUE;
3215: }
3216: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3217: B->ops->destroy = MatDestroy_MUMPS;
3218: B->data = (void *)mumps;
3220: *F = B;
3221: mumps->id.job = JOB_NULL;
3222: mumps->ICNTL_pre = NULL;
3223: mumps->CNTL_pre = NULL;
3224: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3225: PetscFunctionReturn(PETSC_SUCCESS);
3226: }
3228: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3229: {
3230: Mat B;
3231: Mat_MUMPS *mumps;
3232: PetscBool isSeqBAIJ;
3233: PetscMPIInt size;
3235: PetscFunctionBegin;
3236: /* Create the factorization matrix */
3237: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3238: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3239: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3240: PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3241: PetscCall(MatSetUp(B));
3243: PetscCall(PetscNew(&mumps));
3244: if (ftype == MAT_FACTOR_LU) {
3245: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3246: B->factortype = MAT_FACTOR_LU;
3247: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3248: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3249: mumps->sym = 0;
3250: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3251: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3253: B->ops->view = MatView_MUMPS;
3254: B->ops->getinfo = MatGetInfo_MUMPS;
3256: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3257: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3258: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3259: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3260: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3261: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3262: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3263: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3264: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3265: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3266: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3267: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3268: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3269: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3271: /* set solvertype */
3272: PetscCall(PetscFree(B->solvertype));
3273: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3274: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3275: if (size == 1) {
3276: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3277: B->canuseordering = PETSC_TRUE;
3278: }
3279: B->ops->destroy = MatDestroy_MUMPS;
3280: B->data = (void *)mumps;
3282: *F = B;
3283: mumps->id.job = JOB_NULL;
3284: mumps->ICNTL_pre = NULL;
3285: mumps->CNTL_pre = NULL;
3286: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3287: PetscFunctionReturn(PETSC_SUCCESS);
3288: }
3290: /* MatGetFactor for Seq and MPI SELL matrices */
3291: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3292: {
3293: Mat B;
3294: Mat_MUMPS *mumps;
3295: PetscBool isSeqSELL;
3296: PetscMPIInt size;
3298: PetscFunctionBegin;
3299: /* Create the factorization matrix */
3300: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3301: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3302: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3303: PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3304: PetscCall(MatSetUp(B));
3306: PetscCall(PetscNew(&mumps));
3308: B->ops->view = MatView_MUMPS;
3309: B->ops->getinfo = MatGetInfo_MUMPS;
3311: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3312: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3313: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3314: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3315: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3316: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3317: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3318: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3319: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3320: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3321: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3322: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3324: if (ftype == MAT_FACTOR_LU) {
3325: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3326: B->factortype = MAT_FACTOR_LU;
3327: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3328: else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3329: mumps->sym = 0;
3330: PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3331: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3333: /* set solvertype */
3334: PetscCall(PetscFree(B->solvertype));
3335: PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3336: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3337: if (size == 1) {
3338: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3339: B->canuseordering = PETSC_TRUE;
3340: }
3341: B->ops->destroy = MatDestroy_MUMPS;
3342: B->data = (void *)mumps;
3344: *F = B;
3345: mumps->id.job = JOB_NULL;
3346: mumps->ICNTL_pre = NULL;
3347: mumps->CNTL_pre = NULL;
3348: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3349: PetscFunctionReturn(PETSC_SUCCESS);
3350: }
3352: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3353: {
3354: PetscFunctionBegin;
3355: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3356: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3357: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3358: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3359: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3360: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3361: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3362: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3363: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3364: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3365: PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3366: PetscFunctionReturn(PETSC_SUCCESS);
3367: }
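/*
   A minimal sketch of selecting the solver registered above from user code (ksp and pc are illustrative
   assumptions); the command-line equivalent is -pc_type lu -pc_factor_mat_solver_type mumps.

     PC pc;

     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
*/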