Actual source code: bddcprivate.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <petsc/private/pcbddcimpl.h>
3: #include <petsc/private/pcbddcprivateimpl.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: PetscScalar *uwork, *data, *U, ds = 0.;
18: PetscReal *sing;
19: PetscBLASInt bM, bN, lwork, lierr, di = 1;
20: PetscInt ulw, i, nr, nc, n;
21: #if defined(PETSC_USE_COMPLEX)
22: PetscReal *rwork2;
23: #endif
25: PetscFunctionBegin;
26: PetscCall(MatGetSize(A, &nr, &nc));
27: if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29: /* workspace */
30: if (!work) {
31: ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
32: PetscCall(PetscMalloc1(ulw, &uwork));
33: } else {
34: ulw = lw;
35: uwork = work;
36: }
37: n = PetscMin(nr, nc);
38: if (!rwork) {
39: PetscCall(PetscMalloc1(n, &sing));
40: } else {
41: sing = rwork;
42: }
44: /* SVD */
45: PetscCall(PetscMalloc1(nr * nr, &U));
46: PetscCall(PetscBLASIntCast(nr, &bM));
47: PetscCall(PetscBLASIntCast(nc, &bN));
48: PetscCall(PetscBLASIntCast(ulw, &lwork));
49: PetscCall(MatDenseGetArray(A, &data));
50: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
51: #if !defined(PETSC_USE_COMPLEX)
52: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
53: #else
54: PetscCall(PetscMalloc1(5 * n, &rwork2));
55: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
56: PetscCall(PetscFree(rwork2));
57: #endif
58: PetscCall(PetscFPTrapPop());
59: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
60: PetscCall(MatDenseRestoreArray(A, &data));
61: for (i = 0; i < n; i++)
62: if (sing[i] < PETSC_SMALL) break;
63: if (!rwork) PetscCall(PetscFree(sing));
64: if (!work) PetscCall(PetscFree(uwork));
65: /* create B */
66: if (!range) {
67: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
68: PetscCall(MatDenseGetArray(*B, &data));
69: PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
70: } else {
71: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
72: PetscCall(MatDenseGetArray(*B, &data));
73: PetscCall(PetscArraycpy(data, U, i * nr));
74: }
75: PetscCall(MatDenseRestoreArray(*B, &data));
76: PetscCall(PetscFree(U));
77: PetscFunctionReturn(PETSC_SUCCESS);
78: }
80: /* TODO REMOVE */
81: #if defined(PRINT_GDET)
82: static int inc = 0;
83: static int lev = 0;
84: #endif
86: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
87: {
88: Mat GE, GEd;
89: PetscInt rsize, csize, esize;
90: PetscScalar *ptr;
92: PetscFunctionBegin;
93: PetscCall(ISGetSize(edge, &esize));
94: if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
95: PetscCall(ISGetSize(extrow, &rsize));
96: PetscCall(ISGetSize(extcol, &csize));
98: /* gradients */
99: ptr = work + 5 * esize;
100: PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
101: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
102: PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
103: PetscCall(MatDestroy(&GE));
105: /* constants */
106: ptr += rsize * csize;
107: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
108: PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
109: PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
110: PetscCall(MatDestroy(&GE));
111: PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
112: PetscCall(MatDestroy(&GEd));
114: if (corners) {
115: Mat GEc;
116: const PetscScalar *vals;
117: PetscScalar v;
119: PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
120: PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
121: PetscCall(MatDenseGetArrayRead(GEd, &vals));
122: /* v = PetscAbsScalar(vals[0]) */;
123: v = 1.;
124: cvals[0] = vals[0] / v;
125: cvals[1] = vals[1] / v;
126: PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
127: PetscCall(MatScale(*GKins, 1. / v));
128: #if defined(PRINT_GDET)
129: {
130: PetscViewer viewer;
131: char filename[256];
132: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
133: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
134: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
135: PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
136: PetscCall(MatView(GEc, viewer));
137: PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK"));
138: PetscCall(MatView(*GKins, viewer));
139: PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
140: PetscCall(MatView(GEd, viewer));
141: PetscCall(PetscViewerDestroy(&viewer));
142: }
143: #endif
144: PetscCall(MatDestroy(&GEd));
145: PetscCall(MatDestroy(&GEc));
146: }
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
151: PetscErrorCode PCBDDCNedelecSupport(PC pc)
152: {
153: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
154: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
155: Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
156: Vec tvec;
157: PetscSF sfv;
158: ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
159: MPI_Comm comm;
160: IS lned, primals, allprimals, nedfieldlocal;
161: IS *eedges, *extrows, *extcols, *alleedges;
162: PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
163: PetscScalar *vals, *work;
164: PetscReal *rwork;
165: const PetscInt *idxs, *ii, *jj, *iit, *jjt;
166: PetscInt ne, nv, Lv, order, n, field;
167: PetscInt n_neigh, *neigh, *n_shared, **shared;
168: PetscInt i, j, extmem, cum, maxsize, nee;
169: PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170: PetscInt *sfvleaves, *sfvroots;
171: PetscInt *corners, *cedges;
172: PetscInt *ecount, **eneighs, *vcount, **vneighs;
173: PetscInt *emarks;
174: PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal;
176: PetscFunctionBegin;
177: /* If the discrete gradient is defined for a subset of dofs and global is true,
178: it assumes G is given in global ordering for all the dofs.
179: Otherwise, the ordering is global for the Nedelec field */
180: order = pcbddc->nedorder;
181: conforming = pcbddc->conforming;
182: field = pcbddc->nedfield;
183: global = pcbddc->nedglobal;
184: setprimal = PETSC_FALSE;
185: print = PETSC_FALSE;
186: singular = PETSC_FALSE;
188: /* Command line customization */
189: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
190: PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
191: PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL));
192: PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
193: /* print debug info TODO: to be removed */
194: PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
195: PetscOptionsEnd();
197: /* Return if there are no edges in the decomposition and the problem is not singular */
198: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
199: PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
200: PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
201: if (!singular) {
202: PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
203: lrc[0] = PETSC_FALSE;
204: for (i = 0; i < n; i++) {
205: if (PetscRealPart(vals[i]) > 2.) {
206: lrc[0] = PETSC_TRUE;
207: break;
208: }
209: }
210: PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
211: PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
212: if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: /* Get Nedelec field */
216: PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
217: if (pcbddc->n_ISForDofsLocal && field >= 0) {
218: PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
219: nedfieldlocal = pcbddc->ISForDofsLocal[field];
220: PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
221: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
222: ne = n;
223: nedfieldlocal = NULL;
224: global = PETSC_TRUE;
225: } else if (field == PETSC_DECIDE) {
226: PetscInt rst, ren, *idx;
228: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
229: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
230: PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
231: for (i = rst; i < ren; i++) {
232: PetscInt nc;
234: PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
235: if (nc > 1) matis->sf_rootdata[i - rst] = 1;
236: PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
237: }
238: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
239: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
240: PetscCall(PetscMalloc1(n, &idx));
241: for (i = 0, ne = 0; i < n; i++)
242: if (matis->sf_leafdata[i]) idx[ne++] = i;
243: PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
244: } else {
245: SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
246: }
248: /* Sanity checks */
249: PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
250: PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
251: PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
253: /* Just set primal dofs and return */
254: if (setprimal) {
255: IS enedfieldlocal;
256: PetscInt *eidxs;
258: PetscCall(PetscMalloc1(ne, &eidxs));
259: PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
260: if (nedfieldlocal) {
261: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
262: for (i = 0, cum = 0; i < ne; i++) {
263: if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
264: }
265: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
266: } else {
267: for (i = 0, cum = 0; i < ne; i++) {
268: if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
269: }
270: }
271: PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
272: PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
273: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
274: PetscCall(PetscFree(eidxs));
275: PetscCall(ISDestroy(&nedfieldlocal));
276: PetscCall(ISDestroy(&enedfieldlocal));
277: PetscFunctionReturn(PETSC_SUCCESS);
278: }
280: /* Compute some l2g maps */
281: if (nedfieldlocal) {
282: IS is;
284: /* need to map from the local Nedelec field to local numbering */
285: PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
286: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
287: PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
288: PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
289: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
290: if (global) {
291: PetscCall(PetscObjectReference((PetscObject)al2g));
292: el2g = al2g;
293: } else {
294: IS gis;
296: PetscCall(ISRenumber(is, NULL, NULL, &gis));
297: PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
298: PetscCall(ISDestroy(&gis));
299: }
300: PetscCall(ISDestroy(&is));
301: } else {
302: /* restore default */
303: pcbddc->nedfield = -1;
304: /* one ref for the destruction of al2g, one for el2g */
305: PetscCall(PetscObjectReference((PetscObject)al2g));
306: PetscCall(PetscObjectReference((PetscObject)al2g));
307: el2g = al2g;
308: fl2g = NULL;
309: }
311: /* Start communication to drop connections for interior edges (for cc analysis only) */
312: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
313: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
314: if (nedfieldlocal) {
315: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
316: for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
317: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
318: } else {
319: for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
320: }
321: PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
322: PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
324: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
325: PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
326: PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
327: if (global) {
328: PetscInt rst;
330: PetscCall(MatGetOwnershipRange(G, &rst, NULL));
331: for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
332: if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
333: }
334: PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
335: PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
336: } else {
337: PetscInt *tbz;
339: PetscCall(PetscMalloc1(ne, &tbz));
340: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
342: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
343: for (i = 0, cum = 0; i < ne; i++)
344: if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
345: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
346: PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
347: PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
348: PetscCall(PetscFree(tbz));
349: }
350: } else { /* we need the entire G to infer the nullspace */
351: PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient));
352: G = pcbddc->discretegradient;
353: }
355: /* Extract subdomain relevant rows of G */
356: PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
357: PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
358: PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall));
359: PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
360: PetscCall(ISDestroy(&lned));
361: PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
362: PetscCall(MatDestroy(&lGall));
363: PetscCall(MatISGetLocalMat(lGis, &lG));
365: /* SF for nodal dofs communications */
366: PetscCall(MatGetLocalSize(G, NULL, &Lv));
367: PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
368: PetscCall(PetscObjectReference((PetscObject)vl2g));
369: PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
370: PetscCall(PetscSFCreate(comm, &sfv));
371: PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
372: PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
373: PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
374: i = singular ? 2 : 1;
375: PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots));
377: /* Destroy temporary G created in MATIS format and modified G */
378: PetscCall(PetscObjectReference((PetscObject)lG));
379: PetscCall(MatDestroy(&lGis));
380: PetscCall(MatDestroy(&G));
382: if (print) {
383: PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
384: PetscCall(MatView(lG, NULL));
385: }
387: /* Save lG for values insertion in change of basis */
388: PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
390: /* Analyze the edge-nodes connections (duplicate lG) */
391: PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
392: PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
393: PetscCall(PetscBTCreate(nv, &btv));
394: PetscCall(PetscBTCreate(ne, &bte));
395: PetscCall(PetscBTCreate(ne, &btb));
396: PetscCall(PetscBTCreate(ne, &btbd));
397: PetscCall(PetscBTCreate(nv, &btvcand));
398: /* need to import the boundary specification to ensure the
399: proper detection of coarse edges' endpoints */
400: if (pcbddc->DirichletBoundariesLocal) {
401: IS is;
403: if (fl2g) {
404: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
405: } else {
406: is = pcbddc->DirichletBoundariesLocal;
407: }
408: PetscCall(ISGetLocalSize(is, &cum));
409: PetscCall(ISGetIndices(is, &idxs));
410: for (i = 0; i < cum; i++) {
411: if (idxs[i] >= 0) {
412: PetscCall(PetscBTSet(btb, idxs[i]));
413: PetscCall(PetscBTSet(btbd, idxs[i]));
414: }
415: }
416: PetscCall(ISRestoreIndices(is, &idxs));
417: if (fl2g) PetscCall(ISDestroy(&is));
418: }
419: if (pcbddc->NeumannBoundariesLocal) {
420: IS is;
422: if (fl2g) {
423: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
424: } else {
425: is = pcbddc->NeumannBoundariesLocal;
426: }
427: PetscCall(ISGetLocalSize(is, &cum));
428: PetscCall(ISGetIndices(is, &idxs));
429: for (i = 0; i < cum; i++) {
430: if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i]));
431: }
432: PetscCall(ISRestoreIndices(is, &idxs));
433: if (fl2g) PetscCall(ISDestroy(&is));
434: }
436: /* Count neighs per dof */
437: PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs));
438: PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs));
440: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
441: for proper detection of coarse edges' endpoints */
442: PetscCall(PetscBTCreate(ne, &btee));
443: for (i = 0; i < ne; i++) {
444: if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
445: }
446: PetscCall(PetscMalloc1(ne, &marks));
447: if (!conforming) {
448: PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
449: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
450: }
451: PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
452: PetscCall(MatSeqAIJGetArray(lGe, &vals));
453: cum = 0;
454: for (i = 0; i < ne; i++) {
455: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
456: if (!PetscBTLookup(btee, i)) {
457: marks[cum++] = i;
458: continue;
459: }
460: /* set badly connected edge dofs as primal */
461: if (!conforming) {
462: if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
463: marks[cum++] = i;
464: PetscCall(PetscBTSet(bte, i));
465: for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
466: } else {
467: /* every edge dofs should be connected through a certain number of nodal dofs
468: to other edge dofs belonging to coarse edges
469: - at most 2 endpoints
470: - order-1 interior nodal dofs
471: - no undefined nodal dofs (nconn < order)
472: */
473: PetscInt ends = 0, ints = 0, undef = 0;
474: for (j = ii[i]; j < ii[i + 1]; j++) {
475: PetscInt v = jj[j], k;
476: PetscInt nconn = iit[v + 1] - iit[v];
477: for (k = iit[v]; k < iit[v + 1]; k++)
478: if (!PetscBTLookup(btee, jjt[k])) nconn--;
479: if (nconn > order) ends++;
480: else if (nconn == order) ints++;
481: else undef++;
482: }
483: if (undef || ends > 2 || ints != order - 1) {
484: marks[cum++] = i;
485: PetscCall(PetscBTSet(bte, i));
486: for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
487: }
488: }
489: }
490: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
491: if (!order && ii[i + 1] != ii[i]) {
492: PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
493: for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
494: }
495: }
496: PetscCall(PetscBTDestroy(&btee));
497: PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
498: PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
499: if (!conforming) {
500: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
501: PetscCall(MatDestroy(&lGt));
502: }
503: PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
505: /* identify splitpoints and corner candidates */
506: PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
507: if (print) {
508: PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
509: PetscCall(MatView(lGe, NULL));
510: PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
511: PetscCall(MatView(lGt, NULL));
512: }
513: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
514: PetscCall(MatSeqAIJGetArray(lGt, &vals));
515: for (i = 0; i < nv; i++) {
516: PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
517: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
518: if (!order) { /* variable order */
519: PetscReal vorder = 0.;
521: for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
522: test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
523: PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
524: ord = 1;
525: }
526: PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord);
527: for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
528: if (PetscBTLookup(btbd, jj[j])) {
529: bdir = PETSC_TRUE;
530: break;
531: }
532: if (vc != ecount[jj[j]]) {
533: sneighs = PETSC_FALSE;
534: } else {
535: PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]];
536: for (k = 0; k < vc; k++) {
537: if (vn[k] != en[k]) {
538: sneighs = PETSC_FALSE;
539: break;
540: }
541: }
542: }
543: }
544: if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
545: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
546: PetscCall(PetscBTSet(btv, i));
547: } else if (test == ord) {
548: if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
549: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
550: PetscCall(PetscBTSet(btv, i));
551: } else {
552: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
553: PetscCall(PetscBTSet(btvcand, i));
554: }
555: }
556: }
557: PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
558: PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
559: PetscCall(PetscBTDestroy(&btbd));
561: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
562: if (order != 1) {
563: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
564: PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
565: for (i = 0; i < nv; i++) {
566: if (PetscBTLookup(btvcand, i)) {
567: PetscBool found = PETSC_FALSE;
568: for (j = ii[i]; j < ii[i + 1] && !found; j++) {
569: PetscInt k, e = jj[j];
570: if (PetscBTLookup(bte, e)) continue;
571: for (k = iit[e]; k < iit[e + 1]; k++) {
572: PetscInt v = jjt[k];
573: if (v != i && PetscBTLookup(btvcand, v)) {
574: found = PETSC_TRUE;
575: break;
576: }
577: }
578: }
579: if (!found) {
580: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
581: PetscCall(PetscBTClear(btvcand, i));
582: } else {
583: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
584: }
585: }
586: }
587: PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
588: }
589: PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
590: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
591: PetscCall(MatDestroy(&lGe));
593: /* Get the local G^T explicitly */
594: PetscCall(MatDestroy(&lGt));
595: PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
596: PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
598: /* Mark interior nodal dofs */
599: PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));
600: PetscCall(PetscBTCreate(nv, &btvi));
601: for (i = 1; i < n_neigh; i++) {
602: for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j]));
603: }
604: PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));
606: /* communicate corners and splitpoints */
607: PetscCall(PetscMalloc1(nv, &vmarks));
608: PetscCall(PetscArrayzero(sfvleaves, nv));
609: PetscCall(PetscArrayzero(sfvroots, Lv));
610: for (i = 0; i < nv; i++)
611: if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
613: if (print) {
614: IS tbz;
616: cum = 0;
617: for (i = 0; i < nv; i++)
618: if (sfvleaves[i]) vmarks[cum++] = i;
620: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
621: PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
622: PetscCall(ISView(tbz, NULL));
623: PetscCall(ISDestroy(&tbz));
624: }
626: PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
627: PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
628: PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
629: PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
631: /* Zero rows of lGt corresponding to identified corners
632: and interior nodal dofs */
633: cum = 0;
634: for (i = 0; i < nv; i++) {
635: if (sfvleaves[i]) {
636: vmarks[cum++] = i;
637: PetscCall(PetscBTSet(btv, i));
638: }
639: if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
640: }
641: PetscCall(PetscBTDestroy(&btvi));
642: if (print) {
643: IS tbz;
645: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
646: PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
647: PetscCall(ISView(tbz, NULL));
648: PetscCall(ISDestroy(&tbz));
649: }
650: PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
651: PetscCall(PetscFree(vmarks));
652: PetscCall(PetscSFDestroy(&sfv));
653: PetscCall(PetscFree2(sfvleaves, sfvroots));
655: /* Recompute G */
656: PetscCall(MatDestroy(&lG));
657: PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
658: if (print) {
659: PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
660: PetscCall(MatView(lG, NULL));
661: PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
662: PetscCall(MatView(lGt, NULL));
663: }
665: /* Get primal dofs (if any) */
666: cum = 0;
667: for (i = 0; i < ne; i++) {
668: if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
669: }
670: if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
671: PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
672: if (print) {
673: PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
674: PetscCall(ISView(primals, NULL));
675: }
676: PetscCall(PetscBTDestroy(&bte));
677: /* TODO: what if the user passed in some of them ? */
678: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
679: PetscCall(ISDestroy(&primals));
681: /* Compute edge connectivity */
682: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
684: /* Symbolic conn = lG*lGt */
685: PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
686: PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
687: PetscCall(MatProductSetAlgorithm(conn, "default"));
688: PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
689: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
690: PetscCall(MatProductSetFromOptions(conn));
691: PetscCall(MatProductSymbolic(conn));
693: PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
694: if (fl2g) {
695: PetscBT btf;
696: PetscInt *iia, *jja, *iiu, *jju;
697: PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
699: /* create CSR for all local dofs */
700: PetscCall(PetscMalloc1(n + 1, &iia));
701: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
702: PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
703: iiu = pcbddc->mat_graph->xadj;
704: jju = pcbddc->mat_graph->adjncy;
705: } else if (pcbddc->use_local_adj) {
706: rest = PETSC_TRUE;
707: PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
708: } else {
709: free = PETSC_TRUE;
710: PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
711: iiu[0] = 0;
712: for (i = 0; i < n; i++) {
713: iiu[i + 1] = i + 1;
714: jju[i] = -1;
715: }
716: }
718: /* import sizes of CSR */
719: iia[0] = 0;
720: for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
722: /* overwrite entries corresponding to the Nedelec field */
723: PetscCall(PetscBTCreate(n, &btf));
724: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
725: for (i = 0; i < ne; i++) {
726: PetscCall(PetscBTSet(btf, idxs[i]));
727: iia[idxs[i] + 1] = ii[i + 1] - ii[i];
728: }
730: /* iia in CSR */
731: for (i = 0; i < n; i++) iia[i + 1] += iia[i];
733: /* jja in CSR */
734: PetscCall(PetscMalloc1(iia[n], &jja));
735: for (i = 0; i < n; i++)
736: if (!PetscBTLookup(btf, i))
737: for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
739: /* map edge dofs connectivity */
740: if (jj) {
741: PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
742: for (i = 0; i < ne; i++) {
743: PetscInt e = idxs[i];
744: for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
745: }
746: }
747: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
748: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER));
749: if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
750: if (free) PetscCall(PetscFree2(iiu, jju));
751: PetscCall(PetscBTDestroy(&btf));
752: } else {
753: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER));
754: }
756: /* Analyze interface for edge dofs */
757: PetscCall(PCBDDCAnalyzeInterface(pc));
758: pcbddc->mat_graph->twodim = PETSC_FALSE;
760: /* Get coarse edges in the edge space */
761: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
762: PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
764: if (fl2g) {
765: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
766: PetscCall(PetscMalloc1(nee, &eedges));
767: for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
768: } else {
769: eedges = alleedges;
770: primals = allprimals;
771: }
773: /* Mark fine edge dofs with their coarse edge id */
774: PetscCall(PetscArrayzero(marks, ne));
775: PetscCall(ISGetLocalSize(primals, &cum));
776: PetscCall(ISGetIndices(primals, &idxs));
777: for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
778: PetscCall(ISRestoreIndices(primals, &idxs));
779: if (print) {
780: PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
781: PetscCall(ISView(primals, NULL));
782: }
784: maxsize = 0;
785: for (i = 0; i < nee; i++) {
786: PetscInt size, mark = i + 1;
788: PetscCall(ISGetLocalSize(eedges[i], &size));
789: PetscCall(ISGetIndices(eedges[i], &idxs));
790: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
791: PetscCall(ISRestoreIndices(eedges[i], &idxs));
792: maxsize = PetscMax(maxsize, size);
793: }
795: /* Find coarse edge endpoints */
796: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
797: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
798: for (i = 0; i < nee; i++) {
799: PetscInt mark = i + 1, size;
801: PetscCall(ISGetLocalSize(eedges[i], &size));
802: if (!size && nedfieldlocal) continue;
803: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
804: PetscCall(ISGetIndices(eedges[i], &idxs));
805: if (print) {
806: PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
807: PetscCall(ISView(eedges[i], NULL));
808: }
809: for (j = 0; j < size; j++) {
810: PetscInt k, ee = idxs[j];
811: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee));
812: for (k = ii[ee]; k < ii[ee + 1]; k++) {
813: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]));
814: if (PetscBTLookup(btv, jj[k])) {
815: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]));
816: } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
817: PetscInt k2;
818: PetscBool corner = PETSC_FALSE;
819: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
820: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
821: /* it's a corner if either is connected with an edge dof belonging to a different cc or
822: if the edge dof lie on the natural part of the boundary */
823: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
824: corner = PETSC_TRUE;
825: break;
826: }
827: }
828: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
829: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]));
830: PetscCall(PetscBTSet(btv, jj[k]));
831: } else {
832: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n"));
833: }
834: }
835: }
836: }
837: PetscCall(ISRestoreIndices(eedges[i], &idxs));
838: }
839: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
840: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
841: PetscCall(PetscBTDestroy(&btb));
843: /* Reset marked primal dofs */
844: PetscCall(ISGetLocalSize(primals, &cum));
845: PetscCall(ISGetIndices(primals, &idxs));
846: for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
847: PetscCall(ISRestoreIndices(primals, &idxs));
849: /* Now use the initial lG */
850: PetscCall(MatDestroy(&lG));
851: PetscCall(MatDestroy(&lGt));
852: lG = lGinit;
853: PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
855: /* Compute extended cols indices */
856: PetscCall(PetscBTCreate(nv, &btvc));
857: PetscCall(PetscBTCreate(nee, &bter));
858: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
859: PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
860: i *= maxsize;
861: PetscCall(PetscCalloc1(nee, &extcols));
862: PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
863: eerr = PETSC_FALSE;
864: for (i = 0; i < nee; i++) {
865: PetscInt size, found = 0;
867: cum = 0;
868: PetscCall(ISGetLocalSize(eedges[i], &size));
869: if (!size && nedfieldlocal) continue;
870: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
871: PetscCall(ISGetIndices(eedges[i], &idxs));
872: PetscCall(PetscBTMemzero(nv, btvc));
873: for (j = 0; j < size; j++) {
874: PetscInt k, ee = idxs[j];
875: for (k = ii[ee]; k < ii[ee + 1]; k++) {
876: PetscInt vv = jj[k];
877: if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
878: else if (!PetscBTLookupSet(btvc, vv)) found++;
879: }
880: }
881: PetscCall(ISRestoreIndices(eedges[i], &idxs));
882: PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
883: PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
884: PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
885: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
886: /* it may happen that endpoints are not defined at this point
887: if it is the case, mark this edge for a second pass */
888: if (cum != size - 1 || found != 2) {
889: PetscCall(PetscBTSet(bter, i));
890: if (print) {
891: PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
892: PetscCall(ISView(eedges[i], NULL));
893: PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
894: PetscCall(ISView(extcols[i], NULL));
895: }
896: eerr = PETSC_TRUE;
897: }
898: }
899: /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
900: PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
901: if (done) {
902: PetscInt *newprimals;
904: PetscCall(PetscMalloc1(ne, &newprimals));
905: PetscCall(ISGetLocalSize(primals, &cum));
906: PetscCall(ISGetIndices(primals, &idxs));
907: PetscCall(PetscArraycpy(newprimals, idxs, cum));
908: PetscCall(ISRestoreIndices(primals, &idxs));
909: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
910: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
911: for (i = 0; i < nee; i++) {
912: PetscBool has_candidates = PETSC_FALSE;
913: if (PetscBTLookup(bter, i)) {
914: PetscInt size, mark = i + 1;
916: PetscCall(ISGetLocalSize(eedges[i], &size));
917: PetscCall(ISGetIndices(eedges[i], &idxs));
918: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
919: for (j = 0; j < size; j++) {
920: PetscInt k, ee = idxs[j];
921: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
922: for (k = ii[ee]; k < ii[ee + 1]; k++) {
923: /* set all candidates located on the edge as corners */
924: if (PetscBTLookup(btvcand, jj[k])) {
925: PetscInt k2, vv = jj[k];
926: has_candidates = PETSC_TRUE;
927: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv));
928: PetscCall(PetscBTSet(btv, vv));
929: /* set all edge dofs connected to candidate as primals */
930: for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
931: if (marks[jjt[k2]] == mark) {
932: PetscInt k3, ee2 = jjt[k2];
933: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
934: newprimals[cum++] = ee2;
935: /* finally set the new corners */
936: for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
937: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
938: PetscCall(PetscBTSet(btv, jj[k3]));
939: }
940: }
941: }
942: } else {
943: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
944: }
945: }
946: }
947: if (!has_candidates) { /* circular edge */
948: PetscInt k, ee = idxs[0], *tmarks;
950: PetscCall(PetscCalloc1(ne, &tmarks));
951: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i));
952: for (k = ii[ee]; k < ii[ee + 1]; k++) {
953: PetscInt k2;
954: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]));
955: PetscCall(PetscBTSet(btv, jj[k]));
956: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
957: }
958: for (j = 0; j < size; j++) {
959: if (tmarks[idxs[j]] > 1) {
960: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
961: newprimals[cum++] = idxs[j];
962: }
963: }
964: PetscCall(PetscFree(tmarks));
965: }
966: PetscCall(ISRestoreIndices(eedges[i], &idxs));
967: }
968: PetscCall(ISDestroy(&extcols[i]));
969: }
970: PetscCall(PetscFree(extcols));
971: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
972: PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
973: if (fl2g) {
974: PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
975: PetscCall(ISDestroy(&primals));
976: for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
977: PetscCall(PetscFree(eedges));
978: }
979: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
980: PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
981: PetscCall(PetscFree(newprimals));
982: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
983: PetscCall(ISDestroy(&primals));
984: PetscCall(PCBDDCAnalyzeInterface(pc));
985: pcbddc->mat_graph->twodim = PETSC_FALSE;
986: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
987: if (fl2g) {
988: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
989: PetscCall(PetscMalloc1(nee, &eedges));
990: for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
991: } else {
992: eedges = alleedges;
993: primals = allprimals;
994: }
995: PetscCall(PetscCalloc1(nee, &extcols));
997: /* Mark again */
998: PetscCall(PetscArrayzero(marks, ne));
999: for (i = 0; i < nee; i++) {
1000: PetscInt size, mark = i + 1;
1002: PetscCall(ISGetLocalSize(eedges[i], &size));
1003: PetscCall(ISGetIndices(eedges[i], &idxs));
1004: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1005: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1006: }
1007: if (print) {
1008: PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1009: PetscCall(ISView(primals, NULL));
1010: }
1012: /* Recompute extended cols */
1013: eerr = PETSC_FALSE;
1014: for (i = 0; i < nee; i++) {
1015: PetscInt size;
1017: cum = 0;
1018: PetscCall(ISGetLocalSize(eedges[i], &size));
1019: if (!size && nedfieldlocal) continue;
1020: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1021: PetscCall(ISGetIndices(eedges[i], &idxs));
1022: for (j = 0; j < size; j++) {
1023: PetscInt k, ee = idxs[j];
1024: for (k = ii[ee]; k < ii[ee + 1]; k++)
1025: if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1026: }
1027: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1028: PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1029: PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1030: PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1031: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1032: if (cum != size - 1) {
1033: if (print) {
1034: PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1035: PetscCall(ISView(eedges[i], NULL));
1036: PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1037: PetscCall(ISView(extcols[i], NULL));
1038: }
1039: eerr = PETSC_TRUE;
1040: }
1041: }
1042: }
1043: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1044: PetscCall(PetscFree2(extrow, gidxs));
1045: PetscCall(PetscBTDestroy(&bter));
1046: if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1047: /* an error should not occur at this point */
1048: PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1050: /* Check the number of endpoints */
1051: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1052: PetscCall(PetscMalloc1(2 * nee, &corners));
1053: PetscCall(PetscMalloc1(nee, &cedges));
1054: for (i = 0; i < nee; i++) {
1055: PetscInt size, found = 0, gc[2];
1057: /* init with defaults */
1058: cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1059: PetscCall(ISGetLocalSize(eedges[i], &size));
1060: if (!size && nedfieldlocal) continue;
1061: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1062: PetscCall(ISGetIndices(eedges[i], &idxs));
1063: PetscCall(PetscBTMemzero(nv, btvc));
1064: for (j = 0; j < size; j++) {
1065: PetscInt k, ee = idxs[j];
1066: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1067: PetscInt vv = jj[k];
1068: if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1069: PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i);
1070: corners[i * 2 + found++] = vv;
1071: }
1072: }
1073: }
1074: if (found != 2) {
1075: PetscInt e;
1076: if (fl2g) {
1077: PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1078: } else {
1079: e = idxs[0];
1080: }
1081: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1082: }
1084: /* get primal dof index on this coarse edge */
1085: PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1086: if (gc[0] > gc[1]) {
1087: PetscInt swap = corners[2 * i];
1088: corners[2 * i] = corners[2 * i + 1];
1089: corners[2 * i + 1] = swap;
1090: }
1091: cedges[i] = idxs[size - 1];
1092: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1093: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1094: }
1095: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1096: PetscCall(PetscBTDestroy(&btvc));
1098: if (PetscDefined(USE_DEBUG)) {
1099: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1100: not interfere with neighbouring coarse edges */
1101: PetscCall(PetscMalloc1(nee + 1, &emarks));
1102: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1103: for (i = 0; i < nv; i++) {
1104: PetscInt emax = 0, eemax = 0;
1106: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1107: PetscCall(PetscArrayzero(emarks, nee + 1));
1108: for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1109: for (j = 1; j < nee + 1; j++) {
1110: if (emax < emarks[j]) {
1111: emax = emarks[j];
1112: eemax = j;
1113: }
1114: }
1115: /* not relevant for edges */
1116: if (!eemax) continue;
1118: for (j = ii[i]; j < ii[i + 1]; j++) {
1119: PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1120: }
1121: }
1122: PetscCall(PetscFree(emarks));
1123: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1124: }
1126: /* Compute extended rows indices for edge blocks of the change of basis */
1127: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1128: PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1129: extmem *= maxsize;
1130: PetscCall(PetscMalloc1(extmem * nee, &extrow));
1131: PetscCall(PetscMalloc1(nee, &extrows));
1132: PetscCall(PetscCalloc1(nee, &extrowcum));
1133: for (i = 0; i < nv; i++) {
1134: PetscInt mark = 0, size, start;
1136: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1137: for (j = ii[i]; j < ii[i + 1]; j++)
1138: if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1140: /* not relevant */
1141: if (!mark) continue;
1143: /* import extended row */
1144: mark--;
1145: start = mark * extmem + extrowcum[mark];
1146: size = ii[i + 1] - ii[i];
1147: PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1148: PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1149: extrowcum[mark] += size;
1150: }
1151: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1152: PetscCall(MatDestroy(&lGt));
1153: PetscCall(PetscFree(marks));
1155: /* Compress extrows */
1156: cum = 0;
1157: for (i = 0; i < nee; i++) {
1158: PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1159: PetscCall(PetscSortRemoveDupsInt(&size, start));
1160: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1161: cum = PetscMax(cum, size);
1162: }
1163: PetscCall(PetscFree(extrowcum));
1164: PetscCall(PetscBTDestroy(&btv));
1165: PetscCall(PetscBTDestroy(&btvcand));
1167: /* Workspace for lapack inner calls and VecSetValues */
1168: PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1170: /* Create change of basis matrix (preallocation can be improved) */
1171: PetscCall(MatCreate(comm, &T));
1172: PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N));
1173: PetscCall(MatSetType(T, MATAIJ));
1174: PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL));
1175: PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL));
1176: PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1177: PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1178: PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1179: PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1181: /* Defaults to identity */
1182: PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
1183: PetscCall(VecSet(tvec, 1.0));
1184: PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES));
1185: PetscCall(VecDestroy(&tvec));
1187: /* Create discrete gradient for the coarser level if needed */
1188: PetscCall(MatDestroy(&pcbddc->nedcG));
1189: PetscCall(ISDestroy(&pcbddc->nedclocal));
1190: if (pcbddc->current_level < pcbddc->max_levels) {
1191: ISLocalToGlobalMapping cel2g, cvl2g;
1192: IS wis, gwis;
1193: PetscInt cnv, cne;
1195: PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1196: if (fl2g) {
1197: PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1198: } else {
1199: PetscCall(PetscObjectReference((PetscObject)wis));
1200: pcbddc->nedclocal = wis;
1201: }
1202: PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1203: PetscCall(ISDestroy(&wis));
1204: PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1205: PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1206: PetscCall(ISDestroy(&wis));
1207: PetscCall(ISDestroy(&gwis));
1209: PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1210: PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1211: PetscCall(ISDestroy(&wis));
1212: PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1213: PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1214: PetscCall(ISDestroy(&wis));
1215: PetscCall(ISDestroy(&gwis));
1217: PetscCall(MatCreate(comm, &pcbddc->nedcG));
1218: PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1219: PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1220: PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1221: PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1222: PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1223: PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1224: PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1225: }
1226: PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1228: #if defined(PRINT_GDET)
1229: inc = 0;
1230: lev = pcbddc->current_level;
1231: #endif
1233: /* Insert values in the change of basis matrix */
1234: for (i = 0; i < nee; i++) {
1235: Mat Gins = NULL, GKins = NULL;
1236: IS cornersis = NULL;
1237: PetscScalar cvals[2];
1239: if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1240: PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1241: if (Gins && GKins) {
1242: const PetscScalar *data;
1243: const PetscInt *rows, *cols;
1244: PetscInt nrh, nch, nrc, ncc;
1246: PetscCall(ISGetIndices(eedges[i], &cols));
1247: /* H1 */
1248: PetscCall(ISGetIndices(extrows[i], &rows));
1249: PetscCall(MatGetSize(Gins, &nrh, &nch));
1250: PetscCall(MatDenseGetArrayRead(Gins, &data));
1251: PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1252: PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1253: PetscCall(ISRestoreIndices(extrows[i], &rows));
1254: /* complement */
1255: PetscCall(MatGetSize(GKins, &nrc, &ncc));
1256: PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1257: PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1258: PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1259: PetscCall(MatDenseGetArrayRead(GKins, &data));
1260: PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1261: PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1263: /* coarse discrete gradient */
1264: if (pcbddc->nedcG) {
1265: PetscInt cols[2];
1267: cols[0] = 2 * i;
1268: cols[1] = 2 * i + 1;
1269: PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1270: }
1271: PetscCall(ISRestoreIndices(eedges[i], &cols));
1272: }
1273: PetscCall(ISDestroy(&extrows[i]));
1274: PetscCall(ISDestroy(&extcols[i]));
1275: PetscCall(ISDestroy(&cornersis));
1276: PetscCall(MatDestroy(&Gins));
1277: PetscCall(MatDestroy(&GKins));
1278: }
1279: PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1281: /* Start assembling */
1282: PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1283: if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1285: /* Free */
1286: if (fl2g) {
1287: PetscCall(ISDestroy(&primals));
1288: for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1289: PetscCall(PetscFree(eedges));
1290: }
1292: /* hack mat_graph with primal dofs on the coarse edges */
1293: {
1294: PCBDDCGraph graph = pcbddc->mat_graph;
1295: PetscInt *oqueue = graph->queue;
1296: PetscInt *ocptr = graph->cptr;
1297: PetscInt ncc, *idxs;
1299: /* find first primal edge */
1300: if (pcbddc->nedclocal) {
1301: PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1302: } else {
1303: if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1304: idxs = cedges;
1305: }
1306: cum = 0;
1307: while (cum < nee && cedges[cum] < 0) cum++;
1309: /* adapt connected components */
1310: PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1311: graph->cptr[0] = 0;
1312: for (i = 0, ncc = 0; i < graph->ncc; i++) {
1313: PetscInt lc = ocptr[i + 1] - ocptr[i];
1314: if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1315: graph->cptr[ncc + 1] = graph->cptr[ncc] + 1;
1316: graph->queue[graph->cptr[ncc]] = cedges[cum];
1317: ncc++;
1318: lc--;
1319: cum++;
1320: while (cum < nee && cedges[cum] < 0) cum++;
1321: }
1322: graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1323: for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1324: ncc++;
1325: }
1326: graph->ncc = ncc;
1327: if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1328: PetscCall(PetscFree2(ocptr, oqueue));
1329: }
1330: PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1331: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1332: PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1333: PetscCall(MatDestroy(&conn));
1335: PetscCall(ISDestroy(&nedfieldlocal));
1336: PetscCall(PetscFree(extrow));
1337: PetscCall(PetscFree2(work, rwork));
1338: PetscCall(PetscFree(corners));
1339: PetscCall(PetscFree(cedges));
1340: PetscCall(PetscFree(extrows));
1341: PetscCall(PetscFree(extcols));
1342: PetscCall(MatDestroy(&lG));
1344: /* Complete assembling */
1345: PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1346: if (pcbddc->nedcG) {
1347: PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1348: #if 0
1349: PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G"));
1350: PetscCall(MatView(pcbddc->nedcG,NULL));
1351: #endif
1352: }
1354: /* set change of basis */
1355: PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular));
1356: PetscCall(MatDestroy(&T));
1358: PetscFunctionReturn(PETSC_SUCCESS);
1359: }
1361: /* the near-null space of BDDC carries information on quadrature weights,
1362: and these can be collinear -> so cheat with MatNullSpaceCreate
1363: and create a suitable set of basis vectors first */
1364: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1365: {
1366: PetscInt i;
1368: PetscFunctionBegin;
1369: for (i = 0; i < nvecs; i++) {
1370: PetscInt first, last;
1372: PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1373: PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1374: if (i >= first && i < last) {
1375: PetscScalar *data;
1376: PetscCall(VecGetArray(quad_vecs[i], &data));
1377: if (!has_const) {
1378: data[i - first] = 1.;
1379: } else {
1380: data[2 * i - first] = 1. / PetscSqrtReal(2.);
1381: data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1382: }
1383: PetscCall(VecRestoreArray(quad_vecs[i], &data));
1384: }
1385: PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1386: }
1387: PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1388: for (i = 0; i < nvecs; i++) { /* reset vectors */
1389: PetscInt first, last;
1390: PetscCall(VecLockReadPop(quad_vecs[i]));
1391: PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1392: if (i >= first && i < last) {
1393: PetscScalar *data;
1394: PetscCall(VecGetArray(quad_vecs[i], &data));
1395: if (!has_const) {
1396: data[i - first] = 0.;
1397: } else {
1398: data[2 * i - first] = 0.;
1399: data[2 * i - first + 1] = 0.;
1400: }
1401: PetscCall(VecRestoreArray(quad_vecs[i], &data));
1402: }
1403: PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1404: PetscCall(VecLockReadPush(quad_vecs[i]));
1405: }
1406: PetscFunctionReturn(PETSC_SUCCESS);
1407: }
1409: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1410: {
1411: Mat loc_divudotp;
1412: Vec p, v, vins, quad_vec, *quad_vecs;
1413: ISLocalToGlobalMapping map;
1414: PetscScalar *vals;
1415: const PetscScalar *array;
1416: PetscInt i, maxneighs = 0, maxsize, *gidxs;
1417: PetscInt n_neigh, *neigh, *n_shared, **shared;
1418: PetscMPIInt rank;
1420: PetscFunctionBegin;
1421: PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1422: for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs);
1423: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
1424: if (!maxneighs) {
1425: PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1426: *nnsp = NULL;
1427: PetscFunctionReturn(PETSC_SUCCESS);
1428: }
1429: maxsize = 0;
1430: for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize);
1431: PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals));
1432: /* create vectors to hold quadrature weights */
1433: PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1434: if (!transpose) {
1435: PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1436: } else {
1437: PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1438: }
1439: PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs));
1440: PetscCall(VecDestroy(&quad_vec));
1441: PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp));
1442: for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i]));
1444: /* compute local quad vec */
1445: PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1446: if (!transpose) {
1447: PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1448: } else {
1449: PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1450: }
1451: PetscCall(VecSet(p, 1.));
1452: if (!transpose) {
1453: PetscCall(MatMultTranspose(loc_divudotp, p, v));
1454: } else {
1455: PetscCall(MatMult(loc_divudotp, p, v));
1456: }
1457: if (vl2l) {
1458: Mat lA;
1459: VecScatter sc;
1461: PetscCall(MatISGetLocalMat(A, &lA));
1462: PetscCall(MatCreateVecs(lA, &vins, NULL));
1463: PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1464: PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1465: PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1466: PetscCall(VecScatterDestroy(&sc));
1467: } else {
1468: vins = v;
1469: }
1470: PetscCall(VecGetArrayRead(vins, &array));
1471: PetscCall(VecDestroy(&p));
1473: /* insert in global quadrature vecs */
1474: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1475: for (i = 1; i < n_neigh; i++) {
1476: const PetscInt *idxs;
1477: PetscInt idx, nn, j;
1479: idxs = shared[i];
1480: nn = n_shared[i];
1481: for (j = 0; j < nn; j++) vals[j] = array[idxs[j]];
1482: PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx));
1483: idx = -(idx + 1);
1484: PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs);
1485: PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs));
1486: PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES));
1487: }
1488: PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1489: PetscCall(VecRestoreArrayRead(vins, &array));
1490: if (vl2l) PetscCall(VecDestroy(&vins));
1491: PetscCall(VecDestroy(&v));
1492: PetscCall(PetscFree2(gidxs, vals));
1494: /* assemble near null space */
1495: for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i]));
1496: for (i = 0; i < maxneighs; i++) {
1497: PetscCall(VecAssemblyEnd(quad_vecs[i]));
1498: PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view"));
1499: PetscCall(VecLockReadPush(quad_vecs[i]));
1500: }
1501: PetscCall(VecDestroyVecs(maxneighs, &quad_vecs));
1502: PetscFunctionReturn(PETSC_SUCCESS);
1503: }
1505: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1506: {
1507: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1509: PetscFunctionBegin;
1510: if (primalv) {
1511: if (pcbddc->user_primal_vertices_local) {
1512: IS list[2], newp;
1514: list[0] = primalv;
1515: list[1] = pcbddc->user_primal_vertices_local;
1516: PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1517: PetscCall(ISSortRemoveDups(newp));
1518: PetscCall(ISDestroy(&list[1]));
1519: pcbddc->user_primal_vertices_local = newp;
1520: } else {
1521: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1522: }
1523: }
1524: PetscFunctionReturn(PETSC_SUCCESS);
1525: }
1527: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1528: {
1529: PetscInt f, *comp = (PetscInt *)ctx;
1531: PetscFunctionBegin;
1532: for (f = 0; f < Nf; f++) out[f] = X[*comp];
1533: PetscFunctionReturn(PETSC_SUCCESS);
1534: }
1536: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1537: {
1538: Vec local, global;
1539: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1540: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
1541: PetscBool monolithic = PETSC_FALSE;
1543: PetscFunctionBegin;
1544: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1545: PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1546: PetscOptionsEnd();
1547: /* need to convert from global to local topology information and remove references to information in global ordering */
1548: PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1549: PetscCall(MatCreateVecs(matis->A, &local, NULL));
1550: PetscCall(VecBindToCPU(global, PETSC_TRUE));
1551: PetscCall(VecBindToCPU(local, PETSC_TRUE));
1552: if (monolithic) { /* just get block size to properly compute vertices */
1553: if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1554: goto boundary;
1555: }
1557: if (pcbddc->user_provided_isfordofs) {
1558: if (pcbddc->n_ISForDofs) {
1559: PetscInt i;
1561: PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1562: for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1563: PetscInt bs;
1565: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1566: PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1567: PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1568: PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1569: }
1570: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1571: pcbddc->n_ISForDofs = 0;
1572: PetscCall(PetscFree(pcbddc->ISForDofs));
1573: }
1574: } else {
1575: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1576: DM dm;
1578: PetscCall(MatGetDM(pc->pmat, &dm));
1579: if (!dm) PetscCall(PCGetDM(pc, &dm));
1580: if (dm) {
1581: IS *fields;
1582: PetscInt nf, i;
1584: PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1585: PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1586: for (i = 0; i < nf; i++) {
1587: PetscInt bs;
1589: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1590: PetscCall(ISGetBlockSize(fields[i], &bs));
1591: PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1592: PetscCall(ISDestroy(&fields[i]));
1593: }
1594: PetscCall(PetscFree(fields));
1595: pcbddc->n_ISForDofsLocal = nf;
1596: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1597: PetscContainer c;
1599: PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1600: if (c) {
1601: MatISLocalFields lf;
1602: PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1603: PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1604: } else { /* fallback, create the default fields if bs > 1 */
1605: PetscInt i, n = matis->A->rmap->n;
1606: PetscCall(MatGetBlockSize(pc->pmat, &i));
1607: if (i > 1) {
1608: pcbddc->n_ISForDofsLocal = i;
1609: PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1610: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1611: }
1612: }
1613: }
1614: } else {
1615: PetscInt i;
1616: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1617: }
1618: }
1620: boundary:
1621: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1622: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1623: } else if (pcbddc->DirichletBoundariesLocal) {
1624: PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1625: }
1626: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1627: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1628: } else if (pcbddc->NeumannBoundariesLocal) {
1629: PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1630: }
1631: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1632: PetscCall(VecDestroy(&global));
1633: PetscCall(VecDestroy(&local));
1634: /* detect local disconnected subdomains if requested (use matis->A) */
1635: if (pcbddc->detect_disconnected) {
1636: IS primalv = NULL;
1637: PetscInt i;
1638: PetscBool filter = pcbddc->detect_disconnected_filter;
1640: for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1641: PetscCall(PetscFree(pcbddc->local_subs));
1642: PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1643: PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1644: PetscCall(ISDestroy(&primalv));
1645: }
1646: /* early stage corner detection */
1647: {
1648: DM dm;
1650: PetscCall(MatGetDM(pc->pmat, &dm));
1651: if (!dm) PetscCall(PCGetDM(pc, &dm));
1652: if (dm) {
1653: PetscBool isda;
1655: PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1656: if (isda) {
1657: ISLocalToGlobalMapping l2l;
1658: IS corners;
1659: Mat lA;
1660: PetscBool gl, lo;
1662: {
1663: Vec cvec;
1664: const PetscScalar *coords;
1665: PetscInt dof, n, cdim;
1666: PetscBool memc = PETSC_TRUE;
1668: PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1669: PetscCall(DMGetCoordinates(dm, &cvec));
1670: PetscCall(VecGetLocalSize(cvec, &n));
1671: PetscCall(VecGetBlockSize(cvec, &cdim));
1672: n /= cdim;
1673: PetscCall(PetscFree(pcbddc->mat_graph->coords));
1674: PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1675: PetscCall(VecGetArrayRead(cvec, &coords));
1676: #if defined(PETSC_USE_COMPLEX)
1677: memc = PETSC_FALSE;
1678: #endif
1679: if (dof != 1) memc = PETSC_FALSE;
1680: if (memc) {
1681: PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1682: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1683: PetscReal *bcoords = pcbddc->mat_graph->coords;
1684: PetscInt i, b, d;
1686: for (i = 0; i < n; i++) {
1687: for (b = 0; b < dof; b++) {
1688: for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1689: }
1690: }
1691: }
1692: PetscCall(VecRestoreArrayRead(cvec, &coords));
1693: pcbddc->mat_graph->cdim = cdim;
1694: pcbddc->mat_graph->cnloc = dof * n;
1695: pcbddc->mat_graph->cloc = PETSC_FALSE;
1696: }
1697: PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1698: PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1699: PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1700: PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1701: lo = (PetscBool)(l2l && corners);
1702: PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1703: if (gl) { /* From PETSc's DMDA */
1704: const PetscInt *idx;
1705: PetscInt dof, bs, *idxout, n;
1707: PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1708: PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1709: PetscCall(ISGetLocalSize(corners, &n));
1710: PetscCall(ISGetIndices(corners, &idx));
1711: if (bs == dof) {
1712: PetscCall(PetscMalloc1(n, &idxout));
1713: PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1714: } else { /* the original DMDA local-to-local map have been modified */
1715: PetscInt i, d;
1717: PetscCall(PetscMalloc1(dof * n, &idxout));
1718: for (i = 0; i < n; i++)
1719: for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1720: PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1722: bs = 1;
1723: n *= dof;
1724: }
1725: PetscCall(ISRestoreIndices(corners, &idx));
1726: PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1727: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1728: PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1729: PetscCall(ISDestroy(&corners));
1730: pcbddc->corner_selected = PETSC_TRUE;
1731: pcbddc->corner_selection = PETSC_TRUE;
1732: }
1733: if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1734: }
1735: }
1736: }
1737: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1738: DM dm;
1740: PetscCall(MatGetDM(pc->pmat, &dm));
1741: if (!dm) PetscCall(PCGetDM(pc, &dm));
1742: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1743: Vec vcoords;
1744: PetscSection section;
1745: PetscReal *coords;
1746: PetscInt d, cdim, nl, nf, **ctxs;
1747: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1748: /* debug coordinates */
1749: PetscViewer viewer;
1750: PetscBool flg;
1751: PetscViewerFormat format;
1752: const char *prefix;
1754: PetscCall(DMGetCoordinateDim(dm, &cdim));
1755: PetscCall(DMGetLocalSection(dm, §ion));
1756: PetscCall(PetscSectionGetNumFields(section, &nf));
1757: PetscCall(DMCreateGlobalVector(dm, &vcoords));
1758: PetscCall(VecGetLocalSize(vcoords, &nl));
1759: PetscCall(PetscMalloc1(nl * cdim, &coords));
1760: PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1761: PetscCall(PetscMalloc1(nf, &ctxs[0]));
1762: for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1763: for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1765: /* debug coordinates */
1766: PetscCall(PCGetOptionsPrefix(pc, &prefix));
1767: PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1768: if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1769: for (d = 0; d < cdim; d++) {
1770: PetscInt i;
1771: const PetscScalar *v;
1772: char name[16];
1774: for (i = 0; i < nf; i++) ctxs[i][0] = d;
1775: PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d));
1776: PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1777: PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1778: if (flg) PetscCall(VecView(vcoords, viewer));
1779: PetscCall(VecGetArrayRead(vcoords, &v));
1780: for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1781: PetscCall(VecRestoreArrayRead(vcoords, &v));
1782: }
1783: PetscCall(VecDestroy(&vcoords));
1784: PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1785: PetscCall(PetscFree(coords));
1786: PetscCall(PetscFree(ctxs[0]));
1787: PetscCall(PetscFree2(funcs, ctxs));
1788: if (flg) {
1789: PetscCall(PetscViewerPopFormat(viewer));
1790: PetscCall(PetscViewerDestroy(&viewer));
1791: }
1792: }
1793: }
1794: PetscFunctionReturn(PETSC_SUCCESS);
1795: }
1797: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1798: {
1799: Mat_IS *matis = (Mat_IS *)(pc->pmat->data);
1800: IS nis;
1801: const PetscInt *idxs;
1802: PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1804: PetscFunctionBegin;
1805: PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1806: if (mop == MPI_LAND) {
1807: /* init rootdata with true */
1808: for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1809: } else {
1810: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1811: }
1812: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1813: PetscCall(ISGetLocalSize(*is, &nd));
1814: PetscCall(ISGetIndices(*is, &idxs));
1815: for (i = 0; i < nd; i++)
1816: if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1817: PetscCall(ISRestoreIndices(*is, &idxs));
1818: PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1819: PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1820: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1821: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1822: if (mop == MPI_LAND) {
1823: PetscCall(PetscMalloc1(nd, &nidxs));
1824: } else {
1825: PetscCall(PetscMalloc1(n, &nidxs));
1826: }
1827: for (i = 0, nnd = 0; i < n; i++)
1828: if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
1829: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis));
1830: PetscCall(ISDestroy(is));
1831: *is = nis;
1832: PetscFunctionReturn(PETSC_SUCCESS);
1833: }
1835: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
1836: {
1837: PC_IS *pcis = (PC_IS *)(pc->data);
1838: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
1840: PetscFunctionBegin;
1841: if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
1842: if (pcbddc->ChangeOfBasisMatrix) {
1843: Vec swap;
1845: PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
1846: swap = pcbddc->work_change;
1847: pcbddc->work_change = r;
1848: r = swap;
1849: }
1850: PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1851: PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1852: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1853: PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
1854: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1855: PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
1856: PetscCall(VecSet(z, 0.));
1857: PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1858: PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1859: if (pcbddc->ChangeOfBasisMatrix) {
1860: pcbddc->work_change = r;
1861: PetscCall(VecCopy(z, pcbddc->work_change));
1862: PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
1863: }
1864: PetscFunctionReturn(PETSC_SUCCESS);
1865: }
1867: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1868: {
1869: PCBDDCBenignMatMult_ctx ctx;
1870: PetscBool apply_right, apply_left, reset_x;
1872: PetscFunctionBegin;
1873: PetscCall(MatShellGetContext(A, &ctx));
1874: if (transpose) {
1875: apply_right = ctx->apply_left;
1876: apply_left = ctx->apply_right;
1877: } else {
1878: apply_right = ctx->apply_right;
1879: apply_left = ctx->apply_left;
1880: }
1881: reset_x = PETSC_FALSE;
1882: if (apply_right) {
1883: const PetscScalar *ax;
1884: PetscInt nl, i;
1886: PetscCall(VecGetLocalSize(x, &nl));
1887: PetscCall(VecGetArrayRead(x, &ax));
1888: PetscCall(PetscArraycpy(ctx->work, ax, nl));
1889: PetscCall(VecRestoreArrayRead(x, &ax));
1890: for (i = 0; i < ctx->benign_n; i++) {
1891: PetscScalar sum, val;
1892: const PetscInt *idxs;
1893: PetscInt nz, j;
1894: PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1895: PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1896: sum = 0.;
1897: if (ctx->apply_p0) {
1898: val = ctx->work[idxs[nz - 1]];
1899: for (j = 0; j < nz - 1; j++) {
1900: sum += ctx->work[idxs[j]];
1901: ctx->work[idxs[j]] += val;
1902: }
1903: } else {
1904: for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
1905: }
1906: ctx->work[idxs[nz - 1]] -= sum;
1907: PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1908: }
1909: PetscCall(VecPlaceArray(x, ctx->work));
1910: reset_x = PETSC_TRUE;
1911: }
1912: if (transpose) {
1913: PetscCall(MatMultTranspose(ctx->A, x, y));
1914: } else {
1915: PetscCall(MatMult(ctx->A, x, y));
1916: }
1917: if (reset_x) PetscCall(VecResetArray(x));
1918: if (apply_left) {
1919: PetscScalar *ay;
1920: PetscInt i;
1922: PetscCall(VecGetArray(y, &ay));
1923: for (i = 0; i < ctx->benign_n; i++) {
1924: PetscScalar sum, val;
1925: const PetscInt *idxs;
1926: PetscInt nz, j;
1927: PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1928: PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1929: val = -ay[idxs[nz - 1]];
1930: if (ctx->apply_p0) {
1931: sum = 0.;
1932: for (j = 0; j < nz - 1; j++) {
1933: sum += ay[idxs[j]];
1934: ay[idxs[j]] += val;
1935: }
1936: ay[idxs[nz - 1]] += sum;
1937: } else {
1938: for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
1939: ay[idxs[nz - 1]] = 0.;
1940: }
1941: PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1942: }
1943: PetscCall(VecRestoreArray(y, &ay));
1944: }
1945: PetscFunctionReturn(PETSC_SUCCESS);
1946: }
1948: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1949: {
1950: PetscFunctionBegin;
1951: PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
1952: PetscFunctionReturn(PETSC_SUCCESS);
1953: }
1955: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1956: {
1957: PetscFunctionBegin;
1958: PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
1959: PetscFunctionReturn(PETSC_SUCCESS);
1960: }
1962: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1963: {
1964: PC_IS *pcis = (PC_IS *)pc->data;
1965: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1966: PCBDDCBenignMatMult_ctx ctx;
1968: PetscFunctionBegin;
1969: if (!restore) {
1970: Mat A_IB, A_BI;
1971: PetscScalar *work;
1972: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
1974: PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
1975: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
1976: PetscCall(PetscMalloc1(pcis->n, &work));
1977: PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
1978: PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
1979: PetscCall(MatSetType(A_IB, MATSHELL));
1980: PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
1981: PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
1982: PetscCall(PetscNew(&ctx));
1983: PetscCall(MatShellSetContext(A_IB, ctx));
1984: ctx->apply_left = PETSC_TRUE;
1985: ctx->apply_right = PETSC_FALSE;
1986: ctx->apply_p0 = PETSC_FALSE;
1987: ctx->benign_n = pcbddc->benign_n;
1988: if (reuse) {
1989: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1990: ctx->free = PETSC_FALSE;
1991: } else { /* TODO: could be optimized for successive solves */
1992: ISLocalToGlobalMapping N_to_D;
1993: PetscInt i;
1995: PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
1996: PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
1997: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
1998: PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
1999: ctx->free = PETSC_TRUE;
2000: }
2001: ctx->A = pcis->A_IB;
2002: ctx->work = work;
2003: PetscCall(MatSetUp(A_IB));
2004: PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2005: PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2006: pcis->A_IB = A_IB;
2008: /* A_BI as A_IB^T */
2009: PetscCall(MatCreateTranspose(A_IB, &A_BI));
2010: pcbddc->benign_original_mat = pcis->A_BI;
2011: pcis->A_BI = A_BI;
2012: } else {
2013: if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2014: PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2015: PetscCall(MatDestroy(&pcis->A_IB));
2016: pcis->A_IB = ctx->A;
2017: ctx->A = NULL;
2018: PetscCall(MatDestroy(&pcis->A_BI));
2019: pcis->A_BI = pcbddc->benign_original_mat;
2020: pcbddc->benign_original_mat = NULL;
2021: if (ctx->free) {
2022: PetscInt i;
2023: for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2024: PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2025: }
2026: PetscCall(PetscFree(ctx->work));
2027: PetscCall(PetscFree(ctx));
2028: }
2029: PetscFunctionReturn(PETSC_SUCCESS);
2030: }
2032: /* used just in bddc debug mode */
2033: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2034: {
2035: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2036: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2037: Mat An;
2039: PetscFunctionBegin;
2040: PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2041: PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2042: if (is1) {
2043: PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2044: PetscCall(MatDestroy(&An));
2045: } else {
2046: *B = An;
2047: }
2048: PetscFunctionReturn(PETSC_SUCCESS);
2049: }
2051: /* TODO: add reuse flag */
2052: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2053: {
2054: Mat Bt;
2055: PetscScalar *a, *bdata;
2056: const PetscInt *ii, *ij;
2057: PetscInt m, n, i, nnz, *bii, *bij;
2058: PetscBool flg_row;
2060: PetscFunctionBegin;
2061: PetscCall(MatGetSize(A, &n, &m));
2062: PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2063: PetscCall(MatSeqAIJGetArray(A, &a));
2064: nnz = n;
2065: for (i = 0; i < ii[n]; i++) {
2066: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2067: }
2068: PetscCall(PetscMalloc1(n + 1, &bii));
2069: PetscCall(PetscMalloc1(nnz, &bij));
2070: PetscCall(PetscMalloc1(nnz, &bdata));
2071: nnz = 0;
2072: bii[0] = 0;
2073: for (i = 0; i < n; i++) {
2074: PetscInt j;
2075: for (j = ii[i]; j < ii[i + 1]; j++) {
2076: PetscScalar entry = a[j];
2077: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2078: bij[nnz] = ij[j];
2079: bdata[nnz] = entry;
2080: nnz++;
2081: }
2082: }
2083: bii[i + 1] = nnz;
2084: }
2085: PetscCall(MatSeqAIJRestoreArray(A, &a));
2086: PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2087: PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2088: {
2089: Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data);
2090: b->free_a = PETSC_TRUE;
2091: b->free_ij = PETSC_TRUE;
2092: }
2093: if (*B == A) PetscCall(MatDestroy(&A));
2094: *B = Bt;
2095: PetscFunctionReturn(PETSC_SUCCESS);
2096: }
2098: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2099: {
2100: Mat B = NULL;
2101: DM dm;
2102: IS is_dummy, *cc_n;
2103: ISLocalToGlobalMapping l2gmap_dummy;
2104: PCBDDCGraph graph;
2105: PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL;
2106: PetscInt i, n;
2107: PetscInt *xadj, *adjncy;
2108: PetscBool isplex = PETSC_FALSE;
2110: PetscFunctionBegin;
2111: if (ncc) *ncc = 0;
2112: if (cc) *cc = NULL;
2113: if (primalv) *primalv = NULL;
2114: PetscCall(PCBDDCGraphCreate(&graph));
2115: PetscCall(MatGetDM(pc->pmat, &dm));
2116: if (!dm) PetscCall(PCGetDM(pc, &dm));
2117: if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
2118: if (filter) isplex = PETSC_FALSE;
2120: if (isplex) { /* this code has been modified from plexpartition.c */
2121: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2122: PetscInt *adj = NULL;
2123: IS cellNumbering;
2124: const PetscInt *cellNum;
2125: PetscBool useCone, useClosure;
2126: PetscSection section;
2127: PetscSegBuffer adjBuffer;
2128: PetscSF sfPoint;
2130: PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2131: PetscCall(DMGetPointSF(dm, &sfPoint));
2132: PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2133: /* Build adjacency graph via a section/segbuffer */
2134: PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion));
2135: PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2136: PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2137: /* Always use FVM adjacency to create partitioner graph */
2138: PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2139: PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2140: PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2141: PetscCall(ISGetIndices(cellNumbering, &cellNum));
2142: for (n = 0, p = pStart; p < pEnd; p++) {
2143: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2144: if (nroots > 0) {
2145: if (cellNum[p] < 0) continue;
2146: }
2147: adjSize = PETSC_DETERMINE;
2148: PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2149: for (a = 0; a < adjSize; ++a) {
2150: const PetscInt point = adj[a];
2151: if (pStart <= point && point < pEnd) {
2152: PetscInt *PETSC_RESTRICT pBuf;
2153: PetscCall(PetscSectionAddDof(section, p, 1));
2154: PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2155: *pBuf = point;
2156: }
2157: }
2158: n++;
2159: }
2160: PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2161: /* Derive CSR graph from section/segbuffer */
2162: PetscCall(PetscSectionSetUp(section));
2163: PetscCall(PetscSectionGetStorageSize(section, &size));
2164: PetscCall(PetscMalloc1(n + 1, &xadj));
2165: for (idx = 0, p = pStart; p < pEnd; p++) {
2166: if (nroots > 0) {
2167: if (cellNum[p] < 0) continue;
2168: }
2169: PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++])));
2170: }
2171: xadj[n] = size;
2172: PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2173: /* Clean up */
2174: PetscCall(PetscSegBufferDestroy(&adjBuffer));
2175: PetscCall(PetscSectionDestroy(§ion));
2176: PetscCall(PetscFree(adj));
2177: graph->xadj = xadj;
2178: graph->adjncy = adjncy;
2179: } else {
2180: Mat A;
2181: PetscBool isseqaij, flg_row;
2183: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2184: if (!A->rmap->N || !A->cmap->N) {
2185: PetscCall(PCBDDCGraphDestroy(&graph));
2186: PetscFunctionReturn(PETSC_SUCCESS);
2187: }
2188: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2189: if (!isseqaij && filter) {
2190: PetscBool isseqdense;
2192: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2193: if (!isseqdense) {
2194: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2195: } else { /* TODO: rectangular case and LDA */
2196: PetscScalar *array;
2197: PetscReal chop = 1.e-6;
2199: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2200: PetscCall(MatDenseGetArray(B, &array));
2201: PetscCall(MatGetSize(B, &n, NULL));
2202: for (i = 0; i < n; i++) {
2203: PetscInt j;
2204: for (j = i + 1; j < n; j++) {
2205: PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2206: if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2207: if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2208: }
2209: }
2210: PetscCall(MatDenseRestoreArray(B, &array));
2211: PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2212: }
2213: } else {
2214: PetscCall(PetscObjectReference((PetscObject)A));
2215: B = A;
2216: }
2217: PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2219: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2220: if (filter) {
2221: PetscScalar *data;
2222: PetscInt j, cum;
2224: PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2225: PetscCall(MatSeqAIJGetArray(B, &data));
2226: cum = 0;
2227: for (i = 0; i < n; i++) {
2228: PetscInt t;
2230: for (j = xadj[i]; j < xadj[i + 1]; j++) {
2231: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2232: adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2233: }
2234: t = xadj_filtered[i];
2235: xadj_filtered[i] = cum;
2236: cum += t;
2237: }
2238: PetscCall(MatSeqAIJRestoreArray(B, &data));
2239: graph->xadj = xadj_filtered;
2240: graph->adjncy = adjncy_filtered;
2241: } else {
2242: graph->xadj = xadj;
2243: graph->adjncy = adjncy;
2244: }
2245: }
2246: /* compute local connected components using PCBDDCGraph */
2247: PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2248: PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2249: PetscCall(ISDestroy(&is_dummy));
2250: PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT));
2251: PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2252: PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2253: PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2255: /* partial clean up */
2256: PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2257: if (B) {
2258: PetscBool flg_row;
2259: PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2260: PetscCall(MatDestroy(&B));
2261: }
2262: if (isplex) {
2263: PetscCall(PetscFree(xadj));
2264: PetscCall(PetscFree(adjncy));
2265: }
2267: /* get back data */
2268: if (isplex) {
2269: if (ncc) *ncc = graph->ncc;
2270: if (cc || primalv) {
2271: Mat A;
2272: PetscBT btv, btvt;
2273: PetscSection subSection;
2274: PetscInt *ids, cum, cump, *cids, *pids;
2276: PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2277: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2278: PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2279: PetscCall(PetscBTCreate(A->rmap->n, &btv));
2280: PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2282: cids[0] = 0;
2283: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2284: PetscInt j;
2286: PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2287: for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2288: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2290: PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2291: for (k = 0; k < 2 * size; k += 2) {
2292: PetscInt s, pp, p = closure[k], off, dof, cdof;
2294: PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2295: PetscCall(PetscSectionGetOffset(subSection, p, &off));
2296: PetscCall(PetscSectionGetDof(subSection, p, &dof));
2297: for (s = 0; s < dof - cdof; s++) {
2298: if (PetscBTLookupSet(btvt, off + s)) continue;
2299: if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2300: else pids[cump++] = off + s; /* cross-vertex */
2301: }
2302: PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2303: if (pp != p) {
2304: PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2305: PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2306: PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2307: for (s = 0; s < dof - cdof; s++) {
2308: if (PetscBTLookupSet(btvt, off + s)) continue;
2309: if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2310: else pids[cump++] = off + s; /* cross-vertex */
2311: }
2312: }
2313: }
2314: PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2315: }
2316: cids[i + 1] = cum;
2317: /* mark dofs as already assigned */
2318: for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2319: }
2320: if (cc) {
2321: PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2322: for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2323: *cc = cc_n;
2324: }
2325: if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2326: PetscCall(PetscFree3(ids, cids, pids));
2327: PetscCall(PetscBTDestroy(&btv));
2328: PetscCall(PetscBTDestroy(&btvt));
2329: }
2330: } else {
2331: if (ncc) *ncc = graph->ncc;
2332: if (cc) {
2333: PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2334: for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2335: *cc = cc_n;
2336: }
2337: }
2338: /* clean up graph */
2339: graph->xadj = NULL;
2340: graph->adjncy = NULL;
2341: PetscCall(PCBDDCGraphDestroy(&graph));
2342: PetscFunctionReturn(PETSC_SUCCESS);
2343: }
2345: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2346: {
2347: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2348: PC_IS *pcis = (PC_IS *)(pc->data);
2349: IS dirIS = NULL;
2350: PetscInt i;
2352: PetscFunctionBegin;
2353: PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2354: if (zerodiag) {
2355: Mat A;
2356: Vec vec3_N;
2357: PetscScalar *vals;
2358: const PetscInt *idxs;
2359: PetscInt nz, *count;
2361: /* p0 */
2362: PetscCall(VecSet(pcis->vec1_N, 0.));
2363: PetscCall(PetscMalloc1(pcis->n, &vals));
2364: PetscCall(ISGetLocalSize(zerodiag, &nz));
2365: PetscCall(ISGetIndices(zerodiag, &idxs));
2366: for (i = 0; i < nz; i++) vals[i] = 1.;
2367: PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2368: PetscCall(VecAssemblyBegin(pcis->vec1_N));
2369: PetscCall(VecAssemblyEnd(pcis->vec1_N));
2370: /* v_I */
2371: PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2372: for (i = 0; i < nz; i++) vals[i] = 0.;
2373: PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2374: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2375: PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2376: for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2377: PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2378: PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2379: if (dirIS) {
2380: PetscInt n;
2382: PetscCall(ISGetLocalSize(dirIS, &n));
2383: PetscCall(ISGetIndices(dirIS, &idxs));
2384: for (i = 0; i < n; i++) vals[i] = 0.;
2385: PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2386: PetscCall(ISRestoreIndices(dirIS, &idxs));
2387: }
2388: PetscCall(VecAssemblyBegin(pcis->vec2_N));
2389: PetscCall(VecAssemblyEnd(pcis->vec2_N));
2390: PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2391: PetscCall(VecSet(vec3_N, 0.));
2392: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2393: PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2394: PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2395: PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2396: PetscCall(PetscFree(vals));
2397: PetscCall(VecDestroy(&vec3_N));
2399: /* there should not be any pressure dofs lying on the interface */
2400: PetscCall(PetscCalloc1(pcis->n, &count));
2401: PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2402: for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2403: PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2404: PetscCall(ISGetIndices(zerodiag, &idxs));
2405: for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2406: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2407: PetscCall(PetscFree(count));
2408: }
2409: PetscCall(ISDestroy(&dirIS));
2411: /* check PCBDDCBenignGetOrSetP0 */
2412: PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2413: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2414: PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2415: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2416: PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2417: for (i = 0; i < pcbddc->benign_n; i++) {
2418: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2419: PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2420: }
2421: PetscFunctionReturn(PETSC_SUCCESS);
2422: }
2424: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2425: {
2426: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2427: Mat_IS *matis = (Mat_IS *)(pc->pmat->data);
2428: IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2429: PetscInt nz, n, benign_n, bsp = 1;
2430: PetscInt *interior_dofs, n_interior_dofs, nneu;
2431: PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2433: PetscFunctionBegin;
2434: if (reuse) goto project_b0;
2435: PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2436: PetscCall(MatDestroy(&pcbddc->benign_B0));
2437: for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2438: PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2439: has_null_pressures = PETSC_TRUE;
2440: have_null = PETSC_TRUE;
2441: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2442: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2443: Checks if all the pressure dofs in each subdomain have a zero diagonal
2444: If not, a change of basis on pressures is not needed
2445: since the local Schur complements are already SPD
2446: */
2447: if (pcbddc->n_ISForDofsLocal) {
2448: IS iP = NULL;
2449: PetscInt p, *pp;
2450: PetscBool flg;
2452: PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2453: n = pcbddc->n_ISForDofsLocal;
2454: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2455: PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2456: PetscOptionsEnd();
2457: if (!flg) {
2458: n = 1;
2459: pp[0] = pcbddc->n_ISForDofsLocal - 1;
2460: }
2462: bsp = 0;
2463: for (p = 0; p < n; p++) {
2464: PetscInt bs;
2466: PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2467: PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2468: bsp += bs;
2469: }
2470: PetscCall(PetscMalloc1(bsp, &bzerodiag));
2471: bsp = 0;
2472: for (p = 0; p < n; p++) {
2473: const PetscInt *idxs;
2474: PetscInt b, bs, npl, *bidxs;
2476: PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2477: PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2478: PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2479: PetscCall(PetscMalloc1(npl / bs, &bidxs));
2480: for (b = 0; b < bs; b++) {
2481: PetscInt i;
2483: for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2484: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2485: bsp++;
2486: }
2487: PetscCall(PetscFree(bidxs));
2488: PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2489: }
2490: PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2492: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2493: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2494: if (iP) {
2495: IS newpressures;
2497: PetscCall(ISDifference(pressures, iP, &newpressures));
2498: PetscCall(ISDestroy(&pressures));
2499: pressures = newpressures;
2500: }
2501: PetscCall(ISSorted(pressures, &sorted));
2502: if (!sorted) PetscCall(ISSort(pressures));
2503: PetscCall(PetscFree(pp));
2504: }
2506: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2507: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2508: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2509: PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2510: PetscCall(ISSorted(zerodiag, &sorted));
2511: if (!sorted) PetscCall(ISSort(zerodiag));
2512: PetscCall(PetscObjectReference((PetscObject)zerodiag));
2513: zerodiag_save = zerodiag;
2514: PetscCall(ISGetLocalSize(zerodiag, &nz));
2515: if (!nz) {
2516: if (n) have_null = PETSC_FALSE;
2517: has_null_pressures = PETSC_FALSE;
2518: PetscCall(ISDestroy(&zerodiag));
2519: }
2520: recompute_zerodiag = PETSC_FALSE;
2522: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2523: zerodiag_subs = NULL;
2524: benign_n = 0;
2525: n_interior_dofs = 0;
2526: interior_dofs = NULL;
2527: nneu = 0;
2528: if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2529: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2530: if (checkb) { /* need to compute interior nodes */
2531: PetscInt n, i, j;
2532: PetscInt n_neigh, *neigh, *n_shared, **shared;
2533: PetscInt *iwork;
2535: PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n));
2536: PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2537: PetscCall(PetscCalloc1(n, &iwork));
2538: PetscCall(PetscMalloc1(n, &interior_dofs));
2539: for (i = 1; i < n_neigh; i++)
2540: for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1;
2541: for (i = 0; i < n; i++)
2542: if (!iwork[i]) interior_dofs[n_interior_dofs++] = i;
2543: PetscCall(PetscFree(iwork));
2544: PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2545: }
2546: if (has_null_pressures) {
2547: IS *subs;
2548: PetscInt nsubs, i, j, nl;
2549: const PetscInt *idxs;
2550: PetscScalar *array;
2551: Vec *work;
2553: subs = pcbddc->local_subs;
2554: nsubs = pcbddc->n_local_subs;
2555: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2556: if (checkb) {
2557: PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2558: PetscCall(ISGetLocalSize(zerodiag, &nl));
2559: PetscCall(ISGetIndices(zerodiag, &idxs));
2560: /* work[0] = 1_p */
2561: PetscCall(VecSet(work[0], 0.));
2562: PetscCall(VecGetArray(work[0], &array));
2563: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2564: PetscCall(VecRestoreArray(work[0], &array));
2565: /* work[0] = 1_v */
2566: PetscCall(VecSet(work[1], 1.));
2567: PetscCall(VecGetArray(work[1], &array));
2568: for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2569: PetscCall(VecRestoreArray(work[1], &array));
2570: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2571: }
2573: if (nsubs > 1 || bsp > 1) {
2574: IS *is;
2575: PetscInt b, totb;
2577: totb = bsp;
2578: is = bsp > 1 ? bzerodiag : &zerodiag;
2579: nsubs = PetscMax(nsubs, 1);
2580: PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2581: for (b = 0; b < totb; b++) {
2582: for (i = 0; i < nsubs; i++) {
2583: ISLocalToGlobalMapping l2g;
2584: IS t_zerodiag_subs;
2585: PetscInt nl;
2587: if (subs) {
2588: PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2589: } else {
2590: IS tis;
2592: PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2593: PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2594: PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2595: PetscCall(ISDestroy(&tis));
2596: }
2597: PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2598: PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2599: if (nl) {
2600: PetscBool valid = PETSC_TRUE;
2602: if (checkb) {
2603: PetscCall(VecSet(matis->x, 0));
2604: PetscCall(ISGetLocalSize(subs[i], &nl));
2605: PetscCall(ISGetIndices(subs[i], &idxs));
2606: PetscCall(VecGetArray(matis->x, &array));
2607: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2608: PetscCall(VecRestoreArray(matis->x, &array));
2609: PetscCall(ISRestoreIndices(subs[i], &idxs));
2610: PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2611: PetscCall(MatMult(matis->A, matis->x, matis->y));
2612: PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2613: PetscCall(VecGetArray(matis->y, &array));
2614: for (j = 0; j < n_interior_dofs; j++) {
2615: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2616: valid = PETSC_FALSE;
2617: break;
2618: }
2619: }
2620: PetscCall(VecRestoreArray(matis->y, &array));
2621: }
2622: if (valid && nneu) {
2623: const PetscInt *idxs;
2624: PetscInt nzb;
2626: PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2627: PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2628: PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2629: if (nzb) valid = PETSC_FALSE;
2630: }
2631: if (valid && pressures) {
2632: IS t_pressure_subs, tmp;
2633: PetscInt i1, i2;
2635: PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2636: PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2637: PetscCall(ISGetLocalSize(tmp, &i1));
2638: PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2639: if (i2 != i1) valid = PETSC_FALSE;
2640: PetscCall(ISDestroy(&t_pressure_subs));
2641: PetscCall(ISDestroy(&tmp));
2642: }
2643: if (valid) {
2644: PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2645: benign_n++;
2646: } else recompute_zerodiag = PETSC_TRUE;
2647: }
2648: PetscCall(ISDestroy(&t_zerodiag_subs));
2649: PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2650: }
2651: }
2652: } else { /* there's just one subdomain (or zero if they have not been detected */
2653: PetscBool valid = PETSC_TRUE;
2655: if (nneu) valid = PETSC_FALSE;
2656: if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2657: if (valid && checkb) {
2658: PetscCall(MatMult(matis->A, work[0], matis->x));
2659: PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2660: PetscCall(VecGetArray(matis->x, &array));
2661: for (j = 0; j < n_interior_dofs; j++) {
2662: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2663: valid = PETSC_FALSE;
2664: break;
2665: }
2666: }
2667: PetscCall(VecRestoreArray(matis->x, &array));
2668: }
2669: if (valid) {
2670: benign_n = 1;
2671: PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2672: PetscCall(PetscObjectReference((PetscObject)zerodiag));
2673: zerodiag_subs[0] = zerodiag;
2674: }
2675: }
2676: if (checkb) PetscCall(VecDestroyVecs(2, &work));
2677: }
2678: PetscCall(PetscFree(interior_dofs));
2680: if (!benign_n) {
2681: PetscInt n;
2683: PetscCall(ISDestroy(&zerodiag));
2684: recompute_zerodiag = PETSC_FALSE;
2685: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2686: if (n) have_null = PETSC_FALSE;
2687: }
2689: /* final check for null pressures */
2690: if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2692: if (recompute_zerodiag) {
2693: PetscCall(ISDestroy(&zerodiag));
2694: if (benign_n == 1) {
2695: PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2696: zerodiag = zerodiag_subs[0];
2697: } else {
2698: PetscInt i, nzn, *new_idxs;
2700: nzn = 0;
2701: for (i = 0; i < benign_n; i++) {
2702: PetscInt ns;
2703: PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2704: nzn += ns;
2705: }
2706: PetscCall(PetscMalloc1(nzn, &new_idxs));
2707: nzn = 0;
2708: for (i = 0; i < benign_n; i++) {
2709: PetscInt ns, *idxs;
2710: PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2711: PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2712: PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2713: PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2714: nzn += ns;
2715: }
2716: PetscCall(PetscSortInt(nzn, new_idxs));
2717: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2718: }
2719: have_null = PETSC_FALSE;
2720: }
2722: /* determines if the coarse solver will be singular or not */
2723: PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2725: /* Prepare matrix to compute no-net-flux */
2726: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2727: Mat A, loc_divudotp;
2728: ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2729: IS row, col, isused = NULL;
2730: PetscInt M, N, n, st, n_isused;
2732: if (pressures) {
2733: isused = pressures;
2734: } else {
2735: isused = zerodiag_save;
2736: }
2737: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2738: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2739: PetscCall(MatGetLocalSize(A, &n, NULL));
2740: PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2741: n_isused = 0;
2742: if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2743: PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2744: st = st - n_isused;
2745: if (n) {
2746: const PetscInt *gidxs;
2748: PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2749: PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2750: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2751: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2752: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2753: PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2754: } else {
2755: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
2756: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2757: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
2758: }
2759: PetscCall(MatGetSize(pc->pmat, NULL, &N));
2760: PetscCall(ISGetSize(row, &M));
2761: PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
2762: PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
2763: PetscCall(ISDestroy(&row));
2764: PetscCall(ISDestroy(&col));
2765: PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
2766: PetscCall(MatSetType(pcbddc->divudotp, MATIS));
2767: PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
2768: PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
2769: PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
2770: PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
2771: PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
2772: PetscCall(MatDestroy(&loc_divudotp));
2773: PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2774: PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2775: }
2776: PetscCall(ISDestroy(&zerodiag_save));
2777: PetscCall(ISDestroy(&pressures));
2778: if (bzerodiag) {
2779: PetscInt i;
2781: for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
2782: PetscCall(PetscFree(bzerodiag));
2783: }
2784: pcbddc->benign_n = benign_n;
2785: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2787: /* determines if the problem has subdomains with 0 pressure block */
2788: have_null = (PetscBool)(!!pcbddc->benign_n);
2789: PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
2791: project_b0:
2792: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2793: /* change of basis and p0 dofs */
2794: if (pcbddc->benign_n) {
2795: PetscInt i, s, *nnz;
2797: /* local change of basis for pressures */
2798: PetscCall(MatDestroy(&pcbddc->benign_change));
2799: PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
2800: PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
2801: PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
2802: PetscCall(PetscMalloc1(n, &nnz));
2803: for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
2804: for (i = 0; i < pcbddc->benign_n; i++) {
2805: const PetscInt *idxs;
2806: PetscInt nzs, j;
2808: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
2809: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2810: for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
2811: nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */
2812: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2813: }
2814: PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
2815: PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2816: PetscCall(PetscFree(nnz));
2817: /* set identity by default */
2818: for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
2819: PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
2820: PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
2821: /* set change on pressures */
2822: for (s = 0; s < pcbddc->benign_n; s++) {
2823: PetscScalar *array;
2824: const PetscInt *idxs;
2825: PetscInt nzs;
2827: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
2828: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2829: for (i = 0; i < nzs - 1; i++) {
2830: PetscScalar vals[2];
2831: PetscInt cols[2];
2833: cols[0] = idxs[i];
2834: cols[1] = idxs[nzs - 1];
2835: vals[0] = 1.;
2836: vals[1] = 1.;
2837: PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
2838: }
2839: PetscCall(PetscMalloc1(nzs, &array));
2840: for (i = 0; i < nzs - 1; i++) array[i] = -1.;
2841: array[nzs - 1] = 1.;
2842: PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
2843: /* store local idxs for p0 */
2844: pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
2845: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2846: PetscCall(PetscFree(array));
2847: }
2848: PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
2849: PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
2851: /* project if needed */
2852: if (pcbddc->benign_change_explicit) {
2853: Mat M;
2855: PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
2856: PetscCall(MatDestroy(&pcbddc->local_mat));
2857: PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
2858: PetscCall(MatDestroy(&M));
2859: }
2860: /* store global idxs for p0 */
2861: PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
2862: }
2863: *zerodiaglocal = zerodiag;
2864: PetscFunctionReturn(PETSC_SUCCESS);
2865: }
2867: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2868: {
2869: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2870: PetscScalar *array;
2872: PetscFunctionBegin;
2873: if (!pcbddc->benign_sf) {
2874: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
2875: PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
2876: }
2877: if (get) {
2878: PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
2879: PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2880: PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2881: PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
2882: } else {
2883: PetscCall(VecGetArray(v, &array));
2884: PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2885: PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2886: PetscCall(VecRestoreArray(v, &array));
2887: }
2888: PetscFunctionReturn(PETSC_SUCCESS);
2889: }
2891: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2892: {
2893: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2895: PetscFunctionBegin;
2896: /* TODO: add error checking
2897: - avoid nested pop (or push) calls.
2898: - cannot push before pop.
2899: - cannot call this if pcbddc->local_mat is NULL
2900: */
2901: if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
2902: if (pop) {
2903: if (pcbddc->benign_change_explicit) {
2904: IS is_p0;
2905: MatReuse reuse;
2907: /* extract B_0 */
2908: reuse = MAT_INITIAL_MATRIX;
2909: if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
2910: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
2911: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
2912: /* remove rows and cols from local problem */
2913: PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
2914: PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
2915: PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
2916: PetscCall(ISDestroy(&is_p0));
2917: } else {
2918: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2919: PetscScalar *vals;
2920: PetscInt i, n, *idxs_ins;
2922: PetscCall(VecGetLocalSize(matis->y, &n));
2923: PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
2924: if (!pcbddc->benign_B0) {
2925: PetscInt *nnz;
2926: PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
2927: PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
2928: PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
2929: PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
2930: for (i = 0; i < pcbddc->benign_n; i++) {
2931: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
2932: nnz[i] = n - nnz[i];
2933: }
2934: PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
2935: PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2936: PetscCall(PetscFree(nnz));
2937: }
2939: for (i = 0; i < pcbddc->benign_n; i++) {
2940: PetscScalar *array;
2941: PetscInt *idxs, j, nz, cum;
2943: PetscCall(VecSet(matis->x, 0.));
2944: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
2945: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2946: for (j = 0; j < nz; j++) vals[j] = 1.;
2947: PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
2948: PetscCall(VecAssemblyBegin(matis->x));
2949: PetscCall(VecAssemblyEnd(matis->x));
2950: PetscCall(VecSet(matis->y, 0.));
2951: PetscCall(MatMult(matis->A, matis->x, matis->y));
2952: PetscCall(VecGetArray(matis->y, &array));
2953: cum = 0;
2954: for (j = 0; j < n; j++) {
2955: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2956: vals[cum] = array[j];
2957: idxs_ins[cum] = j;
2958: cum++;
2959: }
2960: }
2961: PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
2962: PetscCall(VecRestoreArray(matis->y, &array));
2963: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2964: }
2965: PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2966: PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2967: PetscCall(PetscFree2(idxs_ins, vals));
2968: }
2969: } else { /* push */
2971: PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
2972: for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
2973: PetscScalar *B0_vals;
2974: PetscInt *B0_cols, B0_ncol;
2976: PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2977: PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
2978: PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
2979: PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
2980: PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2981: }
2982: PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2983: PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2984: }
2985: PetscFunctionReturn(PETSC_SUCCESS);
2986: }
2988: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2989: {
2990: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2991: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2992: PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork;
2993: PetscBLASInt *B_iwork, *B_ifail;
2994: PetscScalar *work, lwork;
2995: PetscScalar *St, *S, *eigv;
2996: PetscScalar *Sarray, *Starray;
2997: PetscReal *eigs, thresh, lthresh, uthresh;
2998: PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
2999: PetscBool allocated_S_St, upart;
3000: #if defined(PETSC_USE_COMPLEX)
3001: PetscReal *rwork;
3002: #endif
3004: PetscFunctionBegin;
3005: if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3006: PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3007: PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3008: PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3009: sub_schurs->is_posdef);
3010: PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3012: if (pcbddc->dbg_flag) {
3013: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3014: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3015: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3016: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3017: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3018: }
3020: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3022: /* max size of subsets */
3023: mss = 0;
3024: for (i = 0; i < sub_schurs->n_subs; i++) {
3025: PetscInt subset_size;
3027: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3028: mss = PetscMax(mss, subset_size);
3029: }
3031: /* min/max and threshold */
3032: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3033: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3034: nmax = PetscMax(nmin, nmax);
3035: allocated_S_St = PETSC_FALSE;
3036: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3037: allocated_S_St = PETSC_TRUE;
3038: }
3040: /* allocate lapack workspace */
3041: cum = cum2 = 0;
3042: maxneigs = 0;
3043: for (i = 0; i < sub_schurs->n_subs; i++) {
3044: PetscInt n, subset_size;
3046: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3047: n = PetscMin(subset_size, nmax);
3048: cum += subset_size;
3049: cum2 += subset_size * n;
3050: maxneigs = PetscMax(maxneigs, n);
3051: }
3052: lwork = 0;
3053: if (mss) {
3054: PetscScalar sdummy = 0.;
3055: PetscBLASInt B_itype = 1;
3056: PetscBLASInt B_N = mss, idummy = 0;
3057: PetscReal rdummy = 0., zero = 0.0;
3058: PetscReal eps = 0.0; /* dlamch? */
3060: PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3061: B_lwork = -1;
3062: /* some implementations may complain about NULL pointers, even if we are querying */
3063: S = &sdummy;
3064: St = &sdummy;
3065: eigs = &rdummy;
3066: eigv = &sdummy;
3067: B_iwork = &idummy;
3068: B_ifail = &idummy;
3069: #if defined(PETSC_USE_COMPLEX)
3070: rwork = &rdummy;
3071: #endif
3072: thresh = 1.0;
3073: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3074: #if defined(PETSC_USE_COMPLEX)
3075: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3076: #else
3077: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3078: #endif
3079: PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr);
3080: PetscCall(PetscFPTrapPop());
3081: }
3083: nv = 0;
3084: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3085: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3086: }
3087: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3088: if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3089: PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3090: #if defined(PETSC_USE_COMPLEX)
3091: PetscCall(PetscMalloc1(7 * mss, &rwork));
3092: #endif
3093: PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3094: &pcbddc->adaptive_constraints_data));
3095: PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3097: maxneigs = 0;
3098: cum = cumarray = 0;
3099: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3100: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3101: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3102: const PetscInt *idxs;
3104: PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3105: for (cum = 0; cum < nv; cum++) {
3106: pcbddc->adaptive_constraints_n[cum] = 1;
3107: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3108: pcbddc->adaptive_constraints_data[cum] = 1.0;
3109: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3110: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3111: }
3112: PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3113: }
3115: if (mss) { /* multilevel */
3116: if (sub_schurs->gdsw) {
3117: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3118: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3119: } else {
3120: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3121: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3122: }
3123: }
3125: lthresh = pcbddc->adaptive_threshold[0];
3126: uthresh = pcbddc->adaptive_threshold[1];
3127: upart = pcbddc->use_deluxe_scaling;
3128: for (i = 0; i < sub_schurs->n_subs; i++) {
3129: const PetscInt *idxs;
3130: PetscReal upper, lower;
3131: PetscInt j, subset_size, eigs_start = 0;
3132: PetscBLASInt B_N;
3133: PetscBool same_data = PETSC_FALSE;
3134: PetscBool scal = PETSC_FALSE;
3136: if (upart) {
3137: upper = PETSC_MAX_REAL;
3138: lower = uthresh;
3139: } else {
3140: if (sub_schurs->gdsw) {
3141: upper = uthresh;
3142: lower = PETSC_MIN_REAL;
3143: } else {
3144: PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3145: upper = 1. / uthresh;
3146: lower = 0.;
3147: }
3148: }
3149: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3150: PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3151: PetscCall(PetscBLASIntCast(subset_size, &B_N));
3152: /* this is experimental: we assume the dofs have been properly grouped to have
3153: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3154: if (!sub_schurs->is_posdef) {
3155: Mat T;
3157: for (j = 0; j < subset_size; j++) {
3158: if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3159: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3160: PetscCall(MatScale(T, -1.0));
3161: PetscCall(MatDestroy(&T));
3162: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3163: PetscCall(MatScale(T, -1.0));
3164: PetscCall(MatDestroy(&T));
3165: if (sub_schurs->change_primal_sub) {
3166: PetscInt nz, k;
3167: const PetscInt *idxs;
3169: PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3170: PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3171: for (k = 0; k < nz; k++) {
3172: *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3173: *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3174: }
3175: PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3176: }
3177: scal = PETSC_TRUE;
3178: break;
3179: }
3180: }
3181: }
3183: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3184: if (sub_schurs->is_symmetric) {
3185: PetscInt j, k;
3186: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3187: PetscCall(PetscArrayzero(S, subset_size * subset_size));
3188: PetscCall(PetscArrayzero(St, subset_size * subset_size));
3189: }
3190: for (j = 0; j < subset_size; j++) {
3191: for (k = j; k < subset_size; k++) {
3192: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3193: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3194: }
3195: }
3196: } else {
3197: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3198: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3199: }
3200: } else {
3201: S = Sarray + cumarray;
3202: St = Starray + cumarray;
3203: }
3204: /* see if we can save some work */
3205: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3207: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3208: B_neigs = 0;
3209: } else {
3210: PetscBLASInt B_itype = 1;
3211: PetscBLASInt B_IL, B_IU;
3212: PetscReal eps = -1.0; /* dlamch? */
3213: PetscInt nmin_s;
3214: PetscBool compute_range;
3216: PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3217: B_neigs = 0;
3218: compute_range = (PetscBool)!same_data;
3219: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3221: if (pcbddc->dbg_flag) {
3222: PetscInt nc = 0;
3224: if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3225: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i,
3226: sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc));
3227: }
3229: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3230: if (compute_range) {
3231: /* ask for eigenvalues larger than thresh */
3232: if (sub_schurs->is_posdef) {
3233: #if defined(PETSC_USE_COMPLEX)
3234: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3235: #else
3236: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3237: #endif
3238: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3239: } else { /* no theory so far, but it works nicely */
3240: PetscInt recipe = 0, recipe_m = 1;
3241: PetscReal bb[2];
3243: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3244: switch (recipe) {
3245: case 0:
3246: if (scal) {
3247: bb[0] = PETSC_MIN_REAL;
3248: bb[1] = lthresh;
3249: } else {
3250: bb[0] = uthresh;
3251: bb[1] = PETSC_MAX_REAL;
3252: }
3253: #if defined(PETSC_USE_COMPLEX)
3254: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3255: #else
3256: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3257: #endif
3258: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3259: break;
3260: case 1:
3261: bb[0] = PETSC_MIN_REAL;
3262: bb[1] = lthresh * lthresh;
3263: #if defined(PETSC_USE_COMPLEX)
3264: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3265: #else
3266: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3267: #endif
3268: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3269: if (!scal) {
3270: PetscBLASInt B_neigs2 = 0;
3272: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3273: bb[1] = PETSC_MAX_REAL;
3274: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3275: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3276: #if defined(PETSC_USE_COMPLEX)
3277: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3278: #else
3279: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3280: #endif
3281: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3282: B_neigs += B_neigs2;
3283: }
3284: break;
3285: case 2:
3286: if (scal) {
3287: bb[0] = PETSC_MIN_REAL;
3288: bb[1] = 0;
3289: #if defined(PETSC_USE_COMPLEX)
3290: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3291: #else
3292: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3293: #endif
3294: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3295: } else {
3296: PetscBLASInt B_neigs2 = 0;
3297: PetscBool do_copy = PETSC_FALSE;
3299: lthresh = PetscMax(lthresh, 0.0);
3300: if (lthresh > 0.0) {
3301: bb[0] = PETSC_MIN_REAL;
3302: bb[1] = lthresh * lthresh;
3304: do_copy = PETSC_TRUE;
3305: #if defined(PETSC_USE_COMPLEX)
3306: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3307: #else
3308: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3309: #endif
3310: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3311: }
3312: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3313: bb[1] = PETSC_MAX_REAL;
3314: if (do_copy) {
3315: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3316: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3317: }
3318: #if defined(PETSC_USE_COMPLEX)
3319: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3320: #else
3321: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3322: #endif
3323: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3324: B_neigs += B_neigs2;
3325: }
3326: break;
3327: case 3:
3328: if (scal) {
3329: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3330: } else {
3331: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3332: }
3333: if (!scal) {
3334: bb[0] = uthresh;
3335: bb[1] = PETSC_MAX_REAL;
3336: #if defined(PETSC_USE_COMPLEX)
3337: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3338: #else
3339: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3340: #endif
3341: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3342: }
3343: if (recipe_m > 0 && B_N - B_neigs > 0) {
3344: PetscBLASInt B_neigs2 = 0;
3346: B_IL = 1;
3347: PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3348: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3349: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3350: #if defined(PETSC_USE_COMPLEX)
3351: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3352: #else
3353: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3354: #endif
3355: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3356: B_neigs += B_neigs2;
3357: }
3358: break;
3359: case 4:
3360: bb[0] = PETSC_MIN_REAL;
3361: bb[1] = lthresh;
3362: #if defined(PETSC_USE_COMPLEX)
3363: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3364: #else
3365: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3366: #endif
3367: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3368: {
3369: PetscBLASInt B_neigs2 = 0;
3371: bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3372: bb[1] = PETSC_MAX_REAL;
3373: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3374: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3375: #if defined(PETSC_USE_COMPLEX)
3376: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3377: #else
3378: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3379: #endif
3380: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3381: B_neigs += B_neigs2;
3382: }
3383: break;
3384: case 5: /* same as before: first compute all eigenvalues, then filter */
3385: #if defined(PETSC_USE_COMPLEX)
3386: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3387: #else
3388: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3389: #endif
3390: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3391: {
3392: PetscInt e, k, ne;
3393: for (e = 0, ne = 0; e < B_neigs; e++) {
3394: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3395: for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3396: eigs[ne] = eigs[e];
3397: ne++;
3398: }
3399: }
3400: PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3401: B_neigs = ne;
3402: }
3403: break;
3404: default:
3405: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3406: }
3407: }
3408: } else if (!same_data) { /* this is just to see all the eigenvalues */
3409: B_IU = PetscMax(1, PetscMin(B_N, nmax));
3410: B_IL = 1;
3411: #if defined(PETSC_USE_COMPLEX)
3412: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3413: #else
3414: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3415: #endif
3416: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3417: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3418: PetscInt k;
3419: PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3420: PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3421: PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3422: nmin = nmax;
3423: PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3424: for (k = 0; k < nmax; k++) {
3425: eigs[k] = 1. / PETSC_SMALL;
3426: eigv[k * (subset_size + 1)] = 1.0;
3427: }
3428: }
3429: PetscCall(PetscFPTrapPop());
3430: if (B_ierr) {
3431: PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3432: PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3433: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3434: }
3436: if (B_neigs > nmax) {
3437: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3438: if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3439: B_neigs = nmax;
3440: }
3442: nmin_s = PetscMin(nmin, B_N);
3443: if (B_neigs < nmin_s) {
3444: PetscBLASInt B_neigs2 = 0;
3446: if (upart) {
3447: if (scal) {
3448: B_IU = nmin_s;
3449: B_IL = B_neigs + 1;
3450: } else {
3451: B_IL = B_N - nmin_s + 1;
3452: B_IU = B_N - B_neigs;
3453: }
3454: } else {
3455: B_IL = B_neigs + 1;
3456: B_IU = nmin_s;
3457: }
3458: if (pcbddc->dbg_flag) {
3459: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3460: }
3461: if (sub_schurs->is_symmetric) {
3462: PetscInt j, k;
3463: for (j = 0; j < subset_size; j++) {
3464: for (k = j; k < subset_size; k++) {
3465: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3466: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3467: }
3468: }
3469: } else {
3470: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3471: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3472: }
3473: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3474: #if defined(PETSC_USE_COMPLEX)
3475: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3476: #else
3477: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3478: #endif
3479: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3480: PetscCall(PetscFPTrapPop());
3481: B_neigs += B_neigs2;
3482: }
3483: if (B_ierr) {
3484: PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3485: PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3486: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3487: }
3488: if (pcbddc->dbg_flag) {
3489: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3490: for (j = 0; j < B_neigs; j++) {
3491: if (!sub_schurs->gdsw) {
3492: if (eigs[j] == 0.0) {
3493: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n"));
3494: } else {
3495: if (upart) {
3496: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start]));
3497: } else {
3498: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start])));
3499: }
3500: }
3501: } else {
3502: double pg = (double)eigs[j + eigs_start];
3503: if (pg < 2 * PETSC_SMALL) pg = 0.0;
3504: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg));
3505: }
3506: }
3507: }
3508: }
3509: /* change the basis back to the original one */
3510: if (sub_schurs->change) {
3511: Mat change, phi, phit;
3513: if (pcbddc->dbg_flag > 2) {
3514: PetscInt ii;
3515: for (ii = 0; ii < B_neigs; ii++) {
3516: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3517: for (j = 0; j < B_N; j++) {
3518: #if defined(PETSC_USE_COMPLEX)
3519: PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3520: PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3521: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3522: #else
3523: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3524: #endif
3525: }
3526: }
3527: }
3528: PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3529: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3530: PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi));
3531: PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3532: PetscCall(MatDestroy(&phit));
3533: PetscCall(MatDestroy(&phi));
3534: }
3535: maxneigs = PetscMax(B_neigs, maxneigs);
3536: pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3537: if (B_neigs) {
3538: PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3540: if (pcbddc->dbg_flag > 1) {
3541: PetscInt ii;
3542: for (ii = 0; ii < B_neigs; ii++) {
3543: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3544: for (j = 0; j < B_N; j++) {
3545: #if defined(PETSC_USE_COMPLEX)
3546: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3547: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3548: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3549: #else
3550: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3551: #endif
3552: }
3553: }
3554: }
3555: PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3556: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3557: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3558: cum++;
3559: }
3560: PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3561: /* shift for next computation */
3562: cumarray += subset_size * subset_size;
3563: }
3564: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3566: if (mss) {
3567: if (sub_schurs->gdsw) {
3568: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3569: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3570: } else {
3571: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3572: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3573: /* destroy matrices (junk) */
3574: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3575: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3576: }
3577: }
3578: if (allocated_S_St) PetscCall(PetscFree2(S, St));
3579: PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3580: #if defined(PETSC_USE_COMPLEX)
3581: PetscCall(PetscFree(rwork));
3582: #endif
3583: if (pcbddc->dbg_flag) {
3584: PetscInt maxneigs_r;
3585: PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3586: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3587: }
3588: PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3589: PetscFunctionReturn(PETSC_SUCCESS);
3590: }
3592: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3593: {
3594: PetscScalar *coarse_submat_vals;
3596: PetscFunctionBegin;
3597: /* Setup local scatters R_to_B and (optionally) R_to_D */
3598: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3599: PetscCall(PCBDDCSetUpLocalScatters(pc));
3601: /* Setup local neumann solver ksp_R */
3602: /* PCBDDCSetUpLocalScatters should be called first! */
3603: PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3605: /*
3606: Setup local correction and local part of coarse basis.
3607: Gives back the dense local part of the coarse matrix in column major ordering
3608: */
3609: PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals));
3611: /* Compute total number of coarse nodes and setup coarse solver */
3612: PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals));
3614: /* free */
3615: PetscCall(PetscFree(coarse_submat_vals));
3616: PetscFunctionReturn(PETSC_SUCCESS);
3617: }
3619: PetscErrorCode PCBDDCResetCustomization(PC pc)
3620: {
3621: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3623: PetscFunctionBegin;
3624: PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3625: PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3626: PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3627: PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3628: PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3629: PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3630: PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3631: PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3632: PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3633: PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3634: PetscFunctionReturn(PETSC_SUCCESS);
3635: }
3637: PetscErrorCode PCBDDCResetTopography(PC pc)
3638: {
3639: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3640: PetscInt i;
3642: PetscFunctionBegin;
3643: PetscCall(MatDestroy(&pcbddc->nedcG));
3644: PetscCall(ISDestroy(&pcbddc->nedclocal));
3645: PetscCall(MatDestroy(&pcbddc->discretegradient));
3646: PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3647: PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3648: PetscCall(MatDestroy(&pcbddc->switch_static_change));
3649: PetscCall(VecDestroy(&pcbddc->work_change));
3650: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3651: PetscCall(MatDestroy(&pcbddc->divudotp));
3652: PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3653: PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3654: for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3655: pcbddc->n_local_subs = 0;
3656: PetscCall(PetscFree(pcbddc->local_subs));
3657: PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3658: pcbddc->graphanalyzed = PETSC_FALSE;
3659: pcbddc->recompute_topography = PETSC_TRUE;
3660: pcbddc->corner_selected = PETSC_FALSE;
3661: PetscFunctionReturn(PETSC_SUCCESS);
3662: }
3664: PetscErrorCode PCBDDCResetSolvers(PC pc)
3665: {
3666: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3668: PetscFunctionBegin;
3669: PetscCall(VecDestroy(&pcbddc->coarse_vec));
3670: if (pcbddc->coarse_phi_B) {
3671: PetscScalar *array;
3672: PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array));
3673: PetscCall(PetscFree(array));
3674: }
3675: PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3676: PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3677: PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3678: PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3679: PetscCall(VecDestroy(&pcbddc->vec1_P));
3680: PetscCall(VecDestroy(&pcbddc->vec1_C));
3681: PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3682: PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3683: PetscCall(VecDestroy(&pcbddc->vec1_R));
3684: PetscCall(VecDestroy(&pcbddc->vec2_R));
3685: PetscCall(ISDestroy(&pcbddc->is_R_local));
3686: PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3687: PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3688: PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3689: PetscCall(KSPReset(pcbddc->ksp_D));
3690: PetscCall(KSPReset(pcbddc->ksp_R));
3691: PetscCall(KSPReset(pcbddc->coarse_ksp));
3692: PetscCall(MatDestroy(&pcbddc->local_mat));
3693: PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3694: PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3695: PetscCall(PetscFree(pcbddc->global_primal_indices));
3696: PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3697: PetscCall(MatDestroy(&pcbddc->benign_change));
3698: PetscCall(VecDestroy(&pcbddc->benign_vec));
3699: PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3700: PetscCall(MatDestroy(&pcbddc->benign_B0));
3701: PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3702: if (pcbddc->benign_zerodiag_subs) {
3703: PetscInt i;
3704: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3705: PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3706: }
3707: PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3708: PetscFunctionReturn(PETSC_SUCCESS);
3709: }
3711: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3712: {
3713: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3714: PC_IS *pcis = (PC_IS *)pc->data;
3715: VecType impVecType;
3716: PetscInt n_constraints, n_R, old_size;
3718: PetscFunctionBegin;
3719: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3720: n_R = pcis->n - pcbddc->n_vertices;
3721: PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3722: /* local work vectors (try to avoid unneeded work)*/
3723: /* R nodes */
3724: old_size = -1;
3725: if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3726: if (n_R != old_size) {
3727: PetscCall(VecDestroy(&pcbddc->vec1_R));
3728: PetscCall(VecDestroy(&pcbddc->vec2_R));
3729: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3730: PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3731: PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3732: PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3733: }
3734: /* local primal dofs */
3735: old_size = -1;
3736: if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3737: if (pcbddc->local_primal_size != old_size) {
3738: PetscCall(VecDestroy(&pcbddc->vec1_P));
3739: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3740: PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3741: PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3742: }
3743: /* local explicit constraints */
3744: old_size = -1;
3745: if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3746: if (n_constraints && n_constraints != old_size) {
3747: PetscCall(VecDestroy(&pcbddc->vec1_C));
3748: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3749: PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3750: PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3751: }
3752: PetscFunctionReturn(PETSC_SUCCESS);
3753: }
3755: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3756: {
3757: /* pointers to pcis and pcbddc */
3758: PC_IS *pcis = (PC_IS *)pc->data;
3759: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3760: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3761: /* submatrices of local problem */
3762: Mat A_RV, A_VR, A_VV, local_auxmat2_R;
3763: /* submatrices of local coarse problem */
3764: Mat S_VV, S_CV, S_VC, S_CC;
3765: /* working matrices */
3766: Mat C_CR;
3767: /* additional working stuff */
3768: PC pc_R;
3769: Mat F, Brhs = NULL;
3770: Vec dummy_vec;
3771: PetscBool isLU, isCHOL, need_benign_correction, sparserhs;
3772: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3773: PetscScalar *work;
3774: PetscInt *idx_V_B;
3775: PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I;
3776: PetscInt i, n_R, n_D, n_B;
3777: PetscScalar one = 1.0, m_one = -1.0;
3779: PetscFunctionBegin;
3780: PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
3781: PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
3783: /* Set Non-overlapping dimensions */
3784: n_vertices = pcbddc->n_vertices;
3785: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3786: n_B = pcis->n_B;
3787: n_D = pcis->n - n_B;
3788: n_R = pcis->n - n_vertices;
3790: /* vertices in boundary numbering */
3791: PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
3792: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
3793: PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
3795: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3796: PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals));
3797: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV));
3798: PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size));
3799: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV));
3800: PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size));
3801: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC));
3802: PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size));
3803: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC));
3804: PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size));
3806: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3807: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
3808: PetscCall(PCSetUp(pc_R));
3809: PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
3810: PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
3811: lda_rhs = n_R;
3812: need_benign_correction = PETSC_FALSE;
3813: if (isLU || isCHOL) {
3814: PetscCall(PCFactorGetMatrix(pc_R, &F));
3815: } else if (sub_schurs && sub_schurs->reuse_solver) {
3816: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3817: MatFactorType type;
3819: F = reuse_solver->F;
3820: PetscCall(MatGetFactorType(F, &type));
3821: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3822: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3823: PetscCall(MatGetSize(F, &lda_rhs, NULL));
3824: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3825: } else F = NULL;
3827: /* determine if we can use a sparse right-hand side */
3828: sparserhs = PETSC_FALSE;
3829: if (F) {
3830: MatSolverType solver;
3832: PetscCall(MatFactorGetSolverType(F, &solver));
3833: PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
3834: }
3836: /* allocate workspace */
3837: n = 0;
3838: if (n_constraints) n += lda_rhs * n_constraints;
3839: if (n_vertices) {
3840: n = PetscMax(2 * lda_rhs * n_vertices, n);
3841: n = PetscMax((lda_rhs + n_B) * n_vertices, n);
3842: }
3843: if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n);
3844: PetscCall(PetscMalloc1(n, &work));
3846: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3847: dummy_vec = NULL;
3848: if (need_benign_correction && lda_rhs != n_R && F) {
3849: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
3850: PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
3851: PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
3852: }
3854: PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3855: PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3857: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3858: if (n_constraints) {
3859: Mat M3, C_B;
3860: IS is_aux;
3862: /* Extract constraints on R nodes: C_{CR} */
3863: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux));
3864: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
3865: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
3867: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3868: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3869: if (!sparserhs) {
3870: PetscCall(PetscArrayzero(work, lda_rhs * n_constraints));
3871: for (i = 0; i < n_constraints; i++) {
3872: const PetscScalar *row_cmat_values;
3873: const PetscInt *row_cmat_indices;
3874: PetscInt size_of_constraint, j;
3876: PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3877: for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j];
3878: PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3879: }
3880: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs));
3881: } else {
3882: Mat tC_CR;
3884: PetscCall(MatScale(C_CR, -1.0));
3885: if (lda_rhs != n_R) {
3886: PetscScalar *aa;
3887: PetscInt r, *ii, *jj;
3888: PetscBool done;
3890: PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3891: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
3892: PetscCall(MatSeqAIJGetArray(C_CR, &aa));
3893: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
3894: PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3895: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
3896: } else {
3897: PetscCall(PetscObjectReference((PetscObject)C_CR));
3898: tC_CR = C_CR;
3899: }
3900: PetscCall(MatCreateTranspose(tC_CR, &Brhs));
3901: PetscCall(MatDestroy(&tC_CR));
3902: }
3903: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R));
3904: if (F) {
3905: if (need_benign_correction) {
3906: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3908: /* rhs is already zero on interior dofs, no need to change the rhs */
3909: PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
3910: }
3911: PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
3912: if (need_benign_correction) {
3913: PetscScalar *marr;
3914: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3916: PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3917: if (lda_rhs != n_R) {
3918: for (i = 0; i < n_constraints; i++) {
3919: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
3920: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
3921: PetscCall(VecResetArray(dummy_vec));
3922: }
3923: } else {
3924: for (i = 0; i < n_constraints; i++) {
3925: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
3926: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
3927: PetscCall(VecResetArray(pcbddc->vec1_R));
3928: }
3929: }
3930: PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3931: }
3932: } else {
3933: PetscScalar *marr;
3935: PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3936: for (i = 0; i < n_constraints; i++) {
3937: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
3938: PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
3939: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
3940: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
3941: PetscCall(VecResetArray(pcbddc->vec1_R));
3942: PetscCall(VecResetArray(pcbddc->vec2_R));
3943: }
3944: PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3945: }
3946: if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
3947: PetscCall(MatDestroy(&Brhs));
3948: if (!pcbddc->switch_static) {
3949: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2));
3950: for (i = 0; i < n_constraints; i++) {
3951: Vec r, b;
3952: PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
3953: PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
3954: PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3955: PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3956: PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
3957: PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
3958: }
3959: PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3960: } else {
3961: if (lda_rhs != n_R) {
3962: IS dummy;
3964: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy));
3965: PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
3966: PetscCall(ISDestroy(&dummy));
3967: } else {
3968: PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
3969: pcbddc->local_auxmat2 = local_auxmat2_R;
3970: }
3971: PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3972: }
3973: PetscCall(ISDestroy(&is_aux));
3974: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
3975: PetscCall(MatScale(M3, m_one));
3976: if (isCHOL) {
3977: PetscCall(MatCholeskyFactor(M3, NULL, NULL));
3978: } else {
3979: PetscCall(MatLUFactor(M3, NULL, NULL, NULL));
3980: }
3981: PetscCall(MatSeqDenseInvertFactors_Private(M3));
3982: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3983: PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1));
3984: PetscCall(MatDestroy(&C_B));
3985: PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3986: PetscCall(MatDestroy(&M3));
3987: }
3989: /* Get submatrices from subdomain matrix */
3990: if (n_vertices) {
3991: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3992: PetscBool oldpin;
3993: #endif
3994: PetscBool isaij;
3995: IS is_aux;
3997: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3998: IS tis;
4000: PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4001: PetscCall(ISSort(tis));
4002: PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4003: PetscCall(ISDestroy(&tis));
4004: } else {
4005: PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4006: }
4007: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4008: oldpin = pcbddc->local_mat->boundtocpu;
4009: #endif
4010: PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4011: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4012: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4013: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij));
4014: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4015: PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4016: }
4017: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4018: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4019: PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4020: #endif
4021: PetscCall(ISDestroy(&is_aux));
4022: }
4024: /* Matrix of coarse basis functions (local) */
4025: if (pcbddc->coarse_phi_B) {
4026: PetscInt on_B, on_primal, on_D = n_D;
4027: if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL));
4028: PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal));
4029: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4030: PetscScalar *marray;
4032: PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray));
4033: PetscCall(PetscFree(marray));
4034: PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4035: PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4036: PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4037: PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4038: }
4039: }
4041: if (!pcbddc->coarse_phi_B) {
4042: PetscScalar *marr;
4044: /* memory size */
4045: n = n_B * pcbddc->local_primal_size;
4046: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size;
4047: if (!pcbddc->symmetric_primal) n *= 2;
4048: PetscCall(PetscCalloc1(n, &marr));
4049: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B));
4050: marr += n_B * pcbddc->local_primal_size;
4051: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4052: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D));
4053: marr += n_D * pcbddc->local_primal_size;
4054: }
4055: if (!pcbddc->symmetric_primal) {
4056: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B));
4057: marr += n_B * pcbddc->local_primal_size;
4058: if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D));
4059: } else {
4060: PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
4061: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4062: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4063: PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
4064: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4065: }
4066: }
4067: }
4069: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4070: p0_lidx_I = NULL;
4071: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4072: const PetscInt *idxs;
4074: PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4075: PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4076: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4077: PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4078: }
4080: /* vertices */
4081: if (n_vertices) {
4082: PetscBool restoreavr = PETSC_FALSE;
4084: PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV));
4086: if (n_R) {
4087: Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */
4088: PetscBLASInt B_N, B_one = 1;
4089: const PetscScalar *x;
4090: PetscScalar *y;
4092: PetscCall(MatScale(A_RV, m_one));
4093: if (need_benign_correction) {
4094: ISLocalToGlobalMapping RtoN;
4095: IS is_p0;
4096: PetscInt *idxs_p0, n;
4098: PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4099: PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4100: PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4101: PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4102: PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4103: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4104: PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4105: PetscCall(ISDestroy(&is_p0));
4106: }
4108: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV));
4109: if (!sparserhs || need_benign_correction) {
4110: if (lda_rhs == n_R) {
4111: PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4112: } else {
4113: PetscScalar *av, *array;
4114: const PetscInt *xadj, *adjncy;
4115: PetscInt n;
4116: PetscBool flg_row;
4118: array = work + lda_rhs * n_vertices;
4119: PetscCall(PetscArrayzero(array, lda_rhs * n_vertices));
4120: PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4121: PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4122: PetscCall(MatSeqAIJGetArray(A_RV, &av));
4123: for (i = 0; i < n; i++) {
4124: PetscInt j;
4125: for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j];
4126: }
4127: PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4128: PetscCall(MatDestroy(&A_RV));
4129: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV));
4130: }
4131: if (need_benign_correction) {
4132: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4133: PetscScalar *marr;
4135: PetscCall(MatDenseGetArray(A_RV, &marr));
4136: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4138: | 0 0 0 | (V)
4139: L = | 0 0 -1 | (P-p0)
4140: | 0 0 -1 | (p0)
4142: */
4143: for (i = 0; i < reuse_solver->benign_n; i++) {
4144: const PetscScalar *vals;
4145: const PetscInt *idxs, *idxs_zero;
4146: PetscInt n, j, nz;
4148: PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4149: PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4150: PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4151: for (j = 0; j < n; j++) {
4152: PetscScalar val = vals[j];
4153: PetscInt k, col = idxs[j];
4154: for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4155: }
4156: PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4157: PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4158: }
4159: PetscCall(MatDenseRestoreArray(A_RV, &marr));
4160: }
4161: PetscCall(PetscObjectReference((PetscObject)A_RV));
4162: Brhs = A_RV;
4163: } else {
4164: Mat tA_RVT, A_RVT;
4166: if (!pcbddc->symmetric_primal) {
4167: /* A_RV already scaled by -1 */
4168: PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4169: } else {
4170: restoreavr = PETSC_TRUE;
4171: PetscCall(MatScale(A_VR, -1.0));
4172: PetscCall(PetscObjectReference((PetscObject)A_VR));
4173: A_RVT = A_VR;
4174: }
4175: if (lda_rhs != n_R) {
4176: PetscScalar *aa;
4177: PetscInt r, *ii, *jj;
4178: PetscBool done;
4180: PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4181: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4182: PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4183: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4184: PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4185: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4186: } else {
4187: PetscCall(PetscObjectReference((PetscObject)A_RVT));
4188: tA_RVT = A_RVT;
4189: }
4190: PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4191: PetscCall(MatDestroy(&tA_RVT));
4192: PetscCall(MatDestroy(&A_RVT));
4193: }
4194: if (F) {
4195: /* need to correct the rhs */
4196: if (need_benign_correction) {
4197: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4198: PetscScalar *marr;
4200: PetscCall(MatDenseGetArray(Brhs, &marr));
4201: if (lda_rhs != n_R) {
4202: for (i = 0; i < n_vertices; i++) {
4203: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4204: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4205: PetscCall(VecResetArray(dummy_vec));
4206: }
4207: } else {
4208: for (i = 0; i < n_vertices; i++) {
4209: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4210: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4211: PetscCall(VecResetArray(pcbddc->vec1_R));
4212: }
4213: }
4214: PetscCall(MatDenseRestoreArray(Brhs, &marr));
4215: }
4216: PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4217: if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4218: /* need to correct the solution */
4219: if (need_benign_correction) {
4220: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4221: PetscScalar *marr;
4223: PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4224: if (lda_rhs != n_R) {
4225: for (i = 0; i < n_vertices; i++) {
4226: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4227: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4228: PetscCall(VecResetArray(dummy_vec));
4229: }
4230: } else {
4231: for (i = 0; i < n_vertices; i++) {
4232: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4233: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4234: PetscCall(VecResetArray(pcbddc->vec1_R));
4235: }
4236: }
4237: PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4238: }
4239: } else {
4240: PetscCall(MatDenseGetArray(Brhs, &y));
4241: for (i = 0; i < n_vertices; i++) {
4242: PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs));
4243: PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs));
4244: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4245: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4246: PetscCall(VecResetArray(pcbddc->vec1_R));
4247: PetscCall(VecResetArray(pcbddc->vec2_R));
4248: }
4249: PetscCall(MatDenseRestoreArray(Brhs, &y));
4250: }
4251: PetscCall(MatDestroy(&A_RV));
4252: PetscCall(MatDestroy(&Brhs));
4253: /* S_VV and S_CV */
4254: if (n_constraints) {
4255: Mat B;
4257: PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices));
4258: for (i = 0; i < n_vertices; i++) {
4259: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
4260: PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B));
4261: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4262: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4263: PetscCall(VecResetArray(pcis->vec1_B));
4264: PetscCall(VecResetArray(pcbddc->vec1_R));
4265: }
4266: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B));
4267: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4268: PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV));
4269: PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4270: PetscCall(MatProductSetFromOptions(S_CV));
4271: PetscCall(MatProductSymbolic(S_CV));
4272: PetscCall(MatProductNumeric(S_CV));
4273: PetscCall(MatProductClear(S_CV));
4275: PetscCall(MatDestroy(&B));
4276: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B));
4277: /* Reuse B = local_auxmat2_R * S_CV */
4278: PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B));
4279: PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4280: PetscCall(MatProductSetFromOptions(B));
4281: PetscCall(MatProductSymbolic(B));
4282: PetscCall(MatProductNumeric(B));
4284: PetscCall(MatScale(S_CV, m_one));
4285: PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N));
4286: PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one));
4287: PetscCall(MatDestroy(&B));
4288: }
4289: if (lda_rhs != n_R) {
4290: PetscCall(MatDestroy(&A_RRmA_RV));
4291: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV));
4292: PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs));
4293: }
4294: PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt));
4295: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4296: if (need_benign_correction) {
4297: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4298: PetscScalar *marr, *sums;
4300: PetscCall(PetscMalloc1(n_vertices, &sums));
4301: PetscCall(MatDenseGetArray(S_VVt, &marr));
4302: for (i = 0; i < reuse_solver->benign_n; i++) {
4303: const PetscScalar *vals;
4304: const PetscInt *idxs, *idxs_zero;
4305: PetscInt n, j, nz;
4307: PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4308: PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4309: for (j = 0; j < n_vertices; j++) {
4310: PetscInt k;
4311: sums[j] = 0.;
4312: for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs];
4313: }
4314: PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4315: for (j = 0; j < n; j++) {
4316: PetscScalar val = vals[j];
4317: PetscInt k;
4318: for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k];
4319: }
4320: PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4321: PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4322: }
4323: PetscCall(PetscFree(sums));
4324: PetscCall(MatDenseRestoreArray(S_VVt, &marr));
4325: PetscCall(MatDestroy(&A_RV_bcorr));
4326: }
4327: PetscCall(MatDestroy(&A_RRmA_RV));
4328: PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N));
4329: PetscCall(MatDenseGetArrayRead(A_VV, &x));
4330: PetscCall(MatDenseGetArray(S_VVt, &y));
4331: PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one));
4332: PetscCall(MatDenseRestoreArrayRead(A_VV, &x));
4333: PetscCall(MatDenseRestoreArray(S_VVt, &y));
4334: PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN));
4335: PetscCall(MatDestroy(&S_VVt));
4336: } else {
4337: PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN));
4338: }
4339: PetscCall(MatDestroy(&A_VV));
4341: /* coarse basis functions */
4342: for (i = 0; i < n_vertices; i++) {
4343: Vec v;
4344: PetscScalar one = 1.0, zero = 0.0;
4346: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4347: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v));
4348: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4349: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4350: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4351: PetscMPIInt rank;
4352: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank));
4353: PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4354: }
4355: PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4356: PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4357: PetscCall(VecAssemblyEnd(v));
4358: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v));
4360: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4361: PetscInt j;
4363: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v));
4364: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4365: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4366: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4367: PetscMPIInt rank;
4368: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank));
4369: PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4370: }
4371: for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4372: PetscCall(VecAssemblyBegin(v));
4373: PetscCall(VecAssemblyEnd(v));
4374: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v));
4375: }
4376: PetscCall(VecResetArray(pcbddc->vec1_R));
4377: }
4378: /* if n_R == 0 the object is not destroyed */
4379: PetscCall(MatDestroy(&A_RV));
4380: }
4381: PetscCall(VecDestroy(&dummy_vec));
4383: if (n_constraints) {
4384: Mat B;
4386: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B));
4387: PetscCall(MatScale(S_CC, m_one));
4388: PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B));
4389: PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4390: PetscCall(MatProductSetFromOptions(B));
4391: PetscCall(MatProductSymbolic(B));
4392: PetscCall(MatProductNumeric(B));
4394: PetscCall(MatScale(S_CC, m_one));
4395: if (n_vertices) {
4396: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4397: PetscCall(MatTransposeSetPrecursor(S_CV, S_VC));
4398: PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC));
4399: } else {
4400: Mat S_VCt;
4402: if (lda_rhs != n_R) {
4403: PetscCall(MatDestroy(&B));
4404: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B));
4405: PetscCall(MatDenseSetLDA(B, lda_rhs));
4406: }
4407: PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt));
4408: PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN));
4409: PetscCall(MatDestroy(&S_VCt));
4410: }
4411: }
4412: PetscCall(MatDestroy(&B));
4413: /* coarse basis functions */
4414: for (i = 0; i < n_constraints; i++) {
4415: Vec v;
4417: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4418: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4419: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4420: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4421: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4422: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4423: PetscInt j;
4424: PetscScalar zero = 0.0;
4425: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4426: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4427: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4428: for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4429: PetscCall(VecAssemblyBegin(v));
4430: PetscCall(VecAssemblyEnd(v));
4431: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4432: }
4433: PetscCall(VecResetArray(pcbddc->vec1_R));
4434: }
4435: }
4436: if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R));
4437: PetscCall(PetscFree(p0_lidx_I));
4439: /* coarse matrix entries relative to B_0 */
4440: if (pcbddc->benign_n) {
4441: Mat B0_B, B0_BPHI;
4442: IS is_dummy;
4443: const PetscScalar *data;
4444: PetscInt j;
4446: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4447: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4448: PetscCall(ISDestroy(&is_dummy));
4449: PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4450: PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4451: PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
4452: for (j = 0; j < pcbddc->benign_n; j++) {
4453: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4454: for (i = 0; i < pcbddc->local_primal_size; i++) {
4455: coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j];
4456: coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j];
4457: }
4458: }
4459: PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
4460: PetscCall(MatDestroy(&B0_B));
4461: PetscCall(MatDestroy(&B0_BPHI));
4462: }
4464: /* compute other basis functions for non-symmetric problems */
4465: if (!pcbddc->symmetric_primal) {
4466: Mat B_V = NULL, B_C = NULL;
4467: PetscScalar *marray;
4469: if (n_constraints) {
4470: Mat S_CCT, C_CRT;
4472: PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
4473: PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
4474: PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C));
4475: PetscCall(MatDestroy(&S_CCT));
4476: if (n_vertices) {
4477: Mat S_VCT;
4479: PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
4480: PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V));
4481: PetscCall(MatDestroy(&S_VCT));
4482: }
4483: PetscCall(MatDestroy(&C_CRT));
4484: } else {
4485: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
4486: }
4487: if (n_vertices && n_R) {
4488: PetscScalar *av, *marray;
4489: const PetscInt *xadj, *adjncy;
4490: PetscInt n;
4491: PetscBool flg_row;
4493: /* B_V = B_V - A_VR^T */
4494: PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4495: PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4496: PetscCall(MatSeqAIJGetArray(A_VR, &av));
4497: PetscCall(MatDenseGetArray(B_V, &marray));
4498: for (i = 0; i < n; i++) {
4499: PetscInt j;
4500: for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
4501: }
4502: PetscCall(MatDenseRestoreArray(B_V, &marray));
4503: PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4504: PetscCall(MatDestroy(&A_VR));
4505: }
4507: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4508: if (n_vertices) {
4509: PetscCall(MatDenseGetArray(B_V, &marray));
4510: for (i = 0; i < n_vertices; i++) {
4511: PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
4512: PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4513: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4514: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4515: PetscCall(VecResetArray(pcbddc->vec1_R));
4516: PetscCall(VecResetArray(pcbddc->vec2_R));
4517: }
4518: PetscCall(MatDenseRestoreArray(B_V, &marray));
4519: }
4520: if (B_C) {
4521: PetscCall(MatDenseGetArray(B_C, &marray));
4522: for (i = n_vertices; i < n_constraints + n_vertices; i++) {
4523: PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
4524: PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4525: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4526: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4527: PetscCall(VecResetArray(pcbddc->vec1_R));
4528: PetscCall(VecResetArray(pcbddc->vec2_R));
4529: }
4530: PetscCall(MatDenseRestoreArray(B_C, &marray));
4531: }
4532: /* coarse basis functions */
4533: for (i = 0; i < pcbddc->local_primal_size; i++) {
4534: Vec v;
4536: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
4537: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
4538: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4539: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4540: if (i < n_vertices) {
4541: PetscScalar one = 1.0;
4542: PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4543: PetscCall(VecAssemblyBegin(v));
4544: PetscCall(VecAssemblyEnd(v));
4545: }
4546: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
4548: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4549: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
4550: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4551: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4552: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
4553: }
4554: PetscCall(VecResetArray(pcbddc->vec1_R));
4555: }
4556: PetscCall(MatDestroy(&B_V));
4557: PetscCall(MatDestroy(&B_C));
4558: }
4560: /* free memory */
4561: PetscCall(PetscFree(idx_V_B));
4562: PetscCall(MatDestroy(&S_VV));
4563: PetscCall(MatDestroy(&S_CV));
4564: PetscCall(MatDestroy(&S_VC));
4565: PetscCall(MatDestroy(&S_CC));
4566: PetscCall(PetscFree(work));
4567: if (n_vertices) PetscCall(MatDestroy(&A_VR));
4568: if (n_constraints) PetscCall(MatDestroy(&C_CR));
4569: PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4571: /* Checking coarse_sub_mat and coarse basis functions */
4572: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4573: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4574: if (pcbddc->dbg_flag) {
4575: Mat coarse_sub_mat;
4576: Mat AUXMAT, TM1, TM2, TM3, TM4;
4577: Mat coarse_phi_D, coarse_phi_B;
4578: Mat coarse_psi_D, coarse_psi_B;
4579: Mat A_II, A_BB, A_IB, A_BI;
4580: Mat C_B, CPHI;
4581: IS is_dummy;
4582: Vec mones;
4583: MatType checkmattype = MATSEQAIJ;
4584: PetscReal real_value;
4586: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4587: Mat A;
4588: PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
4589: PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
4590: PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
4591: PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
4592: PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
4593: PetscCall(MatDestroy(&A));
4594: } else {
4595: PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
4596: PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
4597: PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
4598: PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
4599: }
4600: PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
4601: PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
4602: if (!pcbddc->symmetric_primal) {
4603: PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
4604: PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
4605: }
4606: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat));
4608: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
4609: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
4610: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4611: if (!pcbddc->symmetric_primal) {
4612: PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4613: PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
4614: PetscCall(MatDestroy(&AUXMAT));
4615: PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4616: PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
4617: PetscCall(MatDestroy(&AUXMAT));
4618: PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4619: PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4620: PetscCall(MatDestroy(&AUXMAT));
4621: PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4622: PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4623: PetscCall(MatDestroy(&AUXMAT));
4624: } else {
4625: PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
4626: PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
4627: PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4628: PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4629: PetscCall(MatDestroy(&AUXMAT));
4630: PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4631: PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4632: PetscCall(MatDestroy(&AUXMAT));
4633: }
4634: PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
4635: PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
4636: PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
4637: PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
4638: if (pcbddc->benign_n) {
4639: Mat B0_B, B0_BPHI;
4640: const PetscScalar *data2;
4641: PetscScalar *data;
4642: PetscInt j;
4644: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4645: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4646: PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4647: PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4648: PetscCall(MatDenseGetArray(TM1, &data));
4649: PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
4650: for (j = 0; j < pcbddc->benign_n; j++) {
4651: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4652: for (i = 0; i < pcbddc->local_primal_size; i++) {
4653: data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
4654: data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
4655: }
4656: }
4657: PetscCall(MatDenseRestoreArray(TM1, &data));
4658: PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
4659: PetscCall(MatDestroy(&B0_B));
4660: PetscCall(ISDestroy(&is_dummy));
4661: PetscCall(MatDestroy(&B0_BPHI));
4662: }
4663: #if 0
4664: {
4665: PetscViewer viewer;
4666: char filename[256];
4667: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
4668: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
4669: PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
4670: PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed"));
4671: PetscCall(MatView(coarse_sub_mat,viewer));
4672: PetscCall(PetscObjectSetName((PetscObject)TM1,"projected"));
4673: PetscCall(MatView(TM1,viewer));
4674: if (pcbddc->coarse_phi_B) {
4675: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
4676: PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
4677: }
4678: if (pcbddc->coarse_phi_D) {
4679: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
4680: PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
4681: }
4682: if (pcbddc->coarse_psi_B) {
4683: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
4684: PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
4685: }
4686: if (pcbddc->coarse_psi_D) {
4687: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
4688: PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
4689: }
4690: PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
4691: PetscCall(MatView(pcbddc->local_mat,viewer));
4692: PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
4693: PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
4694: PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
4695: PetscCall(ISView(pcis->is_I_local,viewer));
4696: PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
4697: PetscCall(ISView(pcis->is_B_local,viewer));
4698: PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
4699: PetscCall(ISView(pcbddc->is_R_local,viewer));
4700: PetscCall(PetscViewerDestroy(&viewer));
4701: }
4702: #endif
4703: PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN));
4704: PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
4705: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
4706: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
4708: /* check constraints */
4709: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
4710: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4711: if (!pcbddc->benign_n) { /* TODO: add benign case */
4712: PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4713: } else {
4714: PetscScalar *data;
4715: Mat tmat;
4716: PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
4717: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
4718: PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
4719: PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4720: PetscCall(MatDestroy(&tmat));
4721: }
4722: PetscCall(MatCreateVecs(CPHI, &mones, NULL));
4723: PetscCall(VecSet(mones, -1.0));
4724: PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4725: PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4726: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4727: if (!pcbddc->symmetric_primal) {
4728: PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
4729: PetscCall(VecSet(mones, -1.0));
4730: PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4731: PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4732: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4733: }
4734: PetscCall(MatDestroy(&C_B));
4735: PetscCall(MatDestroy(&CPHI));
4736: PetscCall(ISDestroy(&is_dummy));
4737: PetscCall(VecDestroy(&mones));
4738: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4739: PetscCall(MatDestroy(&A_II));
4740: PetscCall(MatDestroy(&A_BB));
4741: PetscCall(MatDestroy(&A_IB));
4742: PetscCall(MatDestroy(&A_BI));
4743: PetscCall(MatDestroy(&TM1));
4744: PetscCall(MatDestroy(&TM2));
4745: PetscCall(MatDestroy(&TM3));
4746: PetscCall(MatDestroy(&TM4));
4747: PetscCall(MatDestroy(&coarse_phi_D));
4748: PetscCall(MatDestroy(&coarse_phi_B));
4749: if (!pcbddc->symmetric_primal) {
4750: PetscCall(MatDestroy(&coarse_psi_D));
4751: PetscCall(MatDestroy(&coarse_psi_B));
4752: }
4753: PetscCall(MatDestroy(&coarse_sub_mat));
4754: }
4755: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4756: {
4757: PetscBool gpu;
4759: PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu));
4760: if (gpu) {
4761: if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
4762: if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
4763: if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
4764: if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
4765: if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
4766: if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
4767: }
4768: }
4769: /* get back data */
4770: *coarse_submat_vals_n = coarse_submat_vals;
4771: PetscFunctionReturn(PETSC_SUCCESS);
4772: }
4774: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
4775: {
4776: Mat *work_mat;
4777: IS isrow_s, iscol_s;
4778: PetscBool rsorted, csorted;
4779: PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
4781: PetscFunctionBegin;
4782: PetscCall(ISSorted(isrow, &rsorted));
4783: PetscCall(ISSorted(iscol, &csorted));
4784: PetscCall(ISGetLocalSize(isrow, &rsize));
4785: PetscCall(ISGetLocalSize(iscol, &csize));
4787: if (!rsorted) {
4788: const PetscInt *idxs;
4789: PetscInt *idxs_sorted, i;
4791: PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
4792: PetscCall(PetscMalloc1(rsize, &idxs_sorted));
4793: for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
4794: PetscCall(ISGetIndices(isrow, &idxs));
4795: PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
4796: for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
4797: PetscCall(ISRestoreIndices(isrow, &idxs));
4798: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
4799: } else {
4800: PetscCall(PetscObjectReference((PetscObject)isrow));
4801: isrow_s = isrow;
4802: }
4804: if (!csorted) {
4805: if (isrow == iscol) {
4806: PetscCall(PetscObjectReference((PetscObject)isrow_s));
4807: iscol_s = isrow_s;
4808: } else {
4809: const PetscInt *idxs;
4810: PetscInt *idxs_sorted, i;
4812: PetscCall(PetscMalloc1(csize, &idxs_perm_c));
4813: PetscCall(PetscMalloc1(csize, &idxs_sorted));
4814: for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
4815: PetscCall(ISGetIndices(iscol, &idxs));
4816: PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
4817: for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
4818: PetscCall(ISRestoreIndices(iscol, &idxs));
4819: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
4820: }
4821: } else {
4822: PetscCall(PetscObjectReference((PetscObject)iscol));
4823: iscol_s = iscol;
4824: }
4826: PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
4828: if (!rsorted || !csorted) {
4829: Mat new_mat;
4830: IS is_perm_r, is_perm_c;
4832: if (!rsorted) {
4833: PetscInt *idxs_r, i;
4834: PetscCall(PetscMalloc1(rsize, &idxs_r));
4835: for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
4836: PetscCall(PetscFree(idxs_perm_r));
4837: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
4838: } else {
4839: PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
4840: }
4841: PetscCall(ISSetPermutation(is_perm_r));
4843: if (!csorted) {
4844: if (isrow_s == iscol_s) {
4845: PetscCall(PetscObjectReference((PetscObject)is_perm_r));
4846: is_perm_c = is_perm_r;
4847: } else {
4848: PetscInt *idxs_c, i;
4849: PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
4850: PetscCall(PetscMalloc1(csize, &idxs_c));
4851: for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
4852: PetscCall(PetscFree(idxs_perm_c));
4853: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
4854: }
4855: } else {
4856: PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
4857: }
4858: PetscCall(ISSetPermutation(is_perm_c));
4860: PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
4861: PetscCall(MatDestroy(&work_mat[0]));
4862: work_mat[0] = new_mat;
4863: PetscCall(ISDestroy(&is_perm_r));
4864: PetscCall(ISDestroy(&is_perm_c));
4865: }
4867: PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
4868: *B = work_mat[0];
4869: PetscCall(MatDestroyMatrices(1, &work_mat));
4870: PetscCall(ISDestroy(&isrow_s));
4871: PetscCall(ISDestroy(&iscol_s));
4872: PetscFunctionReturn(PETSC_SUCCESS);
4873: }
4875: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4876: {
4877: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
4878: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4879: Mat new_mat, lA;
4880: IS is_local, is_global;
4881: PetscInt local_size;
4882: PetscBool isseqaij, issym, isset;
4884: PetscFunctionBegin;
4885: PetscCall(MatDestroy(&pcbddc->local_mat));
4886: PetscCall(MatGetSize(matis->A, &local_size, NULL));
4887: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
4888: PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
4889: PetscCall(ISDestroy(&is_local));
4890: PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
4891: PetscCall(ISDestroy(&is_global));
4893: if (pcbddc->dbg_flag) {
4894: Vec x, x_change;
4895: PetscReal error;
4897: PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
4898: PetscCall(VecSetRandom(x, NULL));
4899: PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
4900: PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4901: PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4902: PetscCall(MatMult(new_mat, matis->x, matis->y));
4903: if (!pcbddc->change_interior) {
4904: const PetscScalar *x, *y, *v;
4905: PetscReal lerror = 0.;
4906: PetscInt i;
4908: PetscCall(VecGetArrayRead(matis->x, &x));
4909: PetscCall(VecGetArrayRead(matis->y, &y));
4910: PetscCall(VecGetArrayRead(matis->counter, &v));
4911: for (i = 0; i < local_size; i++)
4912: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
4913: PetscCall(VecRestoreArrayRead(matis->x, &x));
4914: PetscCall(VecRestoreArrayRead(matis->y, &y));
4915: PetscCall(VecRestoreArrayRead(matis->counter, &v));
4916: PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
4917: if (error > PETSC_SMALL) {
4918: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4919: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
4920: } else {
4921: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
4922: }
4923: }
4924: }
4925: PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4926: PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4927: PetscCall(VecAXPY(x, -1.0, x_change));
4928: PetscCall(VecNorm(x, NORM_INFINITY, &error));
4929: if (error > PETSC_SMALL) {
4930: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4931: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
4932: } else {
4933: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
4934: }
4935: }
4936: PetscCall(VecDestroy(&x));
4937: PetscCall(VecDestroy(&x_change));
4938: }
4940: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4941: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
4943: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4944: PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
4945: if (isseqaij) {
4946: PetscCall(MatDestroy(&pcbddc->local_mat));
4947: PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4948: if (lA) {
4949: Mat work;
4950: PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4951: PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4952: PetscCall(MatDestroy(&work));
4953: }
4954: } else {
4955: Mat work_mat;
4957: PetscCall(MatDestroy(&pcbddc->local_mat));
4958: PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4959: PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4960: PetscCall(MatDestroy(&work_mat));
4961: if (lA) {
4962: Mat work;
4963: PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4964: PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4965: PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4966: PetscCall(MatDestroy(&work));
4967: }
4968: }
4969: PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
4970: if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
4971: PetscCall(MatDestroy(&new_mat));
4972: PetscFunctionReturn(PETSC_SUCCESS);
4973: }
4975: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4976: {
4977: PC_IS *pcis = (PC_IS *)(pc->data);
4978: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4979: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4980: PetscInt *idx_R_local = NULL;
4981: PetscInt n_vertices, i, j, n_R, n_D, n_B;
4982: PetscInt vbs, bs;
4983: PetscBT bitmask = NULL;
4985: PetscFunctionBegin;
4986: /*
4987: No need to setup local scatters if
4988: - primal space is unchanged
4989: AND
4990: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4991: AND
4992: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4993: */
4994: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
4995: /* destroy old objects */
4996: PetscCall(ISDestroy(&pcbddc->is_R_local));
4997: PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4998: PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4999: /* Set Non-overlapping dimensions */
5000: n_B = pcis->n_B;
5001: n_D = pcis->n - n_B;
5002: n_vertices = pcbddc->n_vertices;
5004: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5006: /* create auxiliary bitmask and allocate workspace */
5007: if (!sub_schurs || !sub_schurs->reuse_solver) {
5008: PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5009: PetscCall(PetscBTCreate(pcis->n, &bitmask));
5010: for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5012: for (i = 0, n_R = 0; i < pcis->n; i++) {
5013: if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5014: }
5015: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5016: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5018: PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5019: PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5020: }
5022: /* Block code */
5023: vbs = 1;
5024: PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5025: if (bs > 1 && !(n_vertices % bs)) {
5026: PetscBool is_blocked = PETSC_TRUE;
5027: PetscInt *vary;
5028: if (!sub_schurs || !sub_schurs->reuse_solver) {
5029: PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5030: PetscCall(PetscArrayzero(vary, pcis->n / bs));
5031: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5032: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5033: for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5034: for (i = 0; i < pcis->n / bs; i++) {
5035: if (vary[i] != 0 && vary[i] != bs) {
5036: is_blocked = PETSC_FALSE;
5037: break;
5038: }
5039: }
5040: PetscCall(PetscFree(vary));
5041: } else {
5042: /* Verify directly the R set */
5043: for (i = 0; i < n_R / bs; i++) {
5044: PetscInt j, node = idx_R_local[bs * i];
5045: for (j = 1; j < bs; j++) {
5046: if (node != idx_R_local[bs * i + j] - j) {
5047: is_blocked = PETSC_FALSE;
5048: break;
5049: }
5050: }
5051: }
5052: }
5053: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5054: vbs = bs;
5055: for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5056: }
5057: }
5058: PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5059: if (sub_schurs && sub_schurs->reuse_solver) {
5060: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5062: PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5063: PetscCall(ISDestroy(&reuse_solver->is_R));
5064: PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5065: reuse_solver->is_R = pcbddc->is_R_local;
5066: } else {
5067: PetscCall(PetscFree(idx_R_local));
5068: }
5070: /* print some info if requested */
5071: if (pcbddc->dbg_flag) {
5072: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5073: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5074: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5075: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5076: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5077: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5078: pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5079: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5080: }
5082: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5083: if (!sub_schurs || !sub_schurs->reuse_solver) {
5084: IS is_aux1, is_aux2;
5085: PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5087: PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5088: PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5089: PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5090: PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5091: for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5092: PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5093: for (i = 0, j = 0; i < n_R; i++) {
5094: if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5095: }
5096: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5097: PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5098: for (i = 0, j = 0; i < n_B; i++) {
5099: if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5100: }
5101: PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5102: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5103: PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5104: PetscCall(ISDestroy(&is_aux1));
5105: PetscCall(ISDestroy(&is_aux2));
5107: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5108: PetscCall(PetscMalloc1(n_D, &aux_array1));
5109: for (i = 0, j = 0; i < n_R; i++) {
5110: if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5111: }
5112: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5113: PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5114: PetscCall(ISDestroy(&is_aux1));
5115: }
5116: PetscCall(PetscBTDestroy(&bitmask));
5117: PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5118: } else {
5119: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5120: IS tis;
5121: PetscInt schur_size;
5123: PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5124: PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5125: PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5126: PetscCall(ISDestroy(&tis));
5127: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5128: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5129: PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5130: PetscCall(ISDestroy(&tis));
5131: }
5132: }
5133: PetscFunctionReturn(PETSC_SUCCESS);
5134: }
5136: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5137: {
5138: MatNullSpace NullSpace;
5139: Mat dmat;
5140: const Vec *nullvecs;
5141: Vec v, v2, *nullvecs2;
5142: VecScatter sct = NULL;
5143: PetscContainer c;
5144: PetscScalar *ddata;
5145: PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs;
5146: PetscBool nnsp_has_cnst;
5148: PetscFunctionBegin;
5149: if (!is && !B) { /* MATIS */
5150: Mat_IS *matis = (Mat_IS *)A->data;
5152: if (!B) PetscCall(MatISGetLocalMat(A, &B));
5153: sct = matis->cctx;
5154: PetscCall(PetscObjectReference((PetscObject)sct));
5155: } else {
5156: PetscCall(MatGetNullSpace(B, &NullSpace));
5157: if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5158: if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5159: }
5160: PetscCall(MatGetNullSpace(A, &NullSpace));
5161: if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5162: if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5164: PetscCall(MatCreateVecs(A, &v, NULL));
5165: PetscCall(MatCreateVecs(B, &v2, NULL));
5166: if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5167: PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs));
5168: bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5169: PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5170: PetscCall(VecGetBlockSize(v2, &bs));
5171: PetscCall(VecGetSize(v2, &N));
5172: PetscCall(VecGetLocalSize(v2, &n));
5173: PetscCall(PetscMalloc1(n * bsiz, &ddata));
5174: for (k = 0; k < nnsp_size; k++) {
5175: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5176: PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5177: PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5178: }
5179: if (nnsp_has_cnst) {
5180: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5181: PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5182: }
5183: PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5184: PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5186: PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5187: PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c));
5188: PetscCall(PetscContainerSetPointer(c, ddata));
5189: PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault));
5190: PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c));
5191: PetscCall(PetscContainerDestroy(&c));
5192: PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5193: PetscCall(MatDestroy(&dmat));
5195: for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5196: PetscCall(PetscFree(nullvecs2));
5197: PetscCall(MatSetNearNullSpace(B, NullSpace));
5198: PetscCall(MatNullSpaceDestroy(&NullSpace));
5199: PetscCall(VecDestroy(&v));
5200: PetscCall(VecDestroy(&v2));
5201: PetscCall(VecScatterDestroy(&sct));
5202: PetscFunctionReturn(PETSC_SUCCESS);
5203: }
5205: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5206: {
5207: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5208: PC_IS *pcis = (PC_IS *)pc->data;
5209: PC pc_temp;
5210: Mat A_RR;
5211: MatNullSpace nnsp;
5212: MatReuse reuse;
5213: PetscScalar m_one = -1.0;
5214: PetscReal value;
5215: PetscInt n_D, n_R;
5216: PetscBool issbaij, opts, isset, issym;
5217: void (*f)(void) = NULL;
5218: char dir_prefix[256], neu_prefix[256], str_level[16];
5219: size_t len;
5221: PetscFunctionBegin;
5222: PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5223: /* approximate solver, propagate NearNullSpace if needed */
5224: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5225: MatNullSpace gnnsp1, gnnsp2;
5226: PetscBool lhas, ghas;
5228: PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5229: PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5230: PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5231: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5232: PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5233: if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5234: }
5236: /* compute prefixes */
5237: PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5238: PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5239: if (!pcbddc->current_level) {
5240: PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5241: PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5242: PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5243: PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5244: } else {
5245: PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
5246: PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5247: len -= 15; /* remove "pc_bddc_coarse_" */
5248: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
5249: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5250: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5251: PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5252: PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5253: PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5254: PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5255: PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5256: PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5257: }
5259: /* DIRICHLET PROBLEM */
5260: if (dirichlet) {
5261: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5262: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5263: PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5264: if (pcbddc->dbg_flag) {
5265: Mat A_IIn;
5267: PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5268: PetscCall(MatDestroy(&pcis->A_II));
5269: pcis->A_II = A_IIn;
5270: }
5271: }
5272: PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5273: if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5275: /* Matrix for Dirichlet problem is pcis->A_II */
5276: n_D = pcis->n - pcis->n_B;
5277: opts = PETSC_FALSE;
5278: if (!pcbddc->ksp_D) { /* create object if not yet build */
5279: opts = PETSC_TRUE;
5280: PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5281: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5282: /* default */
5283: PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5284: PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5285: PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5286: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5287: if (issbaij) {
5288: PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5289: } else {
5290: PetscCall(PCSetType(pc_temp, PCLU));
5291: }
5292: PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5293: }
5294: PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5295: PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5296: /* Allow user's customization */
5297: if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5298: PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5299: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5300: PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5301: }
5302: PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5303: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5304: PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5305: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5306: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
5307: const PetscInt *idxs;
5308: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
5310: PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5311: PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5312: PetscCall(PetscMalloc1(nl * cdim, &scoords));
5313: for (i = 0; i < nl; i++) {
5314: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5315: }
5316: PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5317: PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5318: PetscCall(PetscFree(scoords));
5319: }
5320: if (sub_schurs && sub_schurs->reuse_solver) {
5321: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5323: PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5324: }
5326: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5327: if (!n_D) {
5328: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5329: PetscCall(PCSetType(pc_temp, PCNONE));
5330: }
5331: PetscCall(KSPSetUp(pcbddc->ksp_D));
5332: /* set ksp_D into pcis data */
5333: PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5334: PetscCall(KSPDestroy(&pcis->ksp_D));
5335: pcis->ksp_D = pcbddc->ksp_D;
5336: }
5338: /* NEUMANN PROBLEM */
5339: A_RR = NULL;
5340: if (neumann) {
5341: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5342: PetscInt ibs, mbs;
5343: PetscBool issbaij, reuse_neumann_solver, isset, issym;
5344: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5346: reuse_neumann_solver = PETSC_FALSE;
5347: if (sub_schurs && sub_schurs->reuse_solver) {
5348: IS iP;
5350: reuse_neumann_solver = PETSC_TRUE;
5351: PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5352: if (iP) reuse_neumann_solver = PETSC_FALSE;
5353: }
5354: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5355: PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5356: if (pcbddc->ksp_R) { /* already created ksp */
5357: PetscInt nn_R;
5358: PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5359: PetscCall(PetscObjectReference((PetscObject)A_RR));
5360: PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5361: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5362: PetscCall(KSPReset(pcbddc->ksp_R));
5363: PetscCall(MatDestroy(&A_RR));
5364: reuse = MAT_INITIAL_MATRIX;
5365: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5366: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5367: PetscCall(MatDestroy(&A_RR));
5368: reuse = MAT_INITIAL_MATRIX;
5369: } else { /* safe to reuse the matrix */
5370: reuse = MAT_REUSE_MATRIX;
5371: }
5372: }
5373: /* last check */
5374: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5375: PetscCall(MatDestroy(&A_RR));
5376: reuse = MAT_INITIAL_MATRIX;
5377: }
5378: } else { /* first time, so we need to create the matrix */
5379: reuse = MAT_INITIAL_MATRIX;
5380: }
5381: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5382: TODO: Get Rid of these conversions */
5383: PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5384: PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5385: PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5386: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5387: if (matis->A == pcbddc->local_mat) {
5388: PetscCall(MatDestroy(&pcbddc->local_mat));
5389: PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5390: } else {
5391: PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5392: }
5393: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5394: if (matis->A == pcbddc->local_mat) {
5395: PetscCall(MatDestroy(&pcbddc->local_mat));
5396: PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5397: } else {
5398: PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5399: }
5400: }
5401: /* extract A_RR */
5402: if (reuse_neumann_solver) {
5403: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5405: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5406: PetscCall(MatDestroy(&A_RR));
5407: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5408: PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
5409: } else {
5410: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
5411: }
5412: } else {
5413: PetscCall(MatDestroy(&A_RR));
5414: PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
5415: PetscCall(PetscObjectReference((PetscObject)A_RR));
5416: }
5417: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5418: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
5419: }
5420: PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5421: if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
5422: opts = PETSC_FALSE;
5423: if (!pcbddc->ksp_R) { /* create object if not present */
5424: opts = PETSC_TRUE;
5425: PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
5426: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
5427: /* default */
5428: PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
5429: PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
5430: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5431: PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
5432: if (issbaij) {
5433: PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5434: } else {
5435: PetscCall(PCSetType(pc_temp, PCLU));
5436: }
5437: PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
5438: }
5439: PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
5440: PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
5441: if (opts) { /* Allow user's customization once */
5442: PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
5443: }
5444: PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5445: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5446: PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
5447: }
5448: PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5449: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5450: PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5451: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5452: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
5453: const PetscInt *idxs;
5454: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
5456: PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
5457: PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
5458: PetscCall(PetscMalloc1(nl * cdim, &scoords));
5459: for (i = 0; i < nl; i++) {
5460: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5461: }
5462: PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
5463: PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5464: PetscCall(PetscFree(scoords));
5465: }
5467: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5468: if (!n_R) {
5469: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5470: PetscCall(PCSetType(pc_temp, PCNONE));
5471: }
5472: /* Reuse solver if it is present */
5473: if (reuse_neumann_solver) {
5474: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5476: PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
5477: }
5478: PetscCall(KSPSetUp(pcbddc->ksp_R));
5479: }
5481: if (pcbddc->dbg_flag) {
5482: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5483: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5484: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5485: }
5486: PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5488: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5489: if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
5490: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
5491: if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
5492: /* check Dirichlet and Neumann solvers */
5493: if (pcbddc->dbg_flag) {
5494: if (dirichlet) { /* Dirichlet */
5495: PetscCall(VecSetRandom(pcis->vec1_D, NULL));
5496: PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
5497: PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
5498: PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
5499: PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
5500: PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
5501: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value));
5502: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5503: }
5504: if (neumann) { /* Neumann */
5505: PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
5506: PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
5507: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
5508: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5509: PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
5510: PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
5511: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value));
5512: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5513: }
5514: }
5515: /* free Neumann problem's matrix */
5516: PetscCall(MatDestroy(&A_RR));
5517: PetscFunctionReturn(PETSC_SUCCESS);
5518: }
5520: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5521: {
5522: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5523: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5524: PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5526: PetscFunctionBegin;
5527: if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
5528: if (!pcbddc->switch_static) {
5529: if (applytranspose && pcbddc->local_auxmat1) {
5530: PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
5531: PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5532: }
5533: if (!reuse_solver) {
5534: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5535: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5536: } else {
5537: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5539: PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5540: PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5541: }
5542: } else {
5543: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5544: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5545: PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5546: PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5547: if (applytranspose && pcbddc->local_auxmat1) {
5548: PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
5549: PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5550: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5551: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5552: }
5553: }
5554: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5555: if (!reuse_solver || pcbddc->switch_static) {
5556: if (applytranspose) {
5557: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5558: } else {
5559: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5560: }
5561: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
5562: } else {
5563: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5565: if (applytranspose) {
5566: PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5567: } else {
5568: PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5569: }
5570: }
5571: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5572: PetscCall(VecSet(inout_B, 0.));
5573: if (!pcbddc->switch_static) {
5574: if (!reuse_solver) {
5575: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5576: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5577: } else {
5578: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5580: PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5581: PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5582: }
5583: if (!applytranspose && pcbddc->local_auxmat1) {
5584: PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5585: PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
5586: }
5587: } else {
5588: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5589: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5590: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5591: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5592: if (!applytranspose && pcbddc->local_auxmat1) {
5593: PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5594: PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
5595: }
5596: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5597: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5598: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5599: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5600: }
5601: PetscFunctionReturn(PETSC_SUCCESS);
5602: }
5604: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5605: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5606: {
5607: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5608: PC_IS *pcis = (PC_IS *)(pc->data);
5609: const PetscScalar zero = 0.0;
5611: PetscFunctionBegin;
5612: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5613: if (!pcbddc->benign_apply_coarse_only) {
5614: if (applytranspose) {
5615: PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
5616: if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5617: } else {
5618: PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
5619: if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5620: }
5621: } else {
5622: PetscCall(VecSet(pcbddc->vec1_P, zero));
5623: }
5625: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5626: if (pcbddc->benign_n) {
5627: PetscScalar *array;
5628: PetscInt j;
5630: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5631: for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
5632: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5633: }
5635: /* start communications from local primal nodes to rhs of coarse solver */
5636: PetscCall(VecSet(pcbddc->coarse_vec, zero));
5637: PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
5638: PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
5640: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5641: if (pcbddc->coarse_ksp) {
5642: Mat coarse_mat;
5643: Vec rhs, sol;
5644: MatNullSpace nullsp;
5645: PetscBool isbddc = PETSC_FALSE;
5647: if (pcbddc->benign_have_null) {
5648: PC coarse_pc;
5650: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5651: PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
5652: /* we need to propagate to coarser levels the need for a possible benign correction */
5653: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5654: PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data);
5655: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5656: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5657: }
5658: }
5659: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
5660: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
5661: PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
5662: if (applytranspose) {
5663: PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
5664: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5665: PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
5666: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5667: PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5668: PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
5669: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5670: } else {
5671: PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
5672: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5673: PC coarse_pc;
5675: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
5676: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5677: PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
5678: PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
5679: PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
5680: } else {
5681: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5682: PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
5683: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5684: PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5685: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5686: }
5687: }
5688: /* we don't need the benign correction at coarser levels anymore */
5689: if (pcbddc->benign_have_null && isbddc) {
5690: PC coarse_pc;
5691: PC_BDDC *coarsepcbddc;
5693: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5694: coarsepcbddc = (PC_BDDC *)(coarse_pc->data);
5695: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5696: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5697: }
5698: }
5700: /* Local solution on R nodes */
5701: if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
5702: /* communications from coarse sol to local primal nodes */
5703: PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
5704: PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
5706: /* Sum contributions from the two levels */
5707: if (!pcbddc->benign_apply_coarse_only) {
5708: if (applytranspose) {
5709: PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5710: if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5711: } else {
5712: PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5713: if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5714: }
5715: /* store p0 */
5716: if (pcbddc->benign_n) {
5717: PetscScalar *array;
5718: PetscInt j;
5720: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5721: for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
5722: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5723: }
5724: } else { /* expand the coarse solution */
5725: if (applytranspose) {
5726: PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
5727: } else {
5728: PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
5729: }
5730: }
5731: PetscFunctionReturn(PETSC_SUCCESS);
5732: }
5734: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
5735: {
5736: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5737: Vec from, to;
5738: const PetscScalar *array;
5740: PetscFunctionBegin;
5741: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5742: from = pcbddc->coarse_vec;
5743: to = pcbddc->vec1_P;
5744: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5745: Vec tvec;
5747: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5748: PetscCall(VecResetArray(tvec));
5749: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
5750: PetscCall(VecGetArrayRead(tvec, &array));
5751: PetscCall(VecPlaceArray(from, array));
5752: PetscCall(VecRestoreArrayRead(tvec, &array));
5753: }
5754: } else { /* from local to global -> put data in coarse right hand side */
5755: from = pcbddc->vec1_P;
5756: to = pcbddc->coarse_vec;
5757: }
5758: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5759: PetscFunctionReturn(PETSC_SUCCESS);
5760: }
5762: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5763: {
5764: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5765: Vec from, to;
5766: const PetscScalar *array;
5768: PetscFunctionBegin;
5769: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5770: from = pcbddc->coarse_vec;
5771: to = pcbddc->vec1_P;
5772: } else { /* from local to global -> put data in coarse right hand side */
5773: from = pcbddc->vec1_P;
5774: to = pcbddc->coarse_vec;
5775: }
5776: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5777: if (smode == SCATTER_FORWARD) {
5778: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5779: Vec tvec;
5781: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5782: PetscCall(VecGetArrayRead(to, &array));
5783: PetscCall(VecPlaceArray(tvec, array));
5784: PetscCall(VecRestoreArrayRead(to, &array));
5785: }
5786: } else {
5787: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5788: PetscCall(VecResetArray(from));
5789: }
5790: }
5791: PetscFunctionReturn(PETSC_SUCCESS);
5792: }
5794: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5795: {
5796: PC_IS *pcis = (PC_IS *)(pc->data);
5797: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5798: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5799: /* one and zero */
5800: PetscScalar one = 1.0, zero = 0.0;
5801: /* space to store constraints and their local indices */
5802: PetscScalar *constraints_data;
5803: PetscInt *constraints_idxs, *constraints_idxs_B;
5804: PetscInt *constraints_idxs_ptr, *constraints_data_ptr;
5805: PetscInt *constraints_n;
5806: /* iterators */
5807: PetscInt i, j, k, total_counts, total_counts_cc, cum;
5808: /* BLAS integers */
5809: PetscBLASInt lwork, lierr;
5810: PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
5811: PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
5812: /* reuse */
5813: PetscInt olocal_primal_size, olocal_primal_size_cc;
5814: PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
5815: /* change of basis */
5816: PetscBool qr_needed;
5817: PetscBT change_basis, qr_needed_idx;
5818: /* auxiliary stuff */
5819: PetscInt *nnz, *is_indices;
5820: PetscInt ncc;
5821: /* some quantities */
5822: PetscInt n_vertices, total_primal_vertices, valid_constraints;
5823: PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
5824: PetscReal tol; /* tolerance for retaining eigenmodes */
5826: PetscFunctionBegin;
5827: tol = PetscSqrtReal(PETSC_SMALL);
5828: /* Destroy Mat objects computed previously */
5829: PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
5830: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
5831: PetscCall(MatDestroy(&pcbddc->switch_static_change));
5832: /* save info on constraints from previous setup (if any) */
5833: olocal_primal_size = pcbddc->local_primal_size;
5834: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5835: PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
5836: PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
5837: PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
5838: PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
5839: PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
5841: if (!pcbddc->adaptive_selection) {
5842: IS ISForVertices, *ISForFaces, *ISForEdges;
5843: MatNullSpace nearnullsp;
5844: const Vec *nearnullvecs;
5845: Vec *localnearnullsp;
5846: PetscScalar *array;
5847: PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
5848: PetscBool nnsp_has_cnst;
5849: /* LAPACK working arrays for SVD or POD */
5850: PetscBool skip_lapack, boolforchange;
5851: PetscScalar *work;
5852: PetscReal *singular_vals;
5853: #if defined(PETSC_USE_COMPLEX)
5854: PetscReal *rwork;
5855: #endif
5856: PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
5857: PetscBLASInt dummy_int = 1;
5858: PetscScalar dummy_scalar = 1.;
5859: PetscBool use_pod = PETSC_FALSE;
5861: /* MKL SVD with same input gives different results on different processes! */
5862: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
5863: use_pod = PETSC_TRUE;
5864: #endif
5865: /* Get index sets for faces, edges and vertices from graph */
5866: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
5867: o_nf = n_ISForFaces;
5868: o_ne = n_ISForEdges;
5869: n_vertices = 0;
5870: if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
5871: /* print some info */
5872: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5873: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
5874: PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
5875: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5876: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
5877: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
5878: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
5879: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
5880: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5881: PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
5882: }
5884: if (!pcbddc->use_vertices) n_vertices = 0;
5885: if (!pcbddc->use_edges) n_ISForEdges = 0;
5886: if (!pcbddc->use_faces) n_ISForFaces = 0;
5888: /* check if near null space is attached to global mat */
5889: if (pcbddc->use_nnsp) {
5890: PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
5891: } else nearnullsp = NULL;
5893: if (nearnullsp) {
5894: PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
5895: /* remove any stored info */
5896: PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
5897: PetscCall(PetscFree(pcbddc->onearnullvecs_state));
5898: /* store information for BDDC solver reuse */
5899: PetscCall(PetscObjectReference((PetscObject)nearnullsp));
5900: pcbddc->onearnullspace = nearnullsp;
5901: PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
5902: for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
5903: } else { /* if near null space is not provided BDDC uses constants by default */
5904: nnsp_size = 0;
5905: nnsp_has_cnst = PETSC_TRUE;
5906: }
5907: /* get max number of constraints on a single cc */
5908: max_constraints = nnsp_size;
5909: if (nnsp_has_cnst) max_constraints++;
5911: /*
5912: Evaluate maximum storage size needed by the procedure
5913: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5914: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5915: There can be multiple constraints per connected component
5916: */
5917: ncc = n_vertices + n_ISForFaces + n_ISForEdges;
5918: PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
5920: total_counts = n_ISForFaces + n_ISForEdges;
5921: total_counts *= max_constraints;
5922: total_counts += n_vertices;
5923: PetscCall(PetscBTCreate(total_counts, &change_basis));
5925: total_counts = 0;
5926: max_size_of_constraint = 0;
5927: for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
5928: IS used_is;
5929: if (i < n_ISForEdges) {
5930: used_is = ISForEdges[i];
5931: } else {
5932: used_is = ISForFaces[i - n_ISForEdges];
5933: }
5934: PetscCall(ISGetSize(used_is, &j));
5935: total_counts += j;
5936: max_size_of_constraint = PetscMax(j, max_size_of_constraint);
5937: }
5938: PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
5940: /* get local part of global near null space vectors */
5941: PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
5942: for (k = 0; k < nnsp_size; k++) {
5943: PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
5944: PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5945: PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5946: }
5948: /* whether or not to skip lapack calls */
5949: skip_lapack = PETSC_TRUE;
5950: if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5952: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5953: if (!skip_lapack) {
5954: PetscScalar temp_work;
5956: if (use_pod) {
5957: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5958: PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
5959: PetscCall(PetscMalloc1(max_constraints, &singular_vals));
5960: PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
5961: #if defined(PETSC_USE_COMPLEX)
5962: PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
5963: #endif
5964: /* now we evaluate the optimal workspace using query with lwork=-1 */
5965: PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
5966: PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
5967: lwork = -1;
5968: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
5969: #if !defined(PETSC_USE_COMPLEX)
5970: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
5971: #else
5972: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
5973: #endif
5974: PetscCall(PetscFPTrapPop());
5975: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr);
5976: } else {
5977: #if !defined(PETSC_MISSING_LAPACK_GESVD)
5978: /* SVD */
5979: PetscInt max_n, min_n;
5980: max_n = max_size_of_constraint;
5981: min_n = max_constraints;
5982: if (max_size_of_constraint < max_constraints) {
5983: min_n = max_size_of_constraint;
5984: max_n = max_constraints;
5985: }
5986: PetscCall(PetscMalloc1(min_n, &singular_vals));
5987: #if defined(PETSC_USE_COMPLEX)
5988: PetscCall(PetscMalloc1(5 * min_n, &rwork));
5989: #endif
5990: /* now we evaluate the optimal workspace using query with lwork=-1 */
5991: lwork = -1;
5992: PetscCall(PetscBLASIntCast(max_n, &Blas_M));
5993: PetscCall(PetscBLASIntCast(min_n, &Blas_N));
5994: PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
5995: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
5996: #if !defined(PETSC_USE_COMPLEX)
5997: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
5998: #else
5999: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6000: #endif
6001: PetscCall(PetscFPTrapPop());
6002: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr);
6003: #else
6004: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6005: #endif /* on missing GESVD */
6006: }
6007: /* Allocate optimal workspace */
6008: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6009: PetscCall(PetscMalloc1(lwork, &work));
6010: }
6011: /* Now we can loop on constraining sets */
6012: total_counts = 0;
6013: constraints_idxs_ptr[0] = 0;
6014: constraints_data_ptr[0] = 0;
6015: /* vertices */
6016: if (n_vertices) {
6017: PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6018: PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6019: for (i = 0; i < n_vertices; i++) {
6020: constraints_n[total_counts] = 1;
6021: constraints_data[total_counts] = 1.0;
6022: constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6023: constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6024: total_counts++;
6025: }
6026: PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6027: }
6029: /* edges and faces */
6030: total_counts_cc = total_counts;
6031: for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6032: IS used_is;
6033: PetscBool idxs_copied = PETSC_FALSE;
6035: if (ncc < n_ISForEdges) {
6036: used_is = ISForEdges[ncc];
6037: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6038: } else {
6039: used_is = ISForFaces[ncc - n_ISForEdges];
6040: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6041: }
6042: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6044: PetscCall(ISGetSize(used_is, &size_of_constraint));
6045: if (!size_of_constraint) continue;
6046: PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6047: /* change of basis should not be performed on local periodic nodes */
6048: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6049: if (nnsp_has_cnst) {
6050: PetscScalar quad_value;
6052: PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6053: idxs_copied = PETSC_TRUE;
6055: if (!pcbddc->use_nnsp_true) {
6056: quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6057: } else {
6058: quad_value = 1.0;
6059: }
6060: for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6061: temp_constraints++;
6062: total_counts++;
6063: }
6064: for (k = 0; k < nnsp_size; k++) {
6065: PetscReal real_value;
6066: PetscScalar *ptr_to_data;
6068: PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6069: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6070: for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6071: PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6072: /* check if array is null on the connected component */
6073: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6074: PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6075: if (real_value > tol * size_of_constraint) { /* keep indices and values */
6076: temp_constraints++;
6077: total_counts++;
6078: if (!idxs_copied) {
6079: PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6080: idxs_copied = PETSC_TRUE;
6081: }
6082: }
6083: }
6084: PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6085: valid_constraints = temp_constraints;
6086: if (!pcbddc->use_nnsp_true && temp_constraints) {
6087: if (temp_constraints == 1) { /* just normalize the constraint */
6088: PetscScalar norm, *ptr_to_data;
6090: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6091: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6092: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6093: norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6094: PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6095: } else { /* perform SVD */
6096: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6098: if (use_pod) {
6099: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6100: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6101: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6102: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6103: from that computed using LAPACKgesvd
6104: -> This is due to a different computation of eigenvectors in LAPACKheev
6105: -> The quality of the POD-computed basis will be the same */
6106: PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6107: /* Store upper triangular part of correlation matrix */
6108: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6109: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6110: for (j = 0; j < temp_constraints; j++) {
6111: for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6112: }
6113: /* compute eigenvalues and eigenvectors of correlation matrix */
6114: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6115: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6116: #if !defined(PETSC_USE_COMPLEX)
6117: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6118: #else
6119: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6120: #endif
6121: PetscCall(PetscFPTrapPop());
6122: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr);
6123: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6124: j = 0;
6125: while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6126: total_counts = total_counts - j;
6127: valid_constraints = temp_constraints - j;
6128: /* scale and copy POD basis into used quadrature memory */
6129: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6130: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6131: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6132: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6133: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6134: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6135: if (j < temp_constraints) {
6136: PetscInt ii;
6137: for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6138: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6139: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6140: PetscCall(PetscFPTrapPop());
6141: for (k = 0; k < temp_constraints - j; k++) {
6142: for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6143: }
6144: }
6145: } else {
6146: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6147: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6148: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6149: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6150: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6151: #if !defined(PETSC_USE_COMPLEX)
6152: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6153: #else
6154: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6155: #endif
6156: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
6157: PetscCall(PetscFPTrapPop());
6158: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6159: k = temp_constraints;
6160: if (k > size_of_constraint) k = size_of_constraint;
6161: j = 0;
6162: while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6163: valid_constraints = k - j;
6164: total_counts = total_counts - temp_constraints + valid_constraints;
6165: #else
6166: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6167: #endif /* on missing GESVD */
6168: }
6169: }
6170: }
6171: /* update pointers information */
6172: if (valid_constraints) {
6173: constraints_n[total_counts_cc] = valid_constraints;
6174: constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6175: constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6176: /* set change_of_basis flag */
6177: if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6178: total_counts_cc++;
6179: }
6180: }
6181: /* free workspace */
6182: if (!skip_lapack) {
6183: PetscCall(PetscFree(work));
6184: #if defined(PETSC_USE_COMPLEX)
6185: PetscCall(PetscFree(rwork));
6186: #endif
6187: PetscCall(PetscFree(singular_vals));
6188: PetscCall(PetscFree(correlation_mat));
6189: PetscCall(PetscFree(temp_basis));
6190: }
6191: for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6192: PetscCall(PetscFree(localnearnullsp));
6193: /* free index sets of faces, edges and vertices */
6194: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6195: } else {
6196: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6198: total_counts = 0;
6199: n_vertices = 0;
6200: if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6201: max_constraints = 0;
6202: total_counts_cc = 0;
6203: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6204: total_counts += pcbddc->adaptive_constraints_n[i];
6205: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6206: max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6207: }
6208: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6209: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6210: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6211: constraints_data = pcbddc->adaptive_constraints_data;
6212: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6213: PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6214: total_counts_cc = 0;
6215: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6216: if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6217: }
6219: max_size_of_constraint = 0;
6220: for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6221: PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6222: /* Change of basis */
6223: PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6224: if (pcbddc->use_change_of_basis) {
6225: for (i = 0; i < sub_schurs->n_subs; i++) {
6226: if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6227: }
6228: }
6229: }
6230: pcbddc->local_primal_size = total_counts;
6231: PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6233: /* map constraints_idxs in boundary numbering */
6234: if (pcbddc->use_change_of_basis) {
6235: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6236: PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6237: }
6239: /* Create constraint matrix */
6240: PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6241: PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6242: PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6244: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6245: /* determine if a QR strategy is needed for change of basis */
6246: qr_needed = pcbddc->use_qr_single;
6247: PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6248: total_primal_vertices = 0;
6249: pcbddc->local_primal_size_cc = 0;
6250: for (i = 0; i < total_counts_cc; i++) {
6251: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6252: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6253: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6254: pcbddc->local_primal_size_cc += 1;
6255: } else if (PetscBTLookup(change_basis, i)) {
6256: for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6257: pcbddc->local_primal_size_cc += constraints_n[i];
6258: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6259: PetscCall(PetscBTSet(qr_needed_idx, i));
6260: qr_needed = PETSC_TRUE;
6261: }
6262: } else {
6263: pcbddc->local_primal_size_cc += 1;
6264: }
6265: }
6266: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6267: pcbddc->n_vertices = total_primal_vertices;
6268: /* permute indices in order to have a sorted set of vertices */
6269: PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6270: PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6271: PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6272: for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6274: /* nonzero structure of constraint matrix */
6275: /* and get reference dof for local constraints */
6276: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6277: for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6279: j = total_primal_vertices;
6280: total_counts = total_primal_vertices;
6281: cum = total_primal_vertices;
6282: for (i = n_vertices; i < total_counts_cc; i++) {
6283: if (!PetscBTLookup(change_basis, i)) {
6284: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6285: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6286: cum++;
6287: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6288: for (k = 0; k < constraints_n[i]; k++) {
6289: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6290: nnz[j + k] = size_of_constraint;
6291: }
6292: j += constraints_n[i];
6293: }
6294: }
6295: PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6296: PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6297: PetscCall(PetscFree(nnz));
6299: /* set values in constraint matrix */
6300: for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6301: total_counts = total_primal_vertices;
6302: for (i = n_vertices; i < total_counts_cc; i++) {
6303: if (!PetscBTLookup(change_basis, i)) {
6304: PetscInt *cols;
6306: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6307: cols = constraints_idxs + constraints_idxs_ptr[i];
6308: for (k = 0; k < constraints_n[i]; k++) {
6309: PetscInt row = total_counts + k;
6310: PetscScalar *vals;
6312: vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6313: PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6314: }
6315: total_counts += constraints_n[i];
6316: }
6317: }
6318: /* assembling */
6319: PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6320: PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6321: PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6323: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6324: if (pcbddc->use_change_of_basis) {
6325: /* dual and primal dofs on a single cc */
6326: PetscInt dual_dofs, primal_dofs;
6327: /* working stuff for GEQRF */
6328: PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6329: PetscBLASInt lqr_work;
6330: /* working stuff for UNGQR */
6331: PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6332: PetscBLASInt lgqr_work;
6333: /* working stuff for TRTRS */
6334: PetscScalar *trs_rhs = NULL;
6335: PetscBLASInt Blas_NRHS;
6336: /* pointers for values insertion into change of basis matrix */
6337: PetscInt *start_rows, *start_cols;
6338: PetscScalar *start_vals;
6339: /* working stuff for values insertion */
6340: PetscBT is_primal;
6341: PetscInt *aux_primal_numbering_B;
6342: /* matrix sizes */
6343: PetscInt global_size, local_size;
6344: /* temporary change of basis */
6345: Mat localChangeOfBasisMatrix;
6346: /* extra space for debugging */
6347: PetscScalar *dbg_work = NULL;
6349: PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6350: PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6351: PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6352: /* nonzeros for local mat */
6353: PetscCall(PetscMalloc1(pcis->n, &nnz));
6354: if (!pcbddc->benign_change || pcbddc->fake_change) {
6355: for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6356: } else {
6357: const PetscInt *ii;
6358: PetscInt n;
6359: PetscBool flg_row;
6360: PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6361: for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6362: PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6363: }
6364: for (i = n_vertices; i < total_counts_cc; i++) {
6365: if (PetscBTLookup(change_basis, i)) {
6366: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6367: if (PetscBTLookup(qr_needed_idx, i)) {
6368: for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6369: } else {
6370: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6371: for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6372: }
6373: }
6374: }
6375: PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6376: PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6377: PetscCall(PetscFree(nnz));
6378: /* Set interior change in the matrix */
6379: if (!pcbddc->benign_change || pcbddc->fake_change) {
6380: for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6381: } else {
6382: const PetscInt *ii, *jj;
6383: PetscScalar *aa;
6384: PetscInt n;
6385: PetscBool flg_row;
6386: PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6387: PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6388: for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6389: PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6390: PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6391: }
6393: if (pcbddc->dbg_flag) {
6394: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6395: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6396: }
6398: /* Now we loop on the constraints which need a change of basis */
6399: /*
6400: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6401: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6403: Basic blocks of change of basis matrix T computed:
6405: - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6407: | 1 0 ... 0 s_1/S |
6408: | 0 1 ... 0 s_2/S |
6409: | ... |
6410: | 0 ... 1 s_{n-1}/S |
6411: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6413: with S = \sum_{i=1}^n s_i^2
6414: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6415: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6417: - QR decomposition of constraints otherwise
6418: */
6419: if (qr_needed && max_size_of_constraint) {
6420: /* space to store Q */
6421: PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
6422: /* array to store scaling factors for reflectors */
6423: PetscCall(PetscMalloc1(max_constraints, &qr_tau));
6424: /* first we issue queries for optimal work */
6425: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6426: PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6427: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6428: lqr_work = -1;
6429: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
6430: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr);
6431: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
6432: PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work));
6433: lgqr_work = -1;
6434: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6435: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
6436: PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
6437: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6438: if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
6439: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
6440: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr);
6441: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
6442: PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work));
6443: /* array to store rhs and solution of triangular solver */
6444: PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
6445: /* allocating workspace for check */
6446: if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
6447: }
6448: /* array to store whether a node is primal or not */
6449: PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
6450: PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
6451: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
6452: PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
6453: for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
6454: PetscCall(PetscFree(aux_primal_numbering_B));
6456: /* loop on constraints and see whether or not they need a change of basis and compute it */
6457: for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
6458: size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
6459: if (PetscBTLookup(change_basis, total_counts)) {
6460: /* get constraint info */
6461: primal_dofs = constraints_n[total_counts];
6462: dual_dofs = size_of_constraint - primal_dofs;
6464: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
6466: if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
6468: /* copy quadrature constraints for change of basis check */
6469: if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6470: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6471: PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6473: /* compute QR decomposition of constraints */
6474: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6475: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6476: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6477: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6478: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
6479: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr);
6480: PetscCall(PetscFPTrapPop());
6482: /* explicitly compute R^-T */
6483: PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
6484: for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
6485: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6486: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
6487: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6488: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6489: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6490: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
6491: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr);
6492: PetscCall(PetscFPTrapPop());
6494: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6495: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6496: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6497: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6498: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6499: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6500: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
6501: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr);
6502: PetscCall(PetscFPTrapPop());
6504: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6505: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6506: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6507: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6508: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6509: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6510: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6511: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6512: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6513: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6514: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
6515: PetscCall(PetscFPTrapPop());
6516: PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6518: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6519: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6520: /* insert cols for primal dofs */
6521: for (j = 0; j < primal_dofs; j++) {
6522: start_vals = &qr_basis[j * size_of_constraint];
6523: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6524: PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6525: }
6526: /* insert cols for dual dofs */
6527: for (j = 0, k = 0; j < dual_dofs; k++) {
6528: if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
6529: start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
6530: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6531: PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6532: j++;
6533: }
6534: }
6536: /* check change of basis */
6537: if (pcbddc->dbg_flag) {
6538: PetscInt ii, jj;
6539: PetscBool valid_qr = PETSC_TRUE;
6540: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
6541: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6542: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
6543: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6544: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
6545: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
6546: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6547: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
6548: PetscCall(PetscFPTrapPop());
6549: for (jj = 0; jj < size_of_constraint; jj++) {
6550: for (ii = 0; ii < primal_dofs; ii++) {
6551: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6552: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6553: }
6554: }
6555: if (!valid_qr) {
6556: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
6557: for (jj = 0; jj < size_of_constraint; jj++) {
6558: for (ii = 0; ii < primal_dofs; ii++) {
6559: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
6560: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6561: }
6562: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
6563: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6564: }
6565: }
6566: }
6567: } else {
6568: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
6569: }
6570: }
6571: } else { /* simple transformation block */
6572: PetscInt row, col;
6573: PetscScalar val, norm;
6575: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6576: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
6577: for (j = 0; j < size_of_constraint; j++) {
6578: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
6579: row = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6580: if (!PetscBTLookup(is_primal, row_B)) {
6581: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6582: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
6583: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
6584: } else {
6585: for (k = 0; k < size_of_constraint; k++) {
6586: col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6587: if (row != col) {
6588: val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
6589: } else {
6590: val = constraints_data[constraints_data_ptr[total_counts]] / norm;
6591: }
6592: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
6593: }
6594: }
6595: }
6596: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
6597: }
6598: } else {
6599: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
6600: }
6601: }
6603: /* free workspace */
6604: if (qr_needed) {
6605: if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
6606: PetscCall(PetscFree(trs_rhs));
6607: PetscCall(PetscFree(qr_tau));
6608: PetscCall(PetscFree(qr_work));
6609: PetscCall(PetscFree(gqr_work));
6610: PetscCall(PetscFree(qr_basis));
6611: }
6612: PetscCall(PetscBTDestroy(&is_primal));
6613: PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
6614: PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
6616: /* assembling of global change of variable */
6617: if (!pcbddc->fake_change) {
6618: Mat tmat;
6619: PetscInt bs;
6621: PetscCall(VecGetSize(pcis->vec1_global, &global_size));
6622: PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
6623: PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
6624: PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
6625: PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
6626: PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
6627: PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
6628: PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
6629: PetscCall(MatGetBlockSize(pc->pmat, &bs));
6630: PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
6631: PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
6632: PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
6633: PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
6634: PetscCall(MatDestroy(&tmat));
6635: PetscCall(VecSet(pcis->vec1_global, 0.0));
6636: PetscCall(VecSet(pcis->vec1_N, 1.0));
6637: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6638: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6639: PetscCall(VecReciprocal(pcis->vec1_global));
6640: PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
6642: /* check */
6643: if (pcbddc->dbg_flag) {
6644: PetscReal error;
6645: Vec x, x_change;
6647: PetscCall(VecDuplicate(pcis->vec1_global, &x));
6648: PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
6649: PetscCall(VecSetRandom(x, NULL));
6650: PetscCall(VecCopy(x, pcis->vec1_global));
6651: PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6652: PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6653: PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
6654: PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6655: PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6656: PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
6657: PetscCall(VecAXPY(x, -1.0, x_change));
6658: PetscCall(VecNorm(x, NORM_INFINITY, &error));
6659: PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
6660: PetscCall(VecDestroy(&x));
6661: PetscCall(VecDestroy(&x_change));
6662: }
6663: /* adapt sub_schurs computed (if any) */
6664: if (pcbddc->use_deluxe_scaling) {
6665: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6667: PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6668: if (sub_schurs && sub_schurs->S_Ej_all) {
6669: Mat S_new, tmat;
6670: IS is_all_N, is_V_Sall = NULL;
6672: PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
6673: PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
6674: if (pcbddc->deluxe_zerorows) {
6675: ISLocalToGlobalMapping NtoSall;
6676: IS is_V;
6677: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
6678: PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
6679: PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
6680: PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
6681: PetscCall(ISDestroy(&is_V));
6682: }
6683: PetscCall(ISDestroy(&is_all_N));
6684: PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6685: PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
6686: PetscCall(PetscObjectReference((PetscObject)S_new));
6687: if (pcbddc->deluxe_zerorows) {
6688: const PetscScalar *array;
6689: const PetscInt *idxs_V, *idxs_all;
6690: PetscInt i, n_V;
6692: PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6693: PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
6694: PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
6695: PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
6696: PetscCall(VecGetArrayRead(pcis->D, &array));
6697: for (i = 0; i < n_V; i++) {
6698: PetscScalar val;
6699: PetscInt idx;
6701: idx = idxs_V[i];
6702: val = array[idxs_all[idxs_V[i]]];
6703: PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
6704: }
6705: PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
6706: PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
6707: PetscCall(VecRestoreArrayRead(pcis->D, &array));
6708: PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
6709: PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
6710: }
6711: sub_schurs->S_Ej_all = S_new;
6712: PetscCall(MatDestroy(&S_new));
6713: if (sub_schurs->sum_S_Ej_all) {
6714: PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6715: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
6716: PetscCall(PetscObjectReference((PetscObject)S_new));
6717: if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6718: sub_schurs->sum_S_Ej_all = S_new;
6719: PetscCall(MatDestroy(&S_new));
6720: }
6721: PetscCall(ISDestroy(&is_V_Sall));
6722: PetscCall(MatDestroy(&tmat));
6723: }
6724: /* destroy any change of basis context in sub_schurs */
6725: if (sub_schurs && sub_schurs->change) {
6726: PetscInt i;
6728: for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
6729: PetscCall(PetscFree(sub_schurs->change));
6730: }
6731: }
6732: if (pcbddc->switch_static) { /* need to save the local change */
6733: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6734: } else {
6735: PetscCall(MatDestroy(&localChangeOfBasisMatrix));
6736: }
6737: /* determine if any process has changed the pressures locally */
6738: pcbddc->change_interior = pcbddc->benign_have_null;
6739: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6740: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6741: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6742: pcbddc->use_qr_single = qr_needed;
6743: }
6744: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6745: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6746: PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
6747: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6748: } else {
6749: Mat benign_global = NULL;
6750: if (pcbddc->benign_have_null) {
6751: Mat M;
6753: pcbddc->change_interior = PETSC_TRUE;
6754: PetscCall(VecCopy(matis->counter, pcis->vec1_N));
6755: PetscCall(VecReciprocal(pcis->vec1_N));
6756: PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
6757: if (pcbddc->benign_change) {
6758: PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
6759: PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
6760: } else {
6761: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
6762: PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
6763: }
6764: PetscCall(MatISSetLocalMat(benign_global, M));
6765: PetscCall(MatDestroy(&M));
6766: PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
6767: PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
6768: }
6769: if (pcbddc->user_ChangeOfBasisMatrix) {
6770: PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix));
6771: PetscCall(MatDestroy(&benign_global));
6772: } else if (pcbddc->benign_have_null) {
6773: pcbddc->ChangeOfBasisMatrix = benign_global;
6774: }
6775: }
6776: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6777: IS is_global;
6778: const PetscInt *gidxs;
6780: PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
6781: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
6782: PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
6783: PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
6784: PetscCall(ISDestroy(&is_global));
6785: }
6786: }
6787: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
6789: if (!pcbddc->fake_change) {
6790: /* add pressure dofs to set of primal nodes for numbering purposes */
6791: for (i = 0; i < pcbddc->benign_n; i++) {
6792: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6793: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6794: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6795: pcbddc->local_primal_size_cc++;
6796: pcbddc->local_primal_size++;
6797: }
6799: /* check if a new primal space has been introduced (also take into account benign trick) */
6800: pcbddc->new_primal_space_local = PETSC_TRUE;
6801: if (olocal_primal_size == pcbddc->local_primal_size) {
6802: PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6803: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6804: if (!pcbddc->new_primal_space_local) {
6805: PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6806: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6807: }
6808: }
6809: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6810: PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
6811: }
6812: PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
6814: /* flush dbg viewer */
6815: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6817: /* free workspace */
6818: PetscCall(PetscBTDestroy(&qr_needed_idx));
6819: PetscCall(PetscBTDestroy(&change_basis));
6820: if (!pcbddc->adaptive_selection) {
6821: PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
6822: PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
6823: } else {
6824: PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
6825: PetscCall(PetscFree(constraints_n));
6826: PetscCall(PetscFree(constraints_idxs_B));
6827: }
6828: PetscFunctionReturn(PETSC_SUCCESS);
6829: }
6831: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6832: {
6833: ISLocalToGlobalMapping map;
6834: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6835: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6836: PetscInt i, N;
6837: PetscBool rcsr = PETSC_FALSE;
6839: PetscFunctionBegin;
6840: if (pcbddc->recompute_topography) {
6841: pcbddc->graphanalyzed = PETSC_FALSE;
6842: /* Reset previously computed graph */
6843: PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
6844: /* Init local Graph struct */
6845: PetscCall(MatGetSize(pc->pmat, &N, NULL));
6846: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
6847: PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
6849: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
6850: /* Check validity of the csr graph passed in by the user */
6851: PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
6852: pcbddc->mat_graph->nvtxs);
6854: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6855: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6856: PetscInt *xadj, *adjncy;
6857: PetscInt nvtxs;
6858: PetscBool flg_row = PETSC_FALSE;
6860: PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6861: if (flg_row) {
6862: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
6863: pcbddc->computed_rowadj = PETSC_TRUE;
6864: }
6865: PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6866: rcsr = PETSC_TRUE;
6867: }
6868: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6870: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6871: PetscReal *lcoords;
6872: PetscInt n;
6873: MPI_Datatype dimrealtype;
6875: /* TODO: support for blocked */
6876: PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
6877: PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6878: PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
6879: PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype));
6880: PetscCallMPI(MPI_Type_commit(&dimrealtype));
6881: PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6882: PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6883: PetscCallMPI(MPI_Type_free(&dimrealtype));
6884: PetscCall(PetscFree(pcbddc->mat_graph->coords));
6886: pcbddc->mat_graph->coords = lcoords;
6887: pcbddc->mat_graph->cloc = PETSC_TRUE;
6888: pcbddc->mat_graph->cnloc = n;
6889: }
6890: PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
6891: pcbddc->mat_graph->nvtxs);
6892: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
6894: /* Setup of Graph */
6895: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6896: PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
6898: /* attach info on disconnected subdomains if present */
6899: if (pcbddc->n_local_subs) {
6900: PetscInt *local_subs, n, totn;
6902: PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6903: PetscCall(PetscMalloc1(n, &local_subs));
6904: for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
6905: for (i = 0; i < pcbddc->n_local_subs; i++) {
6906: const PetscInt *idxs;
6907: PetscInt nl, j;
6909: PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
6910: PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
6911: for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
6912: PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
6913: }
6914: for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
6915: pcbddc->mat_graph->n_local_subs = totn + 1;
6916: pcbddc->mat_graph->local_subs = local_subs;
6917: }
6918: }
6920: if (!pcbddc->graphanalyzed) {
6921: /* Graph's connected components analysis */
6922: PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
6923: pcbddc->graphanalyzed = PETSC_TRUE;
6924: pcbddc->corner_selected = pcbddc->corner_selection;
6925: }
6926: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6927: PetscFunctionReturn(PETSC_SUCCESS);
6928: }
6930: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
6931: {
6932: PetscInt i, j, n;
6933: PetscScalar *alphas;
6934: PetscReal norm, *onorms;
6936: PetscFunctionBegin;
6937: n = *nio;
6938: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
6939: PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
6940: PetscCall(VecNormalize(vecs[0], &norm));
6941: if (norm < PETSC_SMALL) {
6942: onorms[0] = 0.0;
6943: PetscCall(VecSet(vecs[0], 0.0));
6944: } else {
6945: onorms[0] = norm;
6946: }
6948: for (i = 1; i < n; i++) {
6949: PetscCall(VecMDot(vecs[i], i, vecs, alphas));
6950: for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
6951: PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
6952: PetscCall(VecNormalize(vecs[i], &norm));
6953: if (norm < PETSC_SMALL) {
6954: onorms[i] = 0.0;
6955: PetscCall(VecSet(vecs[i], 0.0));
6956: } else {
6957: onorms[i] = norm;
6958: }
6959: }
6960: /* push nonzero vectors at the beginning */
6961: for (i = 0; i < n; i++) {
6962: if (onorms[i] == 0.0) {
6963: for (j = i + 1; j < n; j++) {
6964: if (onorms[j] != 0.0) {
6965: PetscCall(VecCopy(vecs[j], vecs[i]));
6966: onorms[j] = 0.0;
6967: }
6968: }
6969: }
6970: }
6971: for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
6972: PetscCall(PetscFree2(alphas, onorms));
6973: PetscFunctionReturn(PETSC_SUCCESS);
6974: }
6976: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
6977: {
6978: ISLocalToGlobalMapping mapping;
6979: Mat A;
6980: PetscInt n_neighs, *neighs, *n_shared, **shared;
6981: PetscMPIInt size, rank, color;
6982: PetscInt *xadj, *adjncy;
6983: PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
6984: PetscInt im_active, active_procs, N, n, i, j, threshold = 2;
6985: PetscInt void_procs, *procs_candidates = NULL;
6986: PetscInt xadj_count, *count;
6987: PetscBool ismatis, use_vwgt = PETSC_FALSE;
6988: PetscSubcomm psubcomm;
6989: MPI_Comm subcomm;
6991: PetscFunctionBegin;
6993: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
6994: PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
6997: PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
6999: if (have_void) *have_void = PETSC_FALSE;
7000: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7001: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7002: PetscCall(MatISGetLocalMat(mat, &A));
7003: PetscCall(MatGetLocalSize(A, &n, NULL));
7004: im_active = !!n;
7005: PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7006: void_procs = size - active_procs;
7007: /* get ranks of of non-active processes in mat communicator */
7008: if (void_procs) {
7009: PetscInt ncand;
7011: if (have_void) *have_void = PETSC_TRUE;
7012: PetscCall(PetscMalloc1(size, &procs_candidates));
7013: PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7014: for (i = 0, ncand = 0; i < size; i++) {
7015: if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7016: }
7017: /* force n_subdomains to be not greater that the number of non-active processes */
7018: *n_subdomains = PetscMin(void_procs, *n_subdomains);
7019: }
7021: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7022: number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
7023: PetscCall(MatGetSize(mat, &N, NULL));
7024: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7025: PetscInt issize, isidx, dest;
7026: if (*n_subdomains == 1) dest = 0;
7027: else dest = rank;
7028: if (im_active) {
7029: issize = 1;
7030: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7031: isidx = procs_candidates[dest];
7032: } else {
7033: isidx = dest;
7034: }
7035: } else {
7036: issize = 0;
7037: isidx = -1;
7038: }
7039: if (*n_subdomains != 1) *n_subdomains = active_procs;
7040: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7041: PetscCall(PetscFree(procs_candidates));
7042: PetscFunctionReturn(PETSC_SUCCESS);
7043: }
7044: PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL));
7045: PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL));
7046: threshold = PetscMax(threshold, 2);
7048: /* Get info on mapping */
7049: PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7050: PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7052: /* build local CSR graph of subdomains' connectivity */
7053: PetscCall(PetscMalloc1(2, &xadj));
7054: xadj[0] = 0;
7055: xadj[1] = PetscMax(n_neighs - 1, 0);
7056: PetscCall(PetscMalloc1(xadj[1], &adjncy));
7057: PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7058: PetscCall(PetscCalloc1(n, &count));
7059: for (i = 1; i < n_neighs; i++)
7060: for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7062: xadj_count = 0;
7063: for (i = 1; i < n_neighs; i++) {
7064: for (j = 0; j < n_shared[i]; j++) {
7065: if (count[shared[i][j]] < threshold) {
7066: adjncy[xadj_count] = neighs[i];
7067: adjncy_wgt[xadj_count] = n_shared[i];
7068: xadj_count++;
7069: break;
7070: }
7071: }
7072: }
7073: xadj[1] = xadj_count;
7074: PetscCall(PetscFree(count));
7075: PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7076: PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7078: PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7080: /* Restrict work on active processes only */
7081: PetscCall(PetscMPIIntCast(im_active, &color));
7082: if (void_procs) {
7083: PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7084: PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7085: PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7086: subcomm = PetscSubcommChild(psubcomm);
7087: } else {
7088: psubcomm = NULL;
7089: subcomm = PetscObjectComm((PetscObject)mat);
7090: }
7092: v_wgt = NULL;
7093: if (!color) {
7094: PetscCall(PetscFree(xadj));
7095: PetscCall(PetscFree(adjncy));
7096: PetscCall(PetscFree(adjncy_wgt));
7097: } else {
7098: Mat subdomain_adj;
7099: IS new_ranks, new_ranks_contig;
7100: MatPartitioning partitioner;
7101: PetscInt rstart = 0, rend = 0;
7102: PetscInt *is_indices, *oldranks;
7103: PetscMPIInt size;
7104: PetscBool aggregate;
7106: PetscCallMPI(MPI_Comm_size(subcomm, &size));
7107: if (void_procs) {
7108: PetscInt prank = rank;
7109: PetscCall(PetscMalloc1(size, &oldranks));
7110: PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7111: for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7112: PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7113: } else {
7114: oldranks = NULL;
7115: }
7116: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7117: if (aggregate) { /* TODO: all this part could be made more efficient */
7118: PetscInt lrows, row, ncols, *cols;
7119: PetscMPIInt nrank;
7120: PetscScalar *vals;
7122: PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7123: lrows = 0;
7124: if (nrank < redprocs) {
7125: lrows = size / redprocs;
7126: if (nrank < size % redprocs) lrows++;
7127: }
7128: PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7129: PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7130: PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7131: PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7132: row = nrank;
7133: ncols = xadj[1] - xadj[0];
7134: cols = adjncy;
7135: PetscCall(PetscMalloc1(ncols, &vals));
7136: for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7137: PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7138: PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7139: PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7140: PetscCall(PetscFree(xadj));
7141: PetscCall(PetscFree(adjncy));
7142: PetscCall(PetscFree(adjncy_wgt));
7143: PetscCall(PetscFree(vals));
7144: if (use_vwgt) {
7145: Vec v;
7146: const PetscScalar *array;
7147: PetscInt nl;
7149: PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7150: PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7151: PetscCall(VecAssemblyBegin(v));
7152: PetscCall(VecAssemblyEnd(v));
7153: PetscCall(VecGetLocalSize(v, &nl));
7154: PetscCall(VecGetArrayRead(v, &array));
7155: PetscCall(PetscMalloc1(nl, &v_wgt));
7156: for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7157: PetscCall(VecRestoreArrayRead(v, &array));
7158: PetscCall(VecDestroy(&v));
7159: }
7160: } else {
7161: PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7162: if (use_vwgt) {
7163: PetscCall(PetscMalloc1(1, &v_wgt));
7164: v_wgt[0] = n;
7165: }
7166: }
7167: /* PetscCall(MatView(subdomain_adj,0)); */
7169: /* Partition */
7170: PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7171: #if defined(PETSC_HAVE_PTSCOTCH)
7172: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7173: #elif defined(PETSC_HAVE_PARMETIS)
7174: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7175: #else
7176: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7177: #endif
7178: PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7179: if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7180: *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7181: PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7182: PetscCall(MatPartitioningSetFromOptions(partitioner));
7183: PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7184: /* PetscCall(MatPartitioningView(partitioner,0)); */
7186: /* renumber new_ranks to avoid "holes" in new set of processors */
7187: PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7188: PetscCall(ISDestroy(&new_ranks));
7189: PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7190: if (!aggregate) {
7191: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7192: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7193: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7194: } else if (oldranks) {
7195: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7196: } else {
7197: ranks_send_to_idx[0] = is_indices[0];
7198: }
7199: } else {
7200: PetscInt idx = 0;
7201: PetscMPIInt tag;
7202: MPI_Request *reqs;
7204: PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7205: PetscCall(PetscMalloc1(rend - rstart, &reqs));
7206: for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7207: PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7208: PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE));
7209: PetscCall(PetscFree(reqs));
7210: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7211: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7212: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7213: } else if (oldranks) {
7214: ranks_send_to_idx[0] = oldranks[idx];
7215: } else {
7216: ranks_send_to_idx[0] = idx;
7217: }
7218: }
7219: PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7220: /* clean up */
7221: PetscCall(PetscFree(oldranks));
7222: PetscCall(ISDestroy(&new_ranks_contig));
7223: PetscCall(MatDestroy(&subdomain_adj));
7224: PetscCall(MatPartitioningDestroy(&partitioner));
7225: }
7226: PetscCall(PetscSubcommDestroy(&psubcomm));
7227: PetscCall(PetscFree(procs_candidates));
7229: /* assemble parallel IS for sends */
7230: i = 1;
7231: if (!color) i = 0;
7232: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7233: PetscFunctionReturn(PETSC_SUCCESS);
7234: }
7236: typedef enum {
7237: MATDENSE_PRIVATE = 0,
7238: MATAIJ_PRIVATE,
7239: MATBAIJ_PRIVATE,
7240: MATSBAIJ_PRIVATE
7241: } MatTypePrivate;
7243: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7244: {
7245: Mat local_mat;
7246: IS is_sends_internal;
7247: PetscInt rows, cols, new_local_rows;
7248: PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7249: PetscBool ismatis, isdense, newisdense, destroy_mat;
7250: ISLocalToGlobalMapping l2gmap;
7251: PetscInt *l2gmap_indices;
7252: const PetscInt *is_indices;
7253: MatType new_local_type;
7254: /* buffers */
7255: PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7256: PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7257: PetscInt *recv_buffer_idxs_local;
7258: PetscScalar *ptr_vals, *recv_buffer_vals;
7259: const PetscScalar *send_buffer_vals;
7260: PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7261: /* MPI */
7262: MPI_Comm comm, comm_n;
7263: PetscSubcomm subcomm;
7264: PetscMPIInt n_sends, n_recvs, size;
7265: PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7266: PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7267: PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7268: MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7269: MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7271: PetscFunctionBegin;
7273: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7274: PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7281: if (nvecs) {
7282: PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7284: }
7285: /* further checks */
7286: PetscCall(MatISGetLocalMat(mat, &local_mat));
7287: PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7288: PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7289: PetscCall(MatGetSize(local_mat, &rows, &cols));
7290: PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7291: if (reuse && *mat_n) {
7292: PetscInt mrows, mcols, mnrows, mncols;
7294: PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7295: PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7296: PetscCall(MatGetSize(mat, &mrows, &mcols));
7297: PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7298: PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7299: PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7300: }
7301: PetscCall(MatGetBlockSize(local_mat, &bs));
7304: /* prepare IS for sending if not provided */
7305: if (!is_sends) {
7306: PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7307: PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7308: } else {
7309: PetscCall(PetscObjectReference((PetscObject)is_sends));
7310: is_sends_internal = is_sends;
7311: }
7313: /* get comm */
7314: PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7316: /* compute number of sends */
7317: PetscCall(ISGetLocalSize(is_sends_internal, &i));
7318: PetscCall(PetscMPIIntCast(i, &n_sends));
7320: /* compute number of receives */
7321: PetscCallMPI(MPI_Comm_size(comm, &size));
7322: PetscCall(PetscMalloc1(size, &iflags));
7323: PetscCall(PetscArrayzero(iflags, size));
7324: PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7325: for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7326: PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7327: PetscCall(PetscFree(iflags));
7329: /* restrict comm if requested */
7330: subcomm = NULL;
7331: destroy_mat = PETSC_FALSE;
7332: if (restrict_comm) {
7333: PetscMPIInt color, subcommsize;
7335: color = 0;
7336: if (restrict_full) {
7337: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7338: } else {
7339: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7340: }
7341: PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7342: subcommsize = size - subcommsize;
7343: /* check if reuse has been requested */
7344: if (reuse) {
7345: if (*mat_n) {
7346: PetscMPIInt subcommsize2;
7347: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7348: PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7349: comm_n = PetscObjectComm((PetscObject)*mat_n);
7350: } else {
7351: comm_n = PETSC_COMM_SELF;
7352: }
7353: } else { /* MAT_INITIAL_MATRIX */
7354: PetscMPIInt rank;
7356: PetscCallMPI(MPI_Comm_rank(comm, &rank));
7357: PetscCall(PetscSubcommCreate(comm, &subcomm));
7358: PetscCall(PetscSubcommSetNumber(subcomm, 2));
7359: PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7360: comm_n = PetscSubcommChild(subcomm);
7361: }
7362: /* flag to destroy *mat_n if not significative */
7363: if (color) destroy_mat = PETSC_TRUE;
7364: } else {
7365: comm_n = comm;
7366: }
7368: /* prepare send/receive buffers */
7369: PetscCall(PetscMalloc1(size, &ilengths_idxs));
7370: PetscCall(PetscArrayzero(ilengths_idxs, size));
7371: PetscCall(PetscMalloc1(size, &ilengths_vals));
7372: PetscCall(PetscArrayzero(ilengths_vals, size));
7373: if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
7375: /* Get data from local matrices */
7376: PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
7377: /* TODO: See below some guidelines on how to prepare the local buffers */
7378: /*
7379: send_buffer_vals should contain the raw values of the local matrix
7380: send_buffer_idxs should contain:
7381: - MatType_PRIVATE type
7382: - PetscInt size_of_l2gmap
7383: - PetscInt global_row_indices[size_of_l2gmap]
7384: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7385: */
7386: {
7387: ISLocalToGlobalMapping mapping;
7389: PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7390: PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
7391: PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
7392: PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
7393: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7394: send_buffer_idxs[1] = i;
7395: PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
7396: PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
7397: PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
7398: PetscCall(PetscMPIIntCast(i, &len));
7399: for (i = 0; i < n_sends; i++) {
7400: ilengths_vals[is_indices[i]] = len * len;
7401: ilengths_idxs[is_indices[i]] = len + 2;
7402: }
7403: }
7404: PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
7405: /* additional is (if any) */
7406: if (nis) {
7407: PetscMPIInt psum;
7408: PetscInt j;
7409: for (j = 0, psum = 0; j < nis; j++) {
7410: PetscInt plen;
7411: PetscCall(ISGetLocalSize(isarray[j], &plen));
7412: PetscCall(PetscMPIIntCast(plen, &len));
7413: psum += len + 1; /* indices + length */
7414: }
7415: PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
7416: for (j = 0, psum = 0; j < nis; j++) {
7417: PetscInt plen;
7418: const PetscInt *is_array_idxs;
7419: PetscCall(ISGetLocalSize(isarray[j], &plen));
7420: send_buffer_idxs_is[psum] = plen;
7421: PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
7422: PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
7423: PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
7424: psum += plen + 1; /* indices + length */
7425: }
7426: for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
7427: PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
7428: }
7429: PetscCall(MatISRestoreLocalMat(mat, &local_mat));
7431: buf_size_idxs = 0;
7432: buf_size_vals = 0;
7433: buf_size_idxs_is = 0;
7434: buf_size_vecs = 0;
7435: for (i = 0; i < n_recvs; i++) {
7436: buf_size_idxs += (PetscInt)olengths_idxs[i];
7437: buf_size_vals += (PetscInt)olengths_vals[i];
7438: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7439: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7440: }
7441: PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
7442: PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
7443: PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
7444: PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
7446: /* get new tags for clean communications */
7447: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
7448: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
7449: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
7450: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
7452: /* allocate for requests */
7453: PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
7454: PetscCall(PetscMalloc1(n_sends, &send_req_vals));
7455: PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
7456: PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
7457: PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
7458: PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
7459: PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
7460: PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
7462: /* communications */
7463: ptr_idxs = recv_buffer_idxs;
7464: ptr_vals = recv_buffer_vals;
7465: ptr_idxs_is = recv_buffer_idxs_is;
7466: ptr_vecs = recv_buffer_vecs;
7467: for (i = 0; i < n_recvs; i++) {
7468: source_dest = onodes[i];
7469: PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]));
7470: PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]));
7471: ptr_idxs += olengths_idxs[i];
7472: ptr_vals += olengths_vals[i];
7473: if (nis) {
7474: source_dest = onodes_is[i];
7475: PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]));
7476: ptr_idxs_is += olengths_idxs_is[i];
7477: }
7478: if (nvecs) {
7479: source_dest = onodes[i];
7480: PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]));
7481: ptr_vecs += olengths_idxs[i] - 2;
7482: }
7483: }
7484: for (i = 0; i < n_sends; i++) {
7485: PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
7486: PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
7487: PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
7488: if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
7489: if (nvecs) {
7490: PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7491: PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
7492: }
7493: }
7494: PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
7495: PetscCall(ISDestroy(&is_sends_internal));
7497: /* assemble new l2g map */
7498: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
7499: ptr_idxs = recv_buffer_idxs;
7500: new_local_rows = 0;
7501: for (i = 0; i < n_recvs; i++) {
7502: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7503: ptr_idxs += olengths_idxs[i];
7504: }
7505: PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
7506: ptr_idxs = recv_buffer_idxs;
7507: new_local_rows = 0;
7508: for (i = 0; i < n_recvs; i++) {
7509: PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
7510: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7511: ptr_idxs += olengths_idxs[i];
7512: }
7513: PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
7514: PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
7515: PetscCall(PetscFree(l2gmap_indices));
7517: /* infer new local matrix type from received local matrices type */
7518: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7519: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7520: if (n_recvs) {
7521: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7522: ptr_idxs = recv_buffer_idxs;
7523: for (i = 0; i < n_recvs; i++) {
7524: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7525: new_local_type_private = MATAIJ_PRIVATE;
7526: break;
7527: }
7528: ptr_idxs += olengths_idxs[i];
7529: }
7530: switch (new_local_type_private) {
7531: case MATDENSE_PRIVATE:
7532: new_local_type = MATSEQAIJ;
7533: bs = 1;
7534: break;
7535: case MATAIJ_PRIVATE:
7536: new_local_type = MATSEQAIJ;
7537: bs = 1;
7538: break;
7539: case MATBAIJ_PRIVATE:
7540: new_local_type = MATSEQBAIJ;
7541: break;
7542: case MATSBAIJ_PRIVATE:
7543: new_local_type = MATSEQSBAIJ;
7544: break;
7545: default:
7546: SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
7547: }
7548: } else { /* by default, new_local_type is seqaij */
7549: new_local_type = MATSEQAIJ;
7550: bs = 1;
7551: }
7553: /* create MATIS object if needed */
7554: if (!reuse) {
7555: PetscCall(MatGetSize(mat, &rows, &cols));
7556: PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7557: } else {
7558: /* it also destroys the local matrices */
7559: if (*mat_n) {
7560: PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
7561: } else { /* this is a fake object */
7562: PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7563: }
7564: }
7565: PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
7566: PetscCall(MatSetType(local_mat, new_local_type));
7568: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
7570: /* Global to local map of received indices */
7571: PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
7572: PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
7573: PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
7575: /* restore attributes -> type of incoming data and its size */
7576: buf_size_idxs = 0;
7577: for (i = 0; i < n_recvs; i++) {
7578: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7579: recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
7580: buf_size_idxs += (PetscInt)olengths_idxs[i];
7581: }
7582: PetscCall(PetscFree(recv_buffer_idxs));
7584: /* set preallocation */
7585: PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
7586: if (!newisdense) {
7587: PetscInt *new_local_nnz = NULL;
7589: ptr_idxs = recv_buffer_idxs_local;
7590: if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
7591: for (i = 0; i < n_recvs; i++) {
7592: PetscInt j;
7593: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7594: for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
7595: } else {
7596: /* TODO */
7597: }
7598: ptr_idxs += olengths_idxs[i];
7599: }
7600: if (new_local_nnz) {
7601: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
7602: PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
7603: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
7604: PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7605: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
7606: PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7607: } else {
7608: PetscCall(MatSetUp(local_mat));
7609: }
7610: PetscCall(PetscFree(new_local_nnz));
7611: } else {
7612: PetscCall(MatSetUp(local_mat));
7613: }
7615: /* set values */
7616: ptr_vals = recv_buffer_vals;
7617: ptr_idxs = recv_buffer_idxs_local;
7618: for (i = 0; i < n_recvs; i++) {
7619: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7620: PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
7621: PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
7622: PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
7623: PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
7624: PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
7625: } else {
7626: /* TODO */
7627: }
7628: ptr_idxs += olengths_idxs[i];
7629: ptr_vals += olengths_vals[i];
7630: }
7631: PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
7632: PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
7633: PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
7634: PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
7635: PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
7636: PetscCall(PetscFree(recv_buffer_vals));
7638: #if 0
7639: if (!restrict_comm) { /* check */
7640: Vec lvec,rvec;
7641: PetscReal infty_error;
7643: PetscCall(MatCreateVecs(mat,&rvec,&lvec));
7644: PetscCall(VecSetRandom(rvec,NULL));
7645: PetscCall(MatMult(mat,rvec,lvec));
7646: PetscCall(VecScale(lvec,-1.0));
7647: PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
7648: PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
7649: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
7650: PetscCall(VecDestroy(&rvec));
7651: PetscCall(VecDestroy(&lvec));
7652: }
7653: #endif
7655: /* assemble new additional is (if any) */
7656: if (nis) {
7657: PetscInt **temp_idxs, *count_is, j, psum;
7659: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
7660: PetscCall(PetscCalloc1(nis, &count_is));
7661: ptr_idxs = recv_buffer_idxs_is;
7662: psum = 0;
7663: for (i = 0; i < n_recvs; i++) {
7664: for (j = 0; j < nis; j++) {
7665: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7666: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7667: psum += plen;
7668: ptr_idxs += plen + 1; /* shift pointer to received data */
7669: }
7670: }
7671: PetscCall(PetscMalloc1(nis, &temp_idxs));
7672: PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
7673: for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1];
7674: PetscCall(PetscArrayzero(count_is, nis));
7675: ptr_idxs = recv_buffer_idxs_is;
7676: for (i = 0; i < n_recvs; i++) {
7677: for (j = 0; j < nis; j++) {
7678: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7679: PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
7680: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7681: ptr_idxs += plen + 1; /* shift pointer to received data */
7682: }
7683: }
7684: for (i = 0; i < nis; i++) {
7685: PetscCall(ISDestroy(&isarray[i]));
7686: PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
7687: PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
7688: }
7689: PetscCall(PetscFree(count_is));
7690: PetscCall(PetscFree(temp_idxs[0]));
7691: PetscCall(PetscFree(temp_idxs));
7692: }
7693: /* free workspace */
7694: PetscCall(PetscFree(recv_buffer_idxs_is));
7695: PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
7696: PetscCall(PetscFree(send_buffer_idxs));
7697: PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
7698: if (isdense) {
7699: PetscCall(MatISGetLocalMat(mat, &local_mat));
7700: PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
7701: PetscCall(MatISRestoreLocalMat(mat, &local_mat));
7702: } else {
7703: /* PetscCall(PetscFree(send_buffer_vals)); */
7704: }
7705: if (nis) {
7706: PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
7707: PetscCall(PetscFree(send_buffer_idxs_is));
7708: }
7710: if (nvecs) {
7711: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
7712: PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
7713: PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7714: PetscCall(VecDestroy(&nnsp_vec[0]));
7715: PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
7716: PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
7717: PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
7718: /* set values */
7719: ptr_vals = recv_buffer_vecs;
7720: ptr_idxs = recv_buffer_idxs_local;
7721: PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7722: for (i = 0; i < n_recvs; i++) {
7723: PetscInt j;
7724: for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
7725: ptr_idxs += olengths_idxs[i];
7726: ptr_vals += olengths_idxs[i] - 2;
7727: }
7728: PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7729: PetscCall(VecAssemblyBegin(nnsp_vec[0]));
7730: PetscCall(VecAssemblyEnd(nnsp_vec[0]));
7731: }
7733: PetscCall(PetscFree(recv_buffer_vecs));
7734: PetscCall(PetscFree(recv_buffer_idxs_local));
7735: PetscCall(PetscFree(recv_req_idxs));
7736: PetscCall(PetscFree(recv_req_vals));
7737: PetscCall(PetscFree(recv_req_vecs));
7738: PetscCall(PetscFree(recv_req_idxs_is));
7739: PetscCall(PetscFree(send_req_idxs));
7740: PetscCall(PetscFree(send_req_vals));
7741: PetscCall(PetscFree(send_req_vecs));
7742: PetscCall(PetscFree(send_req_idxs_is));
7743: PetscCall(PetscFree(ilengths_vals));
7744: PetscCall(PetscFree(ilengths_idxs));
7745: PetscCall(PetscFree(olengths_vals));
7746: PetscCall(PetscFree(olengths_idxs));
7747: PetscCall(PetscFree(onodes));
7748: if (nis) {
7749: PetscCall(PetscFree(ilengths_idxs_is));
7750: PetscCall(PetscFree(olengths_idxs_is));
7751: PetscCall(PetscFree(onodes_is));
7752: }
7753: PetscCall(PetscSubcommDestroy(&subcomm));
7754: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7755: PetscCall(MatDestroy(mat_n));
7756: for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
7757: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7758: PetscCall(VecDestroy(&nnsp_vec[0]));
7759: }
7760: *mat_n = NULL;
7761: }
7762: PetscFunctionReturn(PETSC_SUCCESS);
7763: }
7765: /* temporary hack into ksp private data structure */
7766: #include <petsc/private/kspimpl.h>
7768: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals)
7769: {
7770: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
7771: PC_IS *pcis = (PC_IS *)pc->data;
7772: Mat coarse_mat, coarse_mat_is, coarse_submat_dense;
7773: Mat coarsedivudotp = NULL;
7774: Mat coarseG, t_coarse_mat_is;
7775: MatNullSpace CoarseNullSpace = NULL;
7776: ISLocalToGlobalMapping coarse_islg;
7777: IS coarse_is, *isarray, corners;
7778: PetscInt i, im_active = -1, active_procs = -1;
7779: PetscInt nis, nisdofs, nisneu, nisvert;
7780: PetscInt coarse_eqs_per_proc;
7781: PC pc_temp;
7782: PCType coarse_pc_type;
7783: KSPType coarse_ksp_type;
7784: PetscBool multilevel_requested, multilevel_allowed;
7785: PetscBool coarse_reuse;
7786: PetscInt ncoarse, nedcfield;
7787: PetscBool compute_vecs = PETSC_FALSE;
7788: PetscScalar *array;
7789: MatReuse coarse_mat_reuse;
7790: PetscBool restr, full_restr, have_void;
7791: PetscMPIInt size;
7793: PetscFunctionBegin;
7794: PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
7795: /* Assign global numbering to coarse dofs */
7796: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7797: PetscInt ocoarse_size;
7798: compute_vecs = PETSC_TRUE;
7800: pcbddc->new_primal_space = PETSC_TRUE;
7801: ocoarse_size = pcbddc->coarse_size;
7802: PetscCall(PetscFree(pcbddc->global_primal_indices));
7803: PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
7804: /* see if we can avoid some work */
7805: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7806: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7807: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7808: PetscCall(KSPReset(pcbddc->coarse_ksp));
7809: coarse_reuse = PETSC_FALSE;
7810: } else { /* we can safely reuse already computed coarse matrix */
7811: coarse_reuse = PETSC_TRUE;
7812: }
7813: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7814: coarse_reuse = PETSC_FALSE;
7815: }
7816: /* reset any subassembling information */
7817: if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
7818: } else { /* primal space is unchanged, so we can reuse coarse matrix */
7819: coarse_reuse = PETSC_TRUE;
7820: }
7821: if (coarse_reuse && pcbddc->coarse_ksp) {
7822: PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
7823: PetscCall(PetscObjectReference((PetscObject)coarse_mat));
7824: coarse_mat_reuse = MAT_REUSE_MATRIX;
7825: } else {
7826: coarse_mat = NULL;
7827: coarse_mat_reuse = MAT_INITIAL_MATRIX;
7828: }
7830: /* creates temporary l2gmap and IS for coarse indexes */
7831: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
7832: PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
7834: /* creates temporary MATIS object for coarse matrix */
7835: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense));
7836: PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is));
7837: PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense));
7838: PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7839: PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7840: PetscCall(MatDestroy(&coarse_submat_dense));
7842: /* count "active" (i.e. with positive local size) and "void" processes */
7843: im_active = !!(pcis->n);
7844: PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
7846: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7847: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7848: /* full_restr : just use the receivers from the subassembling pattern */
7849: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
7850: coarse_mat_is = NULL;
7851: multilevel_allowed = PETSC_FALSE;
7852: multilevel_requested = PETSC_FALSE;
7853: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
7854: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
7855: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7856: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7857: if (multilevel_requested) {
7858: ncoarse = active_procs / pcbddc->coarsening_ratio;
7859: restr = PETSC_FALSE;
7860: full_restr = PETSC_FALSE;
7861: } else {
7862: ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
7863: restr = PETSC_TRUE;
7864: full_restr = PETSC_TRUE;
7865: }
7866: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7867: ncoarse = PetscMax(1, ncoarse);
7868: if (!pcbddc->coarse_subassembling) {
7869: if (pcbddc->coarsening_ratio > 1) {
7870: if (multilevel_requested) {
7871: PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7872: } else {
7873: PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7874: }
7875: } else {
7876: PetscMPIInt rank;
7878: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
7879: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7880: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
7881: }
7882: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7883: PetscInt psum;
7884: if (pcbddc->coarse_ksp) psum = 1;
7885: else psum = 0;
7886: PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
7887: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7888: }
7889: /* determine if we can go multilevel */
7890: if (multilevel_requested) {
7891: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7892: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7893: }
7894: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7896: /* dump subassembling pattern */
7897: if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
7898: /* compute dofs splitting and neumann boundaries for coarse dofs */
7899: nedcfield = -1;
7900: corners = NULL;
7901: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
7902: PetscInt *tidxs, *tidxs2, nout, tsize, i;
7903: const PetscInt *idxs;
7904: ISLocalToGlobalMapping tmap;
7906: /* create map between primal indices (in local representative ordering) and local primal numbering */
7907: PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
7908: /* allocate space for temporary storage */
7909: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
7910: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
7911: /* allocate for IS array */
7912: nisdofs = pcbddc->n_ISForDofsLocal;
7913: if (pcbddc->nedclocal) {
7914: if (pcbddc->nedfield > -1) {
7915: nedcfield = pcbddc->nedfield;
7916: } else {
7917: nedcfield = 0;
7918: PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
7919: nisdofs = 1;
7920: }
7921: }
7922: nisneu = !!pcbddc->NeumannBoundariesLocal;
7923: nisvert = 0; /* nisvert is not used */
7924: nis = nisdofs + nisneu + nisvert;
7925: PetscCall(PetscMalloc1(nis, &isarray));
7926: /* dofs splitting */
7927: for (i = 0; i < nisdofs; i++) {
7928: /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
7929: if (nedcfield != i) {
7930: PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
7931: PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
7932: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7933: PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
7934: } else {
7935: PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
7936: PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
7937: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7938: PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7939: PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
7940: }
7941: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7942: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
7943: /* PetscCall(ISView(isarray[i],0)); */
7944: }
7945: /* neumann boundaries */
7946: if (pcbddc->NeumannBoundariesLocal) {
7947: /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
7948: PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
7949: PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7950: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7951: PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7952: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7953: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
7954: /* PetscCall(ISView(isarray[nisdofs],0)); */
7955: }
7956: /* coordinates */
7957: if (pcbddc->corner_selected) {
7958: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7959: PetscCall(ISGetLocalSize(corners, &tsize));
7960: PetscCall(ISGetIndices(corners, &idxs));
7961: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7962: PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7963: PetscCall(ISRestoreIndices(corners, &idxs));
7964: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7965: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7966: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
7967: }
7968: PetscCall(PetscFree(tidxs));
7969: PetscCall(PetscFree(tidxs2));
7970: PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
7971: } else {
7972: nis = 0;
7973: nisdofs = 0;
7974: nisneu = 0;
7975: nisvert = 0;
7976: isarray = NULL;
7977: }
7978: /* destroy no longer needed map */
7979: PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
7981: /* subassemble */
7982: if (multilevel_allowed) {
7983: Vec vp[1];
7984: PetscInt nvecs = 0;
7985: PetscBool reuse, reuser;
7987: if (coarse_mat) reuse = PETSC_TRUE;
7988: else reuse = PETSC_FALSE;
7989: PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7990: vp[0] = NULL;
7991: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7992: PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
7993: PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
7994: PetscCall(VecSetType(vp[0], VECSTANDARD));
7995: nvecs = 1;
7997: if (pcbddc->divudotp) {
7998: Mat B, loc_divudotp;
7999: Vec v, p;
8000: IS dummy;
8001: PetscInt np;
8003: PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8004: PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8005: PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8006: PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8007: PetscCall(MatCreateVecs(B, &v, &p));
8008: PetscCall(VecSet(p, 1.));
8009: PetscCall(MatMultTranspose(B, p, v));
8010: PetscCall(VecDestroy(&p));
8011: PetscCall(MatDestroy(&B));
8012: PetscCall(VecGetArray(vp[0], &array));
8013: PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8014: PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8015: PetscCall(VecResetArray(pcbddc->vec1_P));
8016: PetscCall(VecRestoreArray(vp[0], &array));
8017: PetscCall(ISDestroy(&dummy));
8018: PetscCall(VecDestroy(&v));
8019: }
8020: }
8021: if (reuser) {
8022: PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8023: } else {
8024: PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8025: }
8026: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8027: PetscScalar *arraym;
8028: const PetscScalar *arrayv;
8029: PetscInt nl;
8030: PetscCall(VecGetLocalSize(vp[0], &nl));
8031: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8032: PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8033: PetscCall(VecGetArrayRead(vp[0], &arrayv));
8034: PetscCall(PetscArraycpy(arraym, arrayv, nl));
8035: PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8036: PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8037: PetscCall(VecDestroy(&vp[0]));
8038: } else {
8039: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8040: }
8041: } else {
8042: PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8043: }
8044: if (coarse_mat_is || coarse_mat) {
8045: if (!multilevel_allowed) {
8046: PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8047: } else {
8048: /* if this matrix is present, it means we are not reusing the coarse matrix */
8049: if (coarse_mat_is) {
8050: PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8051: PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8052: coarse_mat = coarse_mat_is;
8053: }
8054: }
8055: }
8056: PetscCall(MatDestroy(&t_coarse_mat_is));
8057: PetscCall(MatDestroy(&coarse_mat_is));
8059: /* create local to global scatters for coarse problem */
8060: if (compute_vecs) {
8061: PetscInt lrows;
8062: PetscCall(VecDestroy(&pcbddc->coarse_vec));
8063: if (coarse_mat) {
8064: PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8065: } else {
8066: lrows = 0;
8067: }
8068: PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8069: PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8070: PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8071: PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8072: PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8073: }
8074: PetscCall(ISDestroy(&coarse_is));
8076: /* set defaults for coarse KSP and PC */
8077: if (multilevel_allowed) {
8078: coarse_ksp_type = KSPRICHARDSON;
8079: coarse_pc_type = PCBDDC;
8080: } else {
8081: coarse_ksp_type = KSPPREONLY;
8082: coarse_pc_type = PCREDUNDANT;
8083: }
8085: /* print some info if requested */
8086: if (pcbddc->dbg_flag) {
8087: if (!multilevel_allowed) {
8088: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8089: if (multilevel_requested) {
8090: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio));
8091: } else if (pcbddc->max_levels) {
8092: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8093: }
8094: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8095: }
8096: }
8098: /* communicate coarse discrete gradient */
8099: coarseG = NULL;
8100: if (pcbddc->nedcG && multilevel_allowed) {
8101: MPI_Comm ccomm;
8102: if (coarse_mat) {
8103: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8104: } else {
8105: ccomm = MPI_COMM_NULL;
8106: }
8107: PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8108: }
8110: /* create the coarse KSP object only once with defaults */
8111: if (coarse_mat) {
8112: PetscBool isredundant, isbddc, force, valid;
8113: PetscViewer dbg_viewer = NULL;
8114: PetscBool isset, issym, isher, isspd;
8116: if (pcbddc->dbg_flag) {
8117: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8118: PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8119: }
8120: if (!pcbddc->coarse_ksp) {
8121: char prefix[256], str_level[16];
8122: size_t len;
8124: PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8125: PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8126: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8127: PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1));
8128: PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8129: PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8130: PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8131: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8132: /* TODO is this logic correct? should check for coarse_mat type */
8133: PetscCall(PCSetType(pc_temp, coarse_pc_type));
8134: /* prefix */
8135: PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8136: PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8137: if (!pcbddc->current_level) {
8138: PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8139: PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8140: } else {
8141: PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8142: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
8143: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8144: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8145: PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8146: PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
8147: PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8148: }
8149: PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8150: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8151: PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8152: PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8153: PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8154: /* allow user customization */
8155: PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8156: /* get some info after set from options */
8157: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8158: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8159: force = PETSC_FALSE;
8160: PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8161: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8162: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8163: if (multilevel_allowed && !force && !valid) {
8164: isbddc = PETSC_TRUE;
8165: PetscCall(PCSetType(pc_temp, PCBDDC));
8166: PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8167: PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8168: PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8169: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8170: PetscObjectOptionsBegin((PetscObject)pc_temp);
8171: PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8172: PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8173: PetscOptionsEnd();
8174: pc_temp->setfromoptionscalled++;
8175: }
8176: }
8177: }
8178: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8179: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8180: if (nisdofs) {
8181: PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8182: for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8183: }
8184: if (nisneu) {
8185: PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8186: PetscCall(ISDestroy(&isarray[nisdofs]));
8187: }
8188: if (nisvert) {
8189: PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8190: PetscCall(ISDestroy(&isarray[nis - 1]));
8191: }
8192: if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8194: /* get some info after set from options */
8195: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8197: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8198: if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8199: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8200: force = PETSC_FALSE;
8201: PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8202: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8203: if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8204: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8205: if (isredundant) {
8206: KSP inner_ksp;
8207: PC inner_pc;
8209: PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8210: PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8211: }
8213: /* parameters which miss an API */
8214: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8215: if (isbddc) {
8216: PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8218: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8219: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8220: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8221: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8222: if (pcbddc_coarse->benign_saddle_point) {
8223: Mat coarsedivudotp_is;
8224: ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8225: IS row, col;
8226: const PetscInt *gidxs;
8227: PetscInt n, st, M, N;
8229: PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8230: PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8231: st = st - n;
8232: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8233: PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8234: PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8235: PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8236: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8237: PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8238: PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8239: PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8240: PetscCall(ISGetSize(row, &M));
8241: PetscCall(MatGetSize(coarse_mat, &N, NULL));
8242: PetscCall(ISDestroy(&row));
8243: PetscCall(ISDestroy(&col));
8244: PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8245: PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8246: PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8247: PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8248: PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8249: PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8250: PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8251: PetscCall(MatDestroy(&coarsedivudotp));
8252: PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8253: PetscCall(MatDestroy(&coarsedivudotp_is));
8254: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8255: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8256: }
8257: }
8259: /* propagate symmetry info of coarse matrix */
8260: PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8261: PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8262: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8263: PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8264: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8265: PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8266: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8268: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8269: /* set operators */
8270: PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8271: PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8272: PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8273: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8274: }
8275: PetscCall(MatDestroy(&coarseG));
8276: PetscCall(PetscFree(isarray));
8277: #if 0
8278: {
8279: PetscViewer viewer;
8280: char filename[256];
8281: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8282: PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8283: PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8284: PetscCall(MatView(coarse_mat,viewer));
8285: PetscCall(PetscViewerPopFormat(viewer));
8286: PetscCall(PetscViewerDestroy(&viewer));
8287: }
8288: #endif
8290: if (corners) {
8291: Vec gv;
8292: IS is;
8293: const PetscInt *idxs;
8294: PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8295: PetscScalar *coords;
8297: PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8298: PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8299: PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8300: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8301: PetscCall(VecSetBlockSize(gv, cdim));
8302: PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8303: PetscCall(VecSetType(gv, VECSTANDARD));
8304: PetscCall(VecSetFromOptions(gv));
8305: PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8307: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8308: PetscCall(ISGetLocalSize(is, &n));
8309: PetscCall(ISGetIndices(is, &idxs));
8310: PetscCall(PetscMalloc1(n * cdim, &coords));
8311: for (i = 0; i < n; i++) {
8312: for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8313: }
8314: PetscCall(ISRestoreIndices(is, &idxs));
8315: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8317: PetscCall(ISGetLocalSize(corners, &n));
8318: PetscCall(ISGetIndices(corners, &idxs));
8319: PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8320: PetscCall(ISRestoreIndices(corners, &idxs));
8321: PetscCall(PetscFree(coords));
8322: PetscCall(VecAssemblyBegin(gv));
8323: PetscCall(VecAssemblyEnd(gv));
8324: PetscCall(VecGetArray(gv, &coords));
8325: if (pcbddc->coarse_ksp) {
8326: PC coarse_pc;
8327: PetscBool isbddc;
8329: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8330: PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8331: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8332: PetscReal *realcoords;
8334: PetscCall(VecGetLocalSize(gv, &n));
8335: #if defined(PETSC_USE_COMPLEX)
8336: PetscCall(PetscMalloc1(n, &realcoords));
8337: for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8338: #else
8339: realcoords = coords;
8340: #endif
8341: PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8342: #if defined(PETSC_USE_COMPLEX)
8343: PetscCall(PetscFree(realcoords));
8344: #endif
8345: }
8346: }
8347: PetscCall(VecRestoreArray(gv, &coords));
8348: PetscCall(VecDestroy(&gv));
8349: }
8350: PetscCall(ISDestroy(&corners));
8352: if (pcbddc->coarse_ksp) {
8353: Vec crhs, csol;
8355: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
8356: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
8357: if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL));
8358: if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs)));
8359: }
8360: PetscCall(MatDestroy(&coarsedivudotp));
8362: /* compute null space for coarse solver if the benign trick has been requested */
8363: if (pcbddc->benign_null) {
8364: PetscCall(VecSet(pcbddc->vec1_P, 0.));
8365: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
8366: PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
8367: PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
8368: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8369: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8370: if (coarse_mat) {
8371: Vec nullv;
8372: PetscScalar *array, *array2;
8373: PetscInt nl;
8375: PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
8376: PetscCall(VecGetLocalSize(nullv, &nl));
8377: PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8378: PetscCall(VecGetArray(nullv, &array2));
8379: PetscCall(PetscArraycpy(array2, array, nl));
8380: PetscCall(VecRestoreArray(nullv, &array2));
8381: PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8382: PetscCall(VecNormalize(nullv, NULL));
8383: PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
8384: PetscCall(VecDestroy(&nullv));
8385: }
8386: }
8387: PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8389: PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8390: if (pcbddc->coarse_ksp) {
8391: PetscBool ispreonly;
8393: if (CoarseNullSpace) {
8394: PetscBool isnull;
8396: PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
8397: if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
8398: /* TODO: add local nullspaces (if any) */
8399: }
8400: /* setup coarse ksp */
8401: PetscCall(KSPSetUp(pcbddc->coarse_ksp));
8402: /* Check coarse problem if in debug mode or if solving with an iterative method */
8403: PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
8404: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8405: KSP check_ksp;
8406: KSPType check_ksp_type;
8407: PC check_pc;
8408: Vec check_vec, coarse_vec;
8409: PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
8410: PetscInt its;
8411: PetscBool compute_eigs;
8412: PetscReal *eigs_r, *eigs_c;
8413: PetscInt neigs;
8414: const char *prefix;
8416: /* Create ksp object suitable for estimation of extreme eigenvalues */
8417: PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
8418: PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
8419: PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
8420: PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
8421: PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size));
8422: /* prevent from setup unneeded object */
8423: PetscCall(KSPGetPC(check_ksp, &check_pc));
8424: PetscCall(PCSetType(check_pc, PCNONE));
8425: if (ispreonly) {
8426: check_ksp_type = KSPPREONLY;
8427: compute_eigs = PETSC_FALSE;
8428: } else {
8429: check_ksp_type = KSPGMRES;
8430: compute_eigs = PETSC_TRUE;
8431: }
8432: PetscCall(KSPSetType(check_ksp, check_ksp_type));
8433: PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
8434: PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
8435: PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
8436: PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
8437: PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
8438: PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
8439: PetscCall(KSPSetFromOptions(check_ksp));
8440: PetscCall(KSPSetUp(check_ksp));
8441: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
8442: PetscCall(KSPSetPC(check_ksp, check_pc));
8443: /* create random vec */
8444: PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
8445: PetscCall(VecSetRandom(check_vec, NULL));
8446: PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8447: /* solve coarse problem */
8448: PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
8449: PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
8450: /* set eigenvalue estimation if preonly has not been requested */
8451: if (compute_eigs) {
8452: PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
8453: PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
8454: PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
8455: if (neigs) {
8456: lambda_max = eigs_r[neigs - 1];
8457: lambda_min = eigs_r[0];
8458: if (pcbddc->use_coarse_estimates) {
8459: if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8460: PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
8461: PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
8462: }
8463: }
8464: }
8465: }
8467: /* check coarse problem residual error */
8468: if (pcbddc->dbg_flag) {
8469: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8470: PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8471: PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
8472: PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
8473: PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8474: PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
8475: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
8476: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer));
8477: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer));
8478: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error));
8479: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
8480: if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
8481: if (compute_eigs) {
8482: PetscReal lambda_max_s, lambda_min_s;
8483: KSPConvergedReason reason;
8484: PetscCall(KSPGetType(check_ksp, &check_ksp_type));
8485: PetscCall(KSPGetIterationNumber(check_ksp, &its));
8486: PetscCall(KSPGetConvergedReason(check_ksp, &reason));
8487: PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
8488: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
8489: for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
8490: }
8491: PetscCall(PetscViewerFlush(dbg_viewer));
8492: PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8493: }
8494: PetscCall(VecDestroy(&check_vec));
8495: PetscCall(VecDestroy(&coarse_vec));
8496: PetscCall(KSPDestroy(&check_ksp));
8497: if (compute_eigs) {
8498: PetscCall(PetscFree(eigs_r));
8499: PetscCall(PetscFree(eigs_c));
8500: }
8501: }
8502: }
8503: PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
8504: /* print additional info */
8505: if (pcbddc->dbg_flag) {
8506: /* waits until all processes reaches this point */
8507: PetscCall(PetscBarrier((PetscObject)pc));
8508: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
8509: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8510: }
8512: /* free memory */
8513: PetscCall(MatDestroy(&coarse_mat));
8514: PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8515: PetscFunctionReturn(PETSC_SUCCESS);
8516: }
8518: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
8519: {
8520: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8521: PC_IS *pcis = (PC_IS *)pc->data;
8522: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
8523: IS subset, subset_mult, subset_n;
8524: PetscInt local_size, coarse_size = 0;
8525: PetscInt *local_primal_indices = NULL;
8526: const PetscInt *t_local_primal_indices;
8528: PetscFunctionBegin;
8529: /* Compute global number of coarse dofs */
8530: PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
8531: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
8532: PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
8533: PetscCall(ISDestroy(&subset_n));
8534: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
8535: PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
8536: PetscCall(ISDestroy(&subset));
8537: PetscCall(ISDestroy(&subset_mult));
8538: PetscCall(ISGetLocalSize(subset_n, &local_size));
8539: PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
8540: PetscCall(PetscMalloc1(local_size, &local_primal_indices));
8541: PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
8542: PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
8543: PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
8544: PetscCall(ISDestroy(&subset_n));
8546: /* check numbering */
8547: if (pcbddc->dbg_flag) {
8548: PetscScalar coarsesum, *array, *array2;
8549: PetscInt i;
8550: PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE;
8552: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8553: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8554: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n"));
8555: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8556: /* counter */
8557: PetscCall(VecSet(pcis->vec1_global, 0.0));
8558: PetscCall(VecSet(pcis->vec1_N, 1.0));
8559: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8560: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8561: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8562: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8563: PetscCall(VecSet(pcis->vec1_N, 0.0));
8564: for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES));
8565: PetscCall(VecAssemblyBegin(pcis->vec1_N));
8566: PetscCall(VecAssemblyEnd(pcis->vec1_N));
8567: PetscCall(VecSet(pcis->vec1_global, 0.0));
8568: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8569: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8570: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8571: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8572: PetscCall(VecGetArray(pcis->vec1_N, &array));
8573: PetscCall(VecGetArray(pcis->vec2_N, &array2));
8574: for (i = 0; i < pcis->n; i++) {
8575: if (array[i] != 0.0 && array[i] != array2[i]) {
8576: PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi;
8577: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8578: set_error = PETSC_TRUE;
8579: PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi));
8580: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh));
8581: }
8582: }
8583: PetscCall(VecRestoreArray(pcis->vec2_N, &array2));
8584: PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8585: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8586: for (i = 0; i < pcis->n; i++) {
8587: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]);
8588: }
8589: PetscCall(VecRestoreArray(pcis->vec1_N, &array));
8590: PetscCall(VecSet(pcis->vec1_global, 0.0));
8591: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8592: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8593: PetscCall(VecSum(pcis->vec1_global, &coarsesum));
8594: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum)));
8595: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8596: PetscInt *gidxs;
8598: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs));
8599: PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs));
8600: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n"));
8601: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8602: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank));
8603: for (i = 0; i < pcbddc->local_primal_size; i++) {
8604: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i]));
8605: }
8606: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8607: PetscCall(PetscFree(gidxs));
8608: }
8609: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8610: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8611: PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed");
8612: }
8614: /* get back data */
8615: *coarse_size_n = coarse_size;
8616: *local_primal_indices_n = local_primal_indices;
8617: PetscFunctionReturn(PETSC_SUCCESS);
8618: }
8620: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
8621: {
8622: IS localis_t;
8623: PetscInt i, lsize, *idxs, n;
8624: PetscScalar *vals;
8626: PetscFunctionBegin;
8627: /* get indices in local ordering exploiting local to global map */
8628: PetscCall(ISGetLocalSize(globalis, &lsize));
8629: PetscCall(PetscMalloc1(lsize, &vals));
8630: for (i = 0; i < lsize; i++) vals[i] = 1.0;
8631: PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
8632: PetscCall(VecSet(gwork, 0.0));
8633: PetscCall(VecSet(lwork, 0.0));
8634: if (idxs) { /* multilevel guard */
8635: PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
8636: PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
8637: }
8638: PetscCall(VecAssemblyBegin(gwork));
8639: PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
8640: PetscCall(PetscFree(vals));
8641: PetscCall(VecAssemblyEnd(gwork));
8642: /* now compute set in local ordering */
8643: PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8644: PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8645: PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
8646: PetscCall(VecGetSize(lwork, &n));
8647: for (i = 0, lsize = 0; i < n; i++) {
8648: if (PetscRealPart(vals[i]) > 0.5) lsize++;
8649: }
8650: PetscCall(PetscMalloc1(lsize, &idxs));
8651: for (i = 0, lsize = 0; i < n; i++) {
8652: if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
8653: }
8654: PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
8655: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
8656: *localis = localis_t;
8657: PetscFunctionReturn(PETSC_SUCCESS);
8658: }
8660: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
8661: {
8662: PC_IS *pcis = (PC_IS *)pc->data;
8663: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8664: PC_IS *pcisf;
8665: PC_BDDC *pcbddcf;
8666: PC pcf;
8668: PetscFunctionBegin;
8669: PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
8670: PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
8671: PetscCall(PCSetType(pcf, PCBDDC));
8673: pcisf = (PC_IS *)pcf->data;
8674: pcbddcf = (PC_BDDC *)pcf->data;
8676: pcisf->is_B_local = pcis->is_B_local;
8677: pcisf->vec1_N = pcis->vec1_N;
8678: pcisf->BtoNmap = pcis->BtoNmap;
8679: pcisf->n = pcis->n;
8680: pcisf->n_B = pcis->n_B;
8682: PetscCall(PetscFree(pcbddcf->mat_graph));
8683: PetscCall(PetscFree(pcbddcf->sub_schurs));
8684: pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph;
8685: pcbddcf->sub_schurs = schurs;
8686: pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE;
8687: pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
8688: pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
8689: pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin;
8690: pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax;
8691: pcbddcf->use_faces = PETSC_TRUE;
8692: pcbddcf->use_change_of_basis = (PetscBool)!constraints;
8693: pcbddcf->use_change_on_faces = (PetscBool)!constraints;
8694: pcbddcf->use_qr_single = (PetscBool)!constraints;
8695: pcbddcf->fake_change = PETSC_TRUE;
8696: pcbddcf->dbg_flag = pcbddc->dbg_flag;
8698: PetscCall(PCBDDCAdaptiveSelection(pcf));
8699: PetscCall(PCBDDCConstraintsSetUp(pcf));
8701: *change = pcbddcf->ConstraintMatrix;
8702: if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
8703: if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
8704: if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
8706: if (schurs) pcbddcf->sub_schurs = NULL;
8707: pcbddcf->ConstraintMatrix = NULL;
8708: pcbddcf->mat_graph = NULL;
8709: pcisf->is_B_local = NULL;
8710: pcisf->vec1_N = NULL;
8711: pcisf->BtoNmap = NULL;
8712: PetscCall(PCDestroy(&pcf));
8713: PetscFunctionReturn(PETSC_SUCCESS);
8714: }
8716: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8717: {
8718: PC_IS *pcis = (PC_IS *)pc->data;
8719: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8720: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
8721: Mat S_j;
8722: PetscInt *used_xadj, *used_adjncy;
8723: PetscBool free_used_adj;
8725: PetscFunctionBegin;
8726: PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8727: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8728: free_used_adj = PETSC_FALSE;
8729: if (pcbddc->sub_schurs_layers == -1) {
8730: used_xadj = NULL;
8731: used_adjncy = NULL;
8732: } else {
8733: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8734: used_xadj = pcbddc->mat_graph->xadj;
8735: used_adjncy = pcbddc->mat_graph->adjncy;
8736: } else if (pcbddc->computed_rowadj) {
8737: used_xadj = pcbddc->mat_graph->xadj;
8738: used_adjncy = pcbddc->mat_graph->adjncy;
8739: } else {
8740: PetscBool flg_row = PETSC_FALSE;
8741: const PetscInt *xadj, *adjncy;
8742: PetscInt nvtxs;
8744: PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8745: if (flg_row) {
8746: PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
8747: PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
8748: PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
8749: free_used_adj = PETSC_TRUE;
8750: } else {
8751: pcbddc->sub_schurs_layers = -1;
8752: used_xadj = NULL;
8753: used_adjncy = NULL;
8754: }
8755: PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8756: }
8757: }
8759: /* setup sub_schurs data */
8760: PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
8761: if (!sub_schurs->schur_explicit) {
8762: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8763: PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
8764: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
8765: } else {
8766: Mat change = NULL;
8767: Vec scaling = NULL;
8768: IS change_primal = NULL, iP;
8769: PetscInt benign_n;
8770: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8771: PetscBool need_change = PETSC_FALSE;
8772: PetscBool discrete_harmonic = PETSC_FALSE;
8774: if (!pcbddc->use_vertices && reuse_solvers) {
8775: PetscInt n_vertices;
8777: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
8778: reuse_solvers = (PetscBool)!n_vertices;
8779: }
8780: if (!pcbddc->benign_change_explicit) {
8781: benign_n = pcbddc->benign_n;
8782: } else {
8783: benign_n = 0;
8784: }
8785: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8786: We need a global reduction to avoid possible deadlocks.
8787: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8788: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8789: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8790: PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8791: need_change = (PetscBool)(!need_change);
8792: }
8793: /* If the user defines additional constraints, we import them here */
8794: if (need_change) {
8795: PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
8796: PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
8797: }
8798: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
8800: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
8801: if (iP) {
8802: PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
8803: PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
8804: PetscOptionsEnd();
8805: }
8806: if (discrete_harmonic) {
8807: Mat A;
8808: PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
8809: PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
8810: PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
8811: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
8812: pcbddc->benign_zerodiag_subs, change, change_primal));
8813: PetscCall(MatDestroy(&A));
8814: } else {
8815: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
8816: pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
8817: }
8818: PetscCall(MatDestroy(&change));
8819: PetscCall(ISDestroy(&change_primal));
8820: }
8821: PetscCall(MatDestroy(&S_j));
8823: /* free adjacency */
8824: if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
8825: PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8826: PetscFunctionReturn(PETSC_SUCCESS);
8827: }
8829: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8830: {
8831: PC_IS *pcis = (PC_IS *)pc->data;
8832: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8833: PCBDDCGraph graph;
8835: PetscFunctionBegin;
8836: /* attach interface graph for determining subsets */
8837: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8838: IS verticesIS, verticescomm;
8839: PetscInt vsize, *idxs;
8841: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8842: PetscCall(ISGetSize(verticesIS, &vsize));
8843: PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
8844: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
8845: PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
8846: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8847: PetscCall(PCBDDCGraphCreate(&graph));
8848: PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
8849: PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
8850: PetscCall(ISDestroy(&verticescomm));
8851: PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
8852: } else {
8853: graph = pcbddc->mat_graph;
8854: }
8855: /* print some info */
8856: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8857: IS vertices;
8858: PetscInt nv, nedges, nfaces;
8859: PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
8860: PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8861: PetscCall(ISGetSize(vertices, &nv));
8862: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8863: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
8864: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
8865: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
8866: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
8867: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8868: PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
8869: PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8870: }
8872: /* sub_schurs init */
8873: if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
8874: PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
8876: /* free graph struct */
8877: if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
8878: PetscFunctionReturn(PETSC_SUCCESS);
8879: }
8881: PetscErrorCode PCBDDCCheckOperator(PC pc)
8882: {
8883: PC_IS *pcis = (PC_IS *)pc->data;
8884: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8886: PetscFunctionBegin;
8887: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8888: IS zerodiag = NULL;
8889: Mat S_j, B0_B = NULL;
8890: Vec dummy_vec = NULL, vec_check_B, vec_scale_P;
8891: PetscScalar *p0_check, *array, *array2;
8892: PetscReal norm;
8893: PetscInt i;
8895: /* B0 and B0_B */
8896: if (zerodiag) {
8897: IS dummy;
8899: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy));
8900: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
8901: PetscCall(MatCreateVecs(B0_B, NULL, &dummy_vec));
8902: PetscCall(ISDestroy(&dummy));
8903: }
8904: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8905: PetscCall(VecDuplicate(pcbddc->vec1_P, &vec_scale_P));
8906: PetscCall(VecSet(pcbddc->vec1_P, 1.0));
8907: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8908: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8909: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE));
8910: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE));
8911: PetscCall(VecReciprocal(vec_scale_P));
8912: /* S_j */
8913: PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
8914: PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
8916: /* mimic vector in \widetilde{W}_\Gamma */
8917: PetscCall(VecSetRandom(pcis->vec1_N, NULL));
8918: /* continuous in primal space */
8919: PetscCall(VecSetRandom(pcbddc->coarse_vec, NULL));
8920: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8921: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8922: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
8923: PetscCall(PetscCalloc1(pcbddc->benign_n, &p0_check));
8924: for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i];
8925: PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES));
8926: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
8927: PetscCall(VecAssemblyBegin(pcis->vec1_N));
8928: PetscCall(VecAssemblyEnd(pcis->vec1_N));
8929: PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD));
8930: PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD));
8931: PetscCall(VecDuplicate(pcis->vec2_B, &vec_check_B));
8932: PetscCall(VecCopy(pcis->vec2_B, vec_check_B));
8934: /* assemble rhs for coarse problem */
8935: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8936: /* local with Schur */
8937: PetscCall(MatMult(S_j, pcis->vec2_B, pcis->vec1_B));
8938: if (zerodiag) {
8939: PetscCall(VecGetArray(dummy_vec, &array));
8940: for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i];
8941: PetscCall(VecRestoreArray(dummy_vec, &array));
8942: PetscCall(MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B));
8943: }
8944: /* sum on primal nodes the local contributions */
8945: PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE));
8946: PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE));
8947: PetscCall(VecGetArray(pcis->vec1_N, &array));
8948: PetscCall(VecGetArray(pcbddc->vec1_P, &array2));
8949: for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8950: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array2));
8951: PetscCall(VecRestoreArray(pcis->vec1_N, &array));
8952: PetscCall(VecSet(pcbddc->coarse_vec, 0.));
8953: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8954: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8955: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8956: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8957: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
8958: /* scale primal nodes (BDDC sums contibutions) */
8959: PetscCall(VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P));
8960: PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES));
8961: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
8962: PetscCall(VecAssemblyBegin(pcis->vec1_N));
8963: PetscCall(VecAssemblyEnd(pcis->vec1_N));
8964: PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
8965: PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
8966: /* global: \widetilde{B0}_B w_\Gamma */
8967: if (zerodiag) {
8968: PetscCall(MatMult(B0_B, pcis->vec2_B, dummy_vec));
8969: PetscCall(VecGetArray(dummy_vec, &array));
8970: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i];
8971: PetscCall(VecRestoreArray(dummy_vec, &array));
8972: }
8973: /* BDDC */
8974: PetscCall(VecSet(pcis->vec1_D, 0.));
8975: PetscCall(PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE));
8977: PetscCall(VecCopy(pcis->vec1_B, pcis->vec2_B));
8978: PetscCall(VecAXPY(pcis->vec1_B, -1.0, vec_check_B));
8979: PetscCall(VecNorm(pcis->vec1_B, NORM_INFINITY, &norm));
8980: PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm));
8981: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i])));
8982: PetscCall(PetscFree(p0_check));
8983: PetscCall(VecDestroy(&vec_scale_P));
8984: PetscCall(VecDestroy(&vec_check_B));
8985: PetscCall(VecDestroy(&dummy_vec));
8986: PetscCall(MatDestroy(&S_j));
8987: PetscCall(MatDestroy(&B0_B));
8988: }
8989: PetscFunctionReturn(PETSC_SUCCESS);
8990: }
8992: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8993: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8994: {
8995: Mat At;
8996: IS rows;
8997: PetscInt rst, ren;
8998: PetscLayout rmap;
9000: PetscFunctionBegin;
9001: rst = ren = 0;
9002: if (ccomm != MPI_COMM_NULL) {
9003: PetscCall(PetscLayoutCreate(ccomm, &rmap));
9004: PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9005: PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9006: PetscCall(PetscLayoutSetUp(rmap));
9007: PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9008: }
9009: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9010: PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9011: PetscCall(ISDestroy(&rows));
9013: if (ccomm != MPI_COMM_NULL) {
9014: Mat_MPIAIJ *a, *b;
9015: IS from, to;
9016: Vec gvec;
9017: PetscInt lsize;
9019: PetscCall(MatCreate(ccomm, B));
9020: PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9021: PetscCall(MatSetType(*B, MATAIJ));
9022: PetscCall(PetscLayoutDestroy(&((*B)->rmap)));
9023: PetscCall(PetscLayoutSetUp((*B)->cmap));
9024: a = (Mat_MPIAIJ *)At->data;
9025: b = (Mat_MPIAIJ *)(*B)->data;
9026: PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9027: PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9028: PetscCall(PetscObjectReference((PetscObject)a->A));
9029: PetscCall(PetscObjectReference((PetscObject)a->B));
9030: b->A = a->A;
9031: b->B = a->B;
9033: b->donotstash = a->donotstash;
9034: b->roworiented = a->roworiented;
9035: b->rowindices = NULL;
9036: b->rowvalues = NULL;
9037: b->getrowactive = PETSC_FALSE;
9039: (*B)->rmap = rmap;
9040: (*B)->factortype = A->factortype;
9041: (*B)->assembled = PETSC_TRUE;
9042: (*B)->insertmode = NOT_SET_VALUES;
9043: (*B)->preallocated = PETSC_TRUE;
9045: if (a->colmap) {
9046: #if defined(PETSC_USE_CTABLE)
9047: PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9048: #else
9049: PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9050: PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9051: #endif
9052: } else b->colmap = NULL;
9053: if (a->garray) {
9054: PetscInt len;
9055: len = a->B->cmap->n;
9056: PetscCall(PetscMalloc1(len + 1, &b->garray));
9057: if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9058: } else b->garray = NULL;
9060: PetscCall(PetscObjectReference((PetscObject)a->lvec));
9061: b->lvec = a->lvec;
9063: /* cannot use VecScatterCopy */
9064: PetscCall(VecGetLocalSize(b->lvec, &lsize));
9065: PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9066: PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9067: PetscCall(MatCreateVecs(*B, &gvec, NULL));
9068: PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9069: PetscCall(ISDestroy(&from));
9070: PetscCall(ISDestroy(&to));
9071: PetscCall(VecDestroy(&gvec));
9072: }
9073: PetscCall(MatDestroy(&At));
9074: PetscFunctionReturn(PETSC_SUCCESS);
9075: }