Actual source code: nn.c
2: #include <../src/ksp/pc/impls/is/nn/nn.h>
4: /* -------------------------------------------------------------------------- */
5: /*
6: PCSetUp_NN - Prepares for the use of the NN preconditioner
7: by setting data structures and options.
9: Input Parameter:
10: . pc - the preconditioner context
12: Application Interface Routine: PCSetUp()
14: Note:
15: The interface routine PCSetUp() is not usually called directly by
16: the user, but instead is called by PCApply() if necessary.
17: */
18: static PetscErrorCode PCSetUp_NN(PC pc)
19: {
20: PetscFunctionBegin;
21: if (!pc->setupcalled) {
22: /* Set up all the "iterative substructuring" common block */
23: PetscCall(PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE));
24: /* Create the coarse matrix. */
25: PetscCall(PCNNCreateCoarseMatrix(pc));
26: }
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
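/*
   Descriptive note (not in the original source): PCISSetUp(), called above, builds the data shared
   by the PCIS-based preconditioners that is used throughout this file -- in particular the local
   Dirichlet solver pcis->ksp_D, the local Neumann solver pcis->ksp_N, the interface scaling vector
   pcis->D, and the scatters pcis->global_to_D and pcis->global_to_B between the global vector and
   the local interior/interface parts.
*/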
30: /* -------------------------------------------------------------------------- */
31: /*
32: PCApply_NN - Applies the NN preconditioner to a vector.
34: Input Parameters:
35: + pc - the preconditioner context
36: - r - input vector (global)
38: Output Parameter:
39: . z - output vector (global)
41: Application Interface Routine: PCApply()
42: */
43: static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z)
44: {
45: PC_IS *pcis = (PC_IS *)(pc->data);
46: PetscScalar m_one = -1.0;
47: Vec w = pcis->vec1_global;
49: PetscFunctionBegin;
50: /*
51: Dirichlet solvers.
52: Computing $ B_I^{(i)}r_I^{(i)} $ on each process.
53: The local results are stored in vec2_D.
54: */
55: PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
56: PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
57: PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
59: /*
60: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
61: Storing the result in the interface portion of the global vector w.
62: */
63: PetscCall(MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B));
64: PetscCall(VecScale(pcis->vec1_B, m_one));
65: PetscCall(VecCopy(r, w));
66: PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
67: PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
69: /*
70: Apply the interface preconditioner
71: */
72: PetscCall(PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N));
74: /*
75: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
76: The result is stored in vec1_D.
77: */
78: PetscCall(VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
79: PetscCall(VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
80: PetscCall(MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D));
82: /*
83: Dirichlet solvers.
84: Computing $ B_I^{(i)}t_I^{(i)} $ and inserting the blocks
85: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $ into the global vector.
86: */
87: PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
88: PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
89: PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
90: PetscCall(VecScale(pcis->vec2_D, m_one));
91: PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
92: PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
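/*
   Summary (a descriptive sketch, not in the original source; it uses the notation of the comments
   above and introduces the symbol $ M_S^{-1} $ for the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner()):

      $ w_B = r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} $      (condensed interface residual)
      $ z_B = M_S^{-1} w_B $                                                    (interface correction)
      $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $     (interior back-substitution)

   where $ B_I^{(i)} $ is the local Dirichlet solver (an exact or approximate inverse of the local
   interior block).
*/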
96: /* -------------------------------------------------------------------------- */
97: /*
98: PCDestroy_NN - Destroys the private context for the NN preconditioner
99: that was created with PCCreate_NN().
101: Input Parameter:
102: . pc - the preconditioner context
104: Application Interface Routine: PCDestroy()
105: */
106: static PetscErrorCode PCDestroy_NN(PC pc)
107: {
108: PC_NN *pcnn = (PC_NN *)pc->data;
110: PetscFunctionBegin;
111: PetscCall(PCISDestroy(pc));
113: PetscCall(MatDestroy(&pcnn->coarse_mat));
114: PetscCall(VecDestroy(&pcnn->coarse_x));
115: PetscCall(VecDestroy(&pcnn->coarse_b));
116: PetscCall(KSPDestroy(&pcnn->ksp_coarse));
117: if (pcnn->DZ_IN) {
118: PetscCall(PetscFree(pcnn->DZ_IN[0]));
119: PetscCall(PetscFree(pcnn->DZ_IN));
120: }
122: /*
123: Free the private data structure that was hanging off the PC
124: */
125: PetscCall(PetscFree(pc->data));
126: PetscFunctionReturn(PETSC_SUCCESS);
127: }
129: /*MC
130: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
132: Options Database Keys:
133: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
134: (this skips the first coarse grid solve in the preconditioner)
135: . -pc_nn_turn_off_second_balancing - do not balance the solution obtained from the local Neumann problems
136: (this skips the second coarse grid solve in the preconditioner)
137: . -pc_is_damp_fixed <fact> -
138: . -pc_is_remove_nullspace_fixed -
139: . -pc_is_set_damping_factor_floating <fact> -
140: . -pc_is_not_damp_floating -
141: - -pc_is_not_remove_nullspace_floating -
143: Options Database prefixes for the subsolvers this preconditioner uses:
144: + -nn_coarse_pc_ - for the coarse grid preconditioner
145: . -is_localD_pc_ - for the Dirichlet subproblem preconditioner
146: - -is_localN_pc_ - for the Neumann subproblem preconditioner
148: Level: intermediate
150: Notes:
151: The matrix used with this preconditioner must be of type `MATIS`.
153: Unlike more 'conventional' Neumann-Neumann preconditioners, this one iterates over ALL the
154: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
155: on the subdomains, though in our experience using approximate solvers is slower).
157: Contributed by Paulo Goldfeld
159: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
160: M*/
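/*
   Example (a minimal usage sketch, not part of the original source): assuming a matrix A of type
   MATIS and vectors b, x already created on the communicator comm, the preconditioner is selected
   as follows (the variable names are illustrative only; KSPSetFromOptions() picks up the -pc_nn_*
   and subsolver options listed above):

      KSP ksp;
      PC  pc;

      PetscCall(KSPCreate(comm, &ksp));
      PetscCall(KSPSetOperators(ksp, A, A));
      PetscCall(KSPGetPC(ksp, &pc));
      PetscCall(PCSetType(pc, PCNN));
      PetscCall(KSPSetFromOptions(ksp));
      PetscCall(KSPSolve(ksp, b, x));
      PetscCall(KSPDestroy(&ksp));
*/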
162: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
163: {
164: PC_NN *pcnn;
166: PetscFunctionBegin;
167: /*
168: Create the private data structure for this preconditioner and
169: attach it to the PC object.
170: */
171: PetscCall(PetscNew(&pcnn));
172: pc->data = (void *)pcnn;
174: PetscCall(PCISCreate(pc));
175: pcnn->coarse_mat = NULL;
176: pcnn->coarse_x = NULL;
177: pcnn->coarse_b = NULL;
178: pcnn->ksp_coarse = NULL;
179: pcnn->DZ_IN = NULL;
181: /*
182: Set the pointers for the functions that are provided above.
183: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
184: are called, they will automatically call these functions. Note we
185: choose not to provide a couple of these functions since they are
186: not needed.
187: */
188: pc->ops->apply = PCApply_NN;
189: pc->ops->applytranspose = NULL;
190: pc->ops->setup = PCSetUp_NN;
191: pc->ops->destroy = PCDestroy_NN;
192: pc->ops->view = NULL;
193: pc->ops->applyrichardson = NULL;
194: pc->ops->applysymmetricleft = NULL;
195: pc->ops->applysymmetricright = NULL;
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: /*
200: PCNNCreateCoarseMatrix - Creates and assembles the coarse matrix, the coarse vectors, and the coarse linear solver (one coarse degree of freedom per subdomain).
201: */
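/*
   Descriptive sketch (not in the original source; the symbols $ S $, $ S^{(k)} $ and $ z_k $ are
   introduced here and do not appear in the code): the coarse matrix assembled below has one degree
   of freedom per subdomain, and its entries are

      $ (A_c)_{pq} = (D_p z_p)^T \tilde R_p S \tilde R_q^T (D_q z_q) $ ,   with  $ S = \sum_k \tilde R_k^T S^{(k)} \tilde R_k $ ,

   where $ S^{(k)} $ is the local Schur complement, $ z_k $ is the null space of the local Neumann
   problem (a constant for scalar elliptic PDEs, so that $ D_k z_k $ is simply the scaling vector
   pcis->D), and $ D_k $, $ \tilde R_k $ are the weights and restrictions set up by PCISSetUp().
   The arrays DZ_IN[i][] hold the values of the neighbours' $ D z $ on the shared interface nodes.
*/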
202: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
203: {
204: MPI_Request *send_request, *recv_request;
205: PetscInt i, j, k;
206: PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
207: PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = portion of the vector to be sent from process k to its i-th neighbour */
209: /* aliasing some names */
210: PC_IS *pcis = (PC_IS *)(pc->data);
211: PC_NN *pcnn = (PC_NN *)pc->data;
212: PetscInt n_neigh = pcis->n_neigh;
213: PetscInt *neigh = pcis->neigh;
214: PetscInt *n_shared = pcis->n_shared;
215: PetscInt **shared = pcis->shared;
216: PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
218: PetscFunctionBegin;
219: /* Allocate memory for mat (the +1 handles the case where n_neigh is zero) */
220: PetscCall(PetscMalloc1(n_neigh * n_neigh + 1, &mat));
222: /* Allocate memory for DZ */
223: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
224: /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
225: {
226: PetscInt size_of_Z = 0;
227: PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &pcnn->DZ_IN));
228: DZ_IN = pcnn->DZ_IN;
229: PetscCall(PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &DZ_OUT));
230: for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
231: PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_IN[0]));
232: PetscCall(PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_OUT[0]));
233: }
234: for (i = 1; i < n_neigh; i++) {
235: DZ_IN[i] = DZ_IN[i - 1] + n_shared[i - 1];
236: DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
237: }
239: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
240: /* First, set the auxiliary array pcis->work_N. */
241: PetscCall(PCISScatterArrayNToVecB(pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE, pc));
242: for (i = 1; i < n_neigh; i++) {
243: for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
244: }
246: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
247: /* Notice that send_request[] and recv_request[] could have one less element. */
248: /* We make them longer so that request[i] corresponds to neigh[i]. */
249: {
250: PetscMPIInt tag;
251: PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));
252: PetscCall(PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request));
253: for (i = 1; i < n_neigh; i++) {
254: PetscCallMPI(MPI_Isend((void *)(DZ_OUT[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(send_request[i])));
255: PetscCallMPI(MPI_Irecv((void *)(DZ_IN[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(recv_request[i])));
256: }
257: }
259: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
260: for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
262: /* Start computing with local D*Z while communication goes on. */
263: /* Apply the Schur complement. The result is stored in pcis->vec1_B */
264: /* and is also scattered to the work array pcis->work_N. */
266: PetscCall(PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
268: /* Compute the first column, while completing the receives. */
269: for (i = 0; i < n_neigh; i++) {
270: MPI_Status stat;
271: PetscMPIInt ind = 0;
272: if (i > 0) {
273: PetscCallMPI(MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat));
274: ind++;
275: }
276: mat[ind * n_neigh + 0] = 0.0;
277: for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
278: }
280: /* Compute the remaining columns. */
281: for (j = 1; j < n_neigh; j++) {
282: PetscCall(PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
283: for (i = 0; i < n_neigh; i++) {
284: mat[i * n_neigh + j] = 0.0;
285: for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
286: }
287: }
289: /* Complete the sending. */
290: if (n_neigh > 1) {
291: MPI_Status *stat;
292: PetscCall(PetscMalloc1(n_neigh - 1, &stat));
293: if (n_neigh - 1) PetscCallMPI(MPI_Waitall(n_neigh - 1, &(send_request[1]), stat));
294: PetscCall(PetscFree(stat));
295: }
297: /* Free the memory for the MPI requests */
298: PetscCall(PetscFree2(send_request, recv_request));
300: /* Free the memory for DZ_OUT */
301: if (DZ_OUT) {
302: PetscCall(PetscFree(DZ_OUT[0]));
303: PetscCall(PetscFree(DZ_OUT));
304: }
306: {
307: PetscMPIInt size;
308: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
309: /* Create the global coarse vectors (rhs and solution). */
310: PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &(pcnn->coarse_b)));
311: PetscCall(VecDuplicate(pcnn->coarse_b, &(pcnn->coarse_x)));
312: /* Create and set the global coarse AIJ matrix. */
313: PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &(pcnn->coarse_mat)));
314: PetscCall(MatSetSizes(pcnn->coarse_mat, 1, 1, size, size));
315: PetscCall(MatSetType(pcnn->coarse_mat, MATAIJ));
316: PetscCall(MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL));
317: PetscCall(MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL));
318: PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
319: PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
320: PetscCall(MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES));
321: PetscCall(MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
322: PetscCall(MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
323: }
325: {
326: PetscMPIInt rank;
327: PetscScalar one = 1.0;
328: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
329: /* "Zero out" rows of not-purely-Neumann subdomains */
330: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
331: PetscCall(MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL));
332: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
333: PetscInt row = (PetscInt)rank;
334: PetscCall(MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL));
335: }
336: }
338: /* Create the coarse linear solver context */
339: {
340: PC pc_ctx, inner_pc;
341: KSP inner_ksp;
343: PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse));
344: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2));
345: PetscCall(KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat));
346: PetscCall(KSPGetPC(pcnn->ksp_coarse, &pc_ctx));
347: PetscCall(PCSetType(pc_ctx, PCREDUNDANT));
348: PetscCall(KSPSetType(pcnn->ksp_coarse, KSPPREONLY));
349: PetscCall(PCRedundantGetKSP(pc_ctx, &inner_ksp));
350: PetscCall(KSPGetPC(inner_ksp, &inner_pc));
351: PetscCall(PCSetType(inner_pc, PCLU));
352: PetscCall(KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_"));
353: PetscCall(KSPSetFromOptions(pcnn->ksp_coarse));
354: /* Set up the coarse KSP now, so that the setup cost is not incurred during the first application of the preconditioner. */
355: PetscCall(KSPSetUp(pcnn->ksp_coarse));
356: }
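  /*
     Illustrative note (not in the original source): since the coarse KSP uses the options prefix
     "nn_coarse_", its solver can be changed at run time with options such as

        -nn_coarse_ksp_type cg -nn_coarse_pc_type jacobi
  */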
358: /* Free the memory for mat */
359: PetscCall(PetscFree(mat));
361: /* for DEBUGGING, save the coarse matrix to a file. */
362: {
363: PetscBool flg = PETSC_FALSE;
364: PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL));
365: if (flg) {
366: PetscViewer viewer;
367: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer));
368: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
369: PetscCall(MatView(pcnn->coarse_mat, viewer));
370: PetscCall(PetscViewerPopFormat(viewer));
371: PetscCall(PetscViewerDestroy(&viewer));
372: }
373: }
375: /* Set the variable pcnn->factor_coarse_rhs. */
376: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
378: /* See historical note 02, at the bottom of this file. */
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: /*
383: PCNNApplySchurToChunk - Applies the local Schur complement to a chunk, i.e., to a set of values given on a set of local node indices.
385: Input parameters:
386: . pc - the preconditioner context
387: . n - size of chunk
388: . idx - indices of chunk
389: . chunk - values
391: Output parameters:
392: . array_N - result of Schur complement applied to chunk, scattered to big array
393: . vec1_B - result of Schur complement applied to chunk
394: . vec2_B - garbage (used as work space)
395: . vec1_D - garbage (used as work space)
396: . vec2_D - garbage (used as work space)
398: */
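/*
   Descriptive note (not in the original source, using the notation of the comments in PCApply_NN):
   PCISApplySchur(), called below, applies the local Schur complement

      $ S^{(i)} = A_{BB}^{(i)} - A_{BI}^{(i)} (A_{II}^{(i)})^{-1} A_{IB}^{(i)} $ ,

   where the solve with $ A_{II}^{(i)} $ is performed (possibly only approximately) by the local
   Dirichlet solver pcis->ksp_D.
*/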
399: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
400: {
401: PetscInt i;
402: PC_IS *pcis = (PC_IS *)(pc->data);
404: PetscFunctionBegin;
405: PetscCall(PetscArrayzero(array_N, pcis->n));
406: for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
407: PetscCall(PCISScatterArrayNToVecB(array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc));
408: PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
409: PetscCall(PCISScatterArrayNToVecB(array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE, pc));
410: PetscFunctionReturn(PETSC_SUCCESS);
411: }
413: /*
414: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
415: the preconditioner for the Schur complement.
417: Input parameter:
418: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
420: Output parameters:
421: . z - global vector of interior and interface nodes. The values on the interface are the result of
422: the application of the interface preconditioner to the interface part of r. The values on the
423: interior nodes are garbage.
424: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
425: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
426: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
427: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
428: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
429: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
430: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
431: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
433: */
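/*
   Descriptive sketch (not in the original source; the symbols $ S $, $ S_i^+ $ and $ Q $ are
   introduced here and do not appear in the code): with both balancing steps enabled, the interface
   part of z is (up to the use of approximate subdomain solvers) the balancing Neumann-Neumann
   preconditioner of [Man93] applied to the interface part of r,

      $ z_B = Q r_B + (I - Q S) \sum_i \tilde R_i^T D_i S_i^{+} D_i \tilde R_i (I - S Q) r_B $ ,

   where $ S_i^{+} $ is the (pseudo-)inverse of the local Schur complement applied by
   PCISApplyInvSchur() (the Neumann solver) and $ Q = Z_c (Z_c^T S Z_c)^{-1} Z_c^T $ is the coarse
   solve performed by PCNNBalancing(), with the coarse space $ Z_c $ spanned by the weighted local
   null-space vectors $ \tilde R_i^T D_i z_i $. Turning off the first or second balancing (see the
   options in the PCNN manual page above) skips the corresponding coarse solve.
*/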
434: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N)
435: {
436: PC_IS *pcis = (PC_IS *)(pc->data);
438: PetscFunctionBegin;
439: /*
440: First balancing step.
441: */
442: {
443: PetscBool flg = PETSC_FALSE;
444: PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL));
445: if (!flg) {
446: PetscCall(PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N));
447: } else {
448: PetscCall(VecCopy(r, z));
449: }
450: }
452: /*
453: Extract the local interface part of z and scale it by D
454: */
455: PetscCall(VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
456: PetscCall(VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
457: PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
459: /* Neumann Solver */
460: PetscCall(PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N));
462: /*
463: Second balancing step.
464: */
465: {
466: PetscBool flg = PETSC_FALSE;
467: PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_second_balancing", &flg, NULL));
468: if (!flg) {
469: PetscCall(PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N));
470: } else {
471: PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
472: PetscCall(VecSet(z, 0.0));
473: PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
474: PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
475: }
476: }
477: PetscFunctionReturn(PETSC_SUCCESS);
478: }
480: /*
481: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
482: input argument u is provided), or s, as given in equations
483: (12) and (13), if the input argument u is NULL.
484: Notice that the input argument u plays the role of u_i in
485: equation (14). The equation numbers refer to [Man93].
487: Input Parameters:
488: + pc - the NN preconditioner context.
489: . r - MPI vector of all nodes (interior and interface). It's preserved.
490: - u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
492: Output Parameters:
493: + z - MPI vector of interior and interface nodes. Returns s or z (see description above).
494: . vec1_B - Sequential vector of local interface nodes. Workspace.
495: . vec2_B - Sequential vector of local interface nodes. Workspace.
496: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
497: . vec1_D - Sequential vector of local interior nodes. Workspace.
498: . vec2_D - Sequential vector of local interior nodes. Workspace.
499: - work_N - Array of all local nodes (interior and interface). Workspace.
501: */
502: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
503: {
504: PetscInt k;
505: PetscScalar value;
506: PetscScalar *lambda;
507: PC_NN *pcnn = (PC_NN *)(pc->data);
508: PC_IS *pcis = (PC_IS *)(pc->data);
510: PetscFunctionBegin;
511: PetscCall(PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0));
512: if (u) {
513: if (!vec3_B) vec3_B = u;
514: PetscCall(VecPointwiseMult(vec1_B, pcis->D, u));
515: PetscCall(VecSet(z, 0.0));
516: PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
517: PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
518: PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
519: PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
520: PetscCall(PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D));
521: PetscCall(VecScale(vec3_B, -1.0));
522: PetscCall(VecCopy(r, z));
523: PetscCall(VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
524: PetscCall(VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
525: } else {
526: PetscCall(VecCopy(r, z));
527: }
528: PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
529: PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
530: PetscCall(PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE, pc));
531: for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
532: value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
533: {
534: PetscMPIInt rank;
535: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
536: PetscCall(VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES));
537: /*
538: Since we are only inserting local values (one value actually) we don't need to do the
539: reduction that tells us there is no data that needs to be moved. Hence we comment out these
540: PetscCall(VecAssemblyBegin(pcnn->coarse_b));
541: PetscCall(VecAssemblyEnd (pcnn->coarse_b));
542: */
543: }
544: PetscCall(KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x));
545: if (!u) PetscCall(VecScale(pcnn->coarse_x, -1.0));
546: PetscCall(VecGetArray(pcnn->coarse_x, &lambda));
547: for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
548: PetscCall(VecRestoreArray(pcnn->coarse_x, &lambda));
549: PetscCall(PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc));
550: PetscCall(VecSet(z, 0.0));
551: PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
552: PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
553: if (!u) {
554: PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
555: PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
556: PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
557: PetscCall(VecCopy(r, z));
558: }
559: PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
560: PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
561: PetscCall(PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0));
562: PetscFunctionReturn(PETSC_SUCCESS);
563: }
565: /* */
566: /* From now on, "footnotes" (or "historical notes"). */
567: /* */
568: /*
569: Historical note 01
571: We considered the possibility of an alternative D_i that would still
572: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
573: The basic principle was still the pseudo-inverse of the counting
574: function; the difference was that we would not count subdomains
575: that do not contribute to the coarse space (i.e., subdomains that are not
576: pure Neumann).
578: This turned out to be a bad idea: we would end up solving trivial Neumann
579: problems in the subdomains that are not pure Neumann, since we would be
580: scaling the balanced residual by zero there.
581: */
583: /*
584: Historical note 02
586: We tried an alternative coarse problem that would eliminate exactly a
587: constant error. It turned out not to improve the overall convergence.
588: */