Actual source code: ex51.c
2: static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\
3: uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\
4: matrix assembly, the matrix is intentionally laid out across processors\n\
5: differently from the way it is assembled. Input arguments are:\n\
6: -m <size> -p <order> : mesh size and polynomial order\n\n";
8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
9: based on src/ksp/ksp/tutorials/ex3.c
10: */
12: #include <petscksp.h>
14: /* Declare user-defined routines */
15: static PetscReal src(PetscReal, PetscReal);
16: static PetscReal ubdy(PetscReal, PetscReal);
17: static PetscReal polyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
18: static PetscReal derivPolyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
19: static PetscErrorCode Form1DElementMass(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
20: static PetscErrorCode Form1DElementStiffness(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
21: static PetscErrorCode Form2DElementMass(PetscInt, PetscScalar *, PetscScalar *);
22: static PetscErrorCode Form2DElementStiffness(PetscInt, PetscScalar *, PetscScalar *, PetscScalar *);
23: static PetscErrorCode FormNodalRhs(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
24: static PetscErrorCode FormNodalSoln(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
25: static void leggaulob(PetscReal, PetscReal, PetscReal[], PetscReal[], int);
26: static void qAndLEvaluation(int, PetscReal, PetscReal *, PetscReal *, PetscReal *);
28: int main(int argc, char **args)
29: {
30: PetscInt p = 2, m = 5;
31: PetscInt num1Dnodes, num2Dnodes;
32: PetscScalar *Ke1D, *Ke2D, *Me1D, *Me2D;
33: PetscScalar *r, *ue, val;
34: Vec u, ustar, b, q;
35: Mat A, Mass;
36: KSP ksp;
37: PetscInt M, N;
38: PetscMPIInt rank, size;
39: PetscReal x, y, h, norm;
40: PetscInt *idx, indx, count, *rows, i, j, k, start, end, its;
41: PetscReal *rowsx, *rowsy;
42: PetscReal *gllNode, *gllWgts;
44: PetscFunctionBeginUser;
45: PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
46: PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Options for p-FEM", "");
47: PetscCall(PetscOptionsInt("-m", "Number of elements in each direction", "None", m, &m, NULL));
48: PetscCall(PetscOptionsInt("-p", "Order of each element (tensor product basis)", "None", p, &p, NULL));
49: PetscOptionsEnd();
50: PetscCheck(p > 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Option -p value should be greater than zero");
51: N = (p * m + 1) * (p * m + 1); /* dimension of matrix */
52: M = m * m; /* number of elements */
53: h = 1.0 / m; /* mesh width */
54: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
55: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
57: /* Create stiffness matrix */
58: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
59: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
60: PetscCall(MatSetFromOptions(A));
61: PetscCall(MatSetUp(A));
63: /* Create matrix */
64: PetscCall(MatCreate(PETSC_COMM_WORLD, &Mass));
65: PetscCall(MatSetSizes(Mass, PETSC_DECIDE, PETSC_DECIDE, N, N));
66: PetscCall(MatSetFromOptions(Mass));
67: PetscCall(MatSetUp(Mass));
68: start = rank * (M / size) + ((M % size) < rank ? (M % size) : rank);
69: end = start + M / size + ((M % size) > rank);
71: /* Allocate element stiffness matrices */
72: num1Dnodes = (p + 1);
73: num2Dnodes = num1Dnodes * num1Dnodes;
75: PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Me1D));
76: PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Ke1D));
77: PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Me2D));
78: PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Ke2D));
79: PetscCall(PetscMalloc1(num2Dnodes, &idx));
80: PetscCall(PetscMalloc1(num2Dnodes, &r));
81: PetscCall(PetscMalloc1(num2Dnodes, &ue));
83: /* Allocate quadrature and create stiffness matrices */
84: PetscCall(PetscMalloc1(p + 1, &gllNode));
85: PetscCall(PetscMalloc1(p + 1, &gllWgts));
86: leggaulob(0.0, 1.0, gllNode, gllWgts, p); /* Get GLL nodes and weights */
87: PetscCall(Form1DElementMass(h, p, gllNode, gllWgts, Me1D));
88: PetscCall(Form1DElementStiffness(h, p, gllNode, gllWgts, Ke1D));
89: PetscCall(Form2DElementMass(p, Me1D, Me2D));
90: PetscCall(Form2DElementStiffness(p, Ke1D, Me1D, Ke2D));
92: /* Assemble matrix */
93: for (i = start; i < end; i++) {
94: indx = 0;
95: for (k = 0; k < (p + 1); ++k) {
96: for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
97: }
98: PetscCall(MatSetValues(A, num2Dnodes, idx, num2Dnodes, idx, Ke2D, ADD_VALUES));
99: PetscCall(MatSetValues(Mass, num2Dnodes, idx, num2Dnodes, idx, Me2D, ADD_VALUES));
100: }
101: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
102: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
103: PetscCall(MatAssemblyBegin(Mass, MAT_FINAL_ASSEMBLY));
104: PetscCall(MatAssemblyEnd(Mass, MAT_FINAL_ASSEMBLY));
106: PetscCall(PetscFree(Me1D));
107: PetscCall(PetscFree(Ke1D));
108: PetscCall(PetscFree(Me2D));
109: PetscCall(PetscFree(Ke2D));
111: /* Create right-hand-side and solution vectors */
112: PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
113: PetscCall(VecSetSizes(u, PETSC_DECIDE, N));
114: PetscCall(VecSetFromOptions(u));
115: PetscCall(PetscObjectSetName((PetscObject)u, "Approx. Solution"));
116: PetscCall(VecDuplicate(u, &b));
117: PetscCall(PetscObjectSetName((PetscObject)b, "Right hand side"));
118: PetscCall(VecDuplicate(u, &q));
119: PetscCall(PetscObjectSetName((PetscObject)q, "Right hand side 2"));
120: PetscCall(VecDuplicate(b, &ustar));
121: PetscCall(VecSet(u, 0.0));
122: PetscCall(VecSet(b, 0.0));
123: PetscCall(VecSet(q, 0.0));
125: /* Assemble nodal right-hand-side and soln vector */
126: for (i = start; i < end; i++) {
127: x = h * (i % m);
128: y = h * (i / m);
129: indx = 0;
130: for (k = 0; k < (p + 1); ++k) {
131: for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
132: }
133: PetscCall(FormNodalRhs(p, x, y, h, gllNode, r));
134: PetscCall(FormNodalSoln(p, x, y, h, gllNode, ue));
135: PetscCall(VecSetValues(q, num2Dnodes, idx, r, INSERT_VALUES));
136: PetscCall(VecSetValues(ustar, num2Dnodes, idx, ue, INSERT_VALUES));
137: }
138: PetscCall(VecAssemblyBegin(q));
139: PetscCall(VecAssemblyEnd(q));
140: PetscCall(VecAssemblyBegin(ustar));
141: PetscCall(VecAssemblyEnd(ustar));
143: PetscCall(PetscFree(idx));
144: PetscCall(PetscFree(r));
145: PetscCall(PetscFree(ue));
147: /* Get FE right-hand side vector */
148: PetscCall(MatMult(Mass, q, b));
150: /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
151: PetscCall(PetscMalloc1(4 * p * m, &rows));
152: PetscCall(PetscMalloc1(4 * p * m, &rowsx));
153: PetscCall(PetscMalloc1(4 * p * m, &rowsy));
154: for (i = 0; i < p * m + 1; i++) {
155: rows[i] = i; /* bottom */
156: rowsx[i] = (i / p) * h + gllNode[i % p] * h;
157: rowsy[i] = 0.0;
158: rows[3 * p * m - 1 + i] = (p * m) * (p * m + 1) + i; /* top */
159: rowsx[3 * p * m - 1 + i] = (i / p) * h + gllNode[i % p] * h;
160: rowsy[3 * p * m - 1 + i] = 1.0;
161: }
162: count = p * m + 1; /* left side */
163: indx = 1;
164: for (i = p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
165: rows[count] = i;
166: rowsx[count] = 0.0;
167: rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
168: indx++;
169: }
170: count = 2 * p * m; /* right side */
171: indx = 1;
172: for (i = 2 * p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
173: rows[count] = i;
174: rowsx[count] = 1.0;
175: rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
176: indx++;
177: }
178: for (i = 0; i < 4 * p * m; i++) {
179: x = rowsx[i];
180: y = rowsy[i];
181: val = ubdy(x, y);
182: PetscCall(VecSetValues(b, 1, &rows[i], &val, INSERT_VALUES));
183: PetscCall(VecSetValues(u, 1, &rows[i], &val, INSERT_VALUES));
184: }
185: PetscCall(MatZeroRows(A, 4 * p * m, rows, 1.0, 0, 0));
186: PetscCall(PetscFree(rows));
187: PetscCall(PetscFree(rowsx));
188: PetscCall(PetscFree(rowsy));
190: PetscCall(VecAssemblyBegin(u));
191: PetscCall(VecAssemblyEnd(u));
192: PetscCall(VecAssemblyBegin(b));
193: PetscCall(VecAssemblyEnd(b));
195: /* Solve linear system */
196: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
197: PetscCall(KSPSetOperators(ksp, A, A));
198: PetscCall(KSPSetInitialGuessNonzero(ksp, PETSC_TRUE));
199: PetscCall(KSPSetFromOptions(ksp));
200: PetscCall(KSPSolve(ksp, b, u));
202: /* Check error */
203: PetscCall(VecAXPY(u, -1.0, ustar));
204: PetscCall(VecNorm(u, NORM_2, &norm));
205: PetscCall(KSPGetIterationNumber(ksp, &its));
206: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)(norm * h), its));
208: PetscCall(PetscFree(gllNode));
209: PetscCall(PetscFree(gllWgts));
211: PetscCall(KSPDestroy(&ksp));
212: PetscCall(VecDestroy(&u));
213: PetscCall(VecDestroy(&b));
214: PetscCall(VecDestroy(&q));
215: PetscCall(VecDestroy(&ustar));
216: PetscCall(MatDestroy(&A));
217: PetscCall(MatDestroy(&Mass));
219: PetscCall(PetscFinalize());
220: return 0;
221: }
223: /* --------------------------------------------------------------------- */
225: /* 1d element stiffness mass matrix */
226: static PetscErrorCode Form1DElementMass(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Me1D)
227: {
228: PetscInt i, j, k;
229: PetscInt indx;
231: PetscFunctionBeginUser;
232: for (j = 0; j < (P + 1); ++j) {
233: for (i = 0; i < (P + 1); ++i) {
234: indx = j * (P + 1) + i;
235: Me1D[indx] = 0.0;
236: for (k = 0; k < (P + 1); ++k) Me1D[indx] += H * gqw[k] * polyBasisFunc(P, i, gqn, gqn[k]) * polyBasisFunc(P, j, gqn, gqn[k]);
237: }
238: }
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: /* --------------------------------------------------------------------- */
244: /* 1d element stiffness matrix for derivative */
245: static PetscErrorCode Form1DElementStiffness(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Ke1D)
246: {
247: PetscInt i, j, k;
248: PetscInt indx;
250: PetscFunctionBeginUser;
251: for (j = 0; j < (P + 1); ++j) {
252: for (i = 0; i < (P + 1); ++i) {
253: indx = j * (P + 1) + i;
254: Ke1D[indx] = 0.0;
255: for (k = 0; k < (P + 1); ++k) Ke1D[indx] += (1. / H) * gqw[k] * derivPolyBasisFunc(P, i, gqn, gqn[k]) * derivPolyBasisFunc(P, j, gqn, gqn[k]);
256: }
257: }
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: /* --------------------------------------------------------------------- */
263: /* element mass matrix */
264: static PetscErrorCode Form2DElementMass(PetscInt P, PetscScalar *Me1D, PetscScalar *Me2D)
265: {
266: PetscInt i1, j1, i2, j2;
267: PetscInt indx1, indx2, indx3;
269: PetscFunctionBeginUser;
270: for (j2 = 0; j2 < (P + 1); ++j2) {
271: for (i2 = 0; i2 < (P + 1); ++i2) {
272: for (j1 = 0; j1 < (P + 1); ++j1) {
273: for (i1 = 0; i1 < (P + 1); ++i1) {
274: indx1 = j1 * (P + 1) + i1;
275: indx2 = j2 * (P + 1) + i2;
276: indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
277: Me2D[indx3] = Me1D[indx1] * Me1D[indx2];
278: }
279: }
280: }
281: }
282: PetscFunctionReturn(PETSC_SUCCESS);
283: }
285: /* --------------------------------------------------------------------- */
287: /* element stiffness for Laplacian */
288: static PetscErrorCode Form2DElementStiffness(PetscInt P, PetscScalar *Ke1D, PetscScalar *Me1D, PetscScalar *Ke2D)
289: {
290: PetscInt i1, j1, i2, j2;
291: PetscInt indx1, indx2, indx3;
293: PetscFunctionBeginUser;
294: for (j2 = 0; j2 < (P + 1); ++j2) {
295: for (i2 = 0; i2 < (P + 1); ++i2) {
296: for (j1 = 0; j1 < (P + 1); ++j1) {
297: for (i1 = 0; i1 < (P + 1); ++i1) {
298: indx1 = j1 * (P + 1) + i1;
299: indx2 = j2 * (P + 1) + i2;
300: indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
301: Ke2D[indx3] = Ke1D[indx1] * Me1D[indx2] + Me1D[indx1] * Ke1D[indx2];
302: }
303: }
304: }
305: }
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: /* --------------------------------------------------------------------- */
311: static PetscErrorCode FormNodalRhs(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *r)
312: {
313: PetscInt i, j, indx;
315: PetscFunctionBeginUser;
316: indx = 0;
317: for (j = 0; j < (P + 1); ++j) {
318: for (i = 0; i < (P + 1); ++i) {
319: r[indx] = src(x + H * nds[i], y + H * nds[j]);
320: indx++;
321: }
322: }
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: /* --------------------------------------------------------------------- */
328: static PetscErrorCode FormNodalSoln(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *u)
329: {
330: PetscInt i, j, indx;
332: PetscFunctionBeginUser;
333: indx = 0;
334: for (j = 0; j < (P + 1); ++j) {
335: for (i = 0; i < (P + 1); ++i) {
336: u[indx] = ubdy(x + H * nds[i], y + H * nds[j]);
337: indx++;
338: }
339: }
340: PetscFunctionReturn(PETSC_SUCCESS);
341: }
343: /* --------------------------------------------------------------------- */
345: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
346: {
347: PetscReal denominator = 1.;
348: PetscReal numerator = 1.;
349: PetscInt i = 0;
351: for (i = 0; i < (order + 1); i++) {
352: if (i != basis) {
353: numerator *= (xval - xLocVal[i]);
354: denominator *= (xLocVal[basis] - xLocVal[i]);
355: }
356: }
357: return (numerator / denominator);
358: }
360: /* --------------------------------------------------------------------- */
362: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
363: {
364: PetscReal denominator;
365: PetscReal numerator;
366: PetscReal numtmp;
367: PetscInt i = 0, j = 0;
369: denominator = 1.;
370: for (i = 0; i < (order + 1); i++) {
371: if (i != basis) denominator *= (xLocVal[basis] - xLocVal[i]);
372: }
373: numerator = 0.;
374: for (j = 0; j < (order + 1); ++j) {
375: if (j != basis) {
376: numtmp = 1.;
377: for (i = 0; i < (order + 1); ++i) {
378: if (i != basis && j != i) numtmp *= (xval - xLocVal[i]);
379: }
380: numerator += numtmp;
381: }
382: }
384: return (numerator / denominator);
385: }
387: /* --------------------------------------------------------------------- */
389: static PetscReal ubdy(PetscReal x, PetscReal y)
390: {
391: return x * x * y * y;
392: }
394: static PetscReal src(PetscReal x, PetscReal y)
395: {
396: return -2. * y * y - 2. * x * x;
397: }
398: /* --------------------------------------------------------------------- */
400: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
401: /*******************************************************************************
402: Given the lower and upper limits of integration x1 and x2, and given n, this
403: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
404: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
405: *******************************************************************************/
406: {
407: PetscInt j, m;
408: PetscReal z1, z, xm, xl, q, qp, Ln, scale;
409: if (n == 1) {
410: x[0] = x1; /* Scale the root to the desired interval, */
411: x[1] = x2; /* and put in its symmetric counterpart. */
412: w[0] = 1.; /* Compute the weight */
413: w[1] = 1.; /* and its symmetric counterpart. */
414: } else {
415: x[0] = x1; /* Scale the root to the desired interval, */
416: x[n] = x2; /* and put in its symmetric counterpart. */
417: w[0] = 2. / (n * (n + 1)); /* Compute the weight */
418: w[n] = 2. / (n * (n + 1)); /* and its symmetric counterpart. */
419: m = (n + 1) / 2; /* The roots are symmetric, so we only find half of them. */
420: xm = 0.5 * (x2 + x1);
421: xl = 0.5 * (x2 - x1);
422: for (j = 1; j <= (m - 1); j++) { /* Loop over the desired roots. */
423: z = -1.0 * PetscCosReal((PETSC_PI * (j + 0.25) / (n)) - (3.0 / (8.0 * n * PETSC_PI)) * (1.0 / (j + 0.25)));
424: /* Starting with the above approximation to the ith root, we enter */
425: /* the main loop of refinement by Newton's method. */
426: do {
427: qAndLEvaluation(n, z, &q, &qp, &Ln);
428: z1 = z;
429: z = z1 - q / qp; /* Newton's method. */
430: } while (PetscAbsReal(z - z1) > 3.0e-11);
431: qAndLEvaluation(n, z, &q, &qp, &Ln);
432: x[j] = xm + xl * z; /* Scale the root to the desired interval, */
433: x[n - j] = xm - xl * z; /* and put in its symmetric counterpart. */
434: w[j] = 2.0 / (n * (n + 1) * Ln * Ln); /* Compute the weight */
435: w[n - j] = w[j]; /* and its symmetric counterpart. */
436: }
437: }
438: if (n % 2 == 0) {
439: qAndLEvaluation(n, 0.0, &q, &qp, &Ln);
440: x[n / 2] = (x2 - x1) / 2.0;
441: w[n / 2] = 2.0 / (n * (n + 1) * Ln * Ln);
442: }
443: /* scale the weights according to mapping from [-1,1] to [0,1] */
444: scale = (x2 - x1) / 2.0;
445: for (j = 0; j <= n; ++j) w[j] = w[j] * scale;
446: }
448: /******************************************************************************/
449: static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
450: /*******************************************************************************
451: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
452: addition to L_N(x) as these are needed for the GLL points. See the book titled
453: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
454: for Scientists and Engineers" by David A. Kopriva.
455: *******************************************************************************/
456: {
457: PetscInt k;
459: PetscReal Lnp;
460: PetscReal Lnp1, Lnp1p;
461: PetscReal Lnm1, Lnm1p;
462: PetscReal Lnm2, Lnm2p;
464: Lnm1 = 1.0;
465: *Ln = x;
466: Lnm1p = 0.0;
467: Lnp = 1.0;
469: for (k = 2; k <= n; ++k) {
470: Lnm2 = Lnm1;
471: Lnm1 = *Ln;
472: Lnm2p = Lnm1p;
473: Lnm1p = Lnp;
474: *Ln = (2. * k - 1.) / (1.0 * k) * x * Lnm1 - (k - 1.) / (1.0 * k) * Lnm2;
475: Lnp = Lnm2p + (2.0 * k - 1) * Lnm1;
476: }
477: k = n + 1;
478: Lnp1 = (2. * k - 1.) / (1.0 * k) * x * (*Ln) - (k - 1.) / (1.0 * k) * Lnm1;
479: Lnp1p = Lnm1p + (2.0 * k - 1) * (*Ln);
480: *q = Lnp1 - Lnm1;
481: *qp = Lnp1p - Lnm1p;
482: }
484: /*TEST
486: test:
487: nsize: 2
488: args: -ksp_monitor_short
490: TEST*/