Actual source code: ex226.c
1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
3: #include <petscmat.h>
5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6: int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7: {
8: return i + j * m + k * m * n;
9: }
11: int main(int argc, char **argv)
12: {
13: Mat A, B, C, PtAP, PtAP_copy, PtAP_squared;
14: PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15: PetscScalar v;
16: PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17: char stencil[PETSC_MAX_PATH_LEN];
18: #if defined(PETSC_USE_LOG)
19: PetscLogStage fullMatMatMultStage;
20: #endif
22: PetscFunctionBeginUser;
23: PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
24: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
25: PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
26: PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
27: PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
28: PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
30: /* Create a aij matrix A */
31: M = N = m * n * o;
32: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
33: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
34: PetscCall(MatSetType(A, MATAIJ));
35: PetscCall(MatSetFromOptions(A));
37: /* Consistency checks */
38: PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
40: /************ 2D stencils ***************/
41: PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
42: if (equal) { /* 5-point stencil, 2D */
43: PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
44: PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
45: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
46: for (Ii = Istart; Ii < Iend; Ii++) {
47: v = -1.0;
48: k = Ii / (m * n);
49: j = (Ii - k * m * n) / m;
50: i = (Ii - k * m * n - j * m);
51: if (i > 0) {
52: J = global_index(i - 1, j, k, m, n);
53: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
54: }
55: if (i < m - 1) {
56: J = global_index(i + 1, j, k, m, n);
57: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
58: }
59: if (j > 0) {
60: J = global_index(i, j - 1, k, m, n);
61: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
62: }
63: if (j < n - 1) {
64: J = global_index(i, j + 1, k, m, n);
65: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
66: }
67: v = 4.0;
68: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
69: }
70: }
71: PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
72: if (equal) { /* 9-point stencil, 2D */
73: PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
74: PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
75: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
76: for (Ii = Istart; Ii < Iend; Ii++) {
77: v = -1.0;
78: k = Ii / (m * n);
79: j = (Ii - k * m * n) / m;
80: i = (Ii - k * m * n - j * m);
81: if (i > 0) {
82: J = global_index(i - 1, j, k, m, n);
83: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
84: }
85: if (i > 0 && j > 0) {
86: J = global_index(i - 1, j - 1, k, m, n);
87: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
88: }
89: if (j > 0) {
90: J = global_index(i, j - 1, k, m, n);
91: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
92: }
93: if (i < m - 1 && j > 0) {
94: J = global_index(i + 1, j - 1, k, m, n);
95: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
96: }
97: if (i < m - 1) {
98: J = global_index(i + 1, j, k, m, n);
99: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
100: }
101: if (i < m - 1 && j < n - 1) {
102: J = global_index(i + 1, j + 1, k, m, n);
103: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
104: }
105: if (j < n - 1) {
106: J = global_index(i, j + 1, k, m, n);
107: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
108: }
109: if (i > 0 && j < n - 1) {
110: J = global_index(i - 1, j + 1, k, m, n);
111: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
112: }
113: v = 8.0;
114: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
115: }
116: }
117: PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
118: if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
119: PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
120: PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
121: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
122: for (Ii = Istart; Ii < Iend; Ii++) {
123: v = -1.0;
124: k = Ii / (m * n);
125: j = (Ii - k * m * n) / m;
126: i = (Ii - k * m * n - j * m);
127: if (i > 0) {
128: J = global_index(i - 1, j, k, m, n);
129: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
130: }
131: if (i > 1) {
132: J = global_index(i - 2, j, k, m, n);
133: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
134: }
135: if (i < m - 1) {
136: J = global_index(i + 1, j, k, m, n);
137: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
138: }
139: if (i < m - 2) {
140: J = global_index(i + 2, j, k, m, n);
141: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
142: }
143: if (j > 0) {
144: J = global_index(i, j - 1, k, m, n);
145: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
146: }
147: if (j > 1) {
148: J = global_index(i, j - 2, k, m, n);
149: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
150: }
151: if (j < n - 1) {
152: J = global_index(i, j + 1, k, m, n);
153: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
154: }
155: if (j < n - 2) {
156: J = global_index(i, j + 2, k, m, n);
157: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
158: }
159: v = 8.0;
160: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
161: }
162: }
163: PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
164: if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
165: PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
166: PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
167: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
168: for (Ii = Istart; Ii < Iend; Ii++) {
169: v = -1.0;
170: k = Ii / (m * n);
171: j = (Ii - k * m * n) / m;
172: i = (Ii - k * m * n - j * m);
173: if (i > 0) {
174: J = global_index(i - 1, j, k, m, n);
175: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
176: }
177: if (i > 1) {
178: J = global_index(i - 2, j, k, m, n);
179: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
180: }
181: if (i > 2) {
182: J = global_index(i - 3, j, k, m, n);
183: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
184: }
185: if (i < m - 1) {
186: J = global_index(i + 1, j, k, m, n);
187: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
188: }
189: if (i < m - 2) {
190: J = global_index(i + 2, j, k, m, n);
191: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
192: }
193: if (i < m - 3) {
194: J = global_index(i + 3, j, k, m, n);
195: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
196: }
197: if (j > 0) {
198: J = global_index(i, j - 1, k, m, n);
199: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
200: }
201: if (j > 1) {
202: J = global_index(i, j - 2, k, m, n);
203: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
204: }
205: if (j > 2) {
206: J = global_index(i, j - 3, k, m, n);
207: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
208: }
209: if (j < n - 1) {
210: J = global_index(i, j + 1, k, m, n);
211: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
212: }
213: if (j < n - 2) {
214: J = global_index(i, j + 2, k, m, n);
215: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
216: }
217: if (j < n - 3) {
218: J = global_index(i, j + 3, k, m, n);
219: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
220: }
221: v = 12.0;
222: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
223: }
224: }
225: /************ 3D stencils ***************/
226: PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
227: if (equal) { /* 7-point stencil, 3D */
228: PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
229: PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
230: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
231: for (Ii = Istart; Ii < Iend; Ii++) {
232: v = -1.0;
233: k = Ii / (m * n);
234: j = (Ii - k * m * n) / m;
235: i = (Ii - k * m * n - j * m);
236: if (i > 0) {
237: J = global_index(i - 1, j, k, m, n);
238: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
239: }
240: if (i < m - 1) {
241: J = global_index(i + 1, j, k, m, n);
242: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
243: }
244: if (j > 0) {
245: J = global_index(i, j - 1, k, m, n);
246: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
247: }
248: if (j < n - 1) {
249: J = global_index(i, j + 1, k, m, n);
250: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
251: }
252: if (k > 0) {
253: J = global_index(i, j, k - 1, m, n);
254: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
255: }
256: if (k < o - 1) {
257: J = global_index(i, j, k + 1, m, n);
258: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
259: }
260: v = 6.0;
261: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
262: }
263: }
264: PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
265: if (equal) { /* 13-point stencil, 3D */
266: PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
267: PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
268: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
269: for (Ii = Istart; Ii < Iend; Ii++) {
270: v = -1.0;
271: k = Ii / (m * n);
272: j = (Ii - k * m * n) / m;
273: i = (Ii - k * m * n - j * m);
274: if (i > 0) {
275: J = global_index(i - 1, j, k, m, n);
276: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
277: }
278: if (i > 1) {
279: J = global_index(i - 2, j, k, m, n);
280: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
281: }
282: if (i < m - 1) {
283: J = global_index(i + 1, j, k, m, n);
284: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
285: }
286: if (i < m - 2) {
287: J = global_index(i + 2, j, k, m, n);
288: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
289: }
290: if (j > 0) {
291: J = global_index(i, j - 1, k, m, n);
292: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
293: }
294: if (j > 1) {
295: J = global_index(i, j - 2, k, m, n);
296: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
297: }
298: if (j < n - 1) {
299: J = global_index(i, j + 1, k, m, n);
300: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
301: }
302: if (j < n - 2) {
303: J = global_index(i, j + 2, k, m, n);
304: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
305: }
306: if (k > 0) {
307: J = global_index(i, j, k - 1, m, n);
308: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
309: }
310: if (k > 1) {
311: J = global_index(i, j, k - 2, m, n);
312: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
313: }
314: if (k < o - 1) {
315: J = global_index(i, j, k + 1, m, n);
316: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
317: }
318: if (k < o - 2) {
319: J = global_index(i, j, k + 2, m, n);
320: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
321: }
322: v = 12.0;
323: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
324: }
325: }
326: PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
327: if (equal) { /* 19-point stencil, 3D */
328: PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
329: PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
330: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
331: for (Ii = Istart; Ii < Iend; Ii++) {
332: v = -1.0;
333: k = Ii / (m * n);
334: j = (Ii - k * m * n) / m;
335: i = (Ii - k * m * n - j * m);
336: /* one hop */
337: if (i > 0) {
338: J = global_index(i - 1, j, k, m, n);
339: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
340: }
341: if (i < m - 1) {
342: J = global_index(i + 1, j, k, m, n);
343: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
344: }
345: if (j > 0) {
346: J = global_index(i, j - 1, k, m, n);
347: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
348: }
349: if (j < n - 1) {
350: J = global_index(i, j + 1, k, m, n);
351: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
352: }
353: if (k > 0) {
354: J = global_index(i, j, k - 1, m, n);
355: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
356: }
357: if (k < o - 1) {
358: J = global_index(i, j, k + 1, m, n);
359: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
360: }
361: /* two hops */
362: if (i > 0 && j > 0) {
363: J = global_index(i - 1, j - 1, k, m, n);
364: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
365: }
366: if (i > 0 && k > 0) {
367: J = global_index(i - 1, j, k - 1, m, n);
368: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
369: }
370: if (i > 0 && j < n - 1) {
371: J = global_index(i - 1, j + 1, k, m, n);
372: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
373: }
374: if (i > 0 && k < o - 1) {
375: J = global_index(i - 1, j, k + 1, m, n);
376: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
377: }
378: if (i < m - 1 && j > 0) {
379: J = global_index(i + 1, j - 1, k, m, n);
380: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
381: }
382: if (i < m - 1 && k > 0) {
383: J = global_index(i + 1, j, k - 1, m, n);
384: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
385: }
386: if (i < m - 1 && j < n - 1) {
387: J = global_index(i + 1, j + 1, k, m, n);
388: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
389: }
390: if (i < m - 1 && k < o - 1) {
391: J = global_index(i + 1, j, k + 1, m, n);
392: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
393: }
394: if (j > 0 && k > 0) {
395: J = global_index(i, j - 1, k - 1, m, n);
396: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
397: }
398: if (j > 0 && k < o - 1) {
399: J = global_index(i, j - 1, k + 1, m, n);
400: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
401: }
402: if (j < n - 1 && k > 0) {
403: J = global_index(i, j + 1, k - 1, m, n);
404: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
405: }
406: if (j < n - 1 && k < o - 1) {
407: J = global_index(i, j + 1, k + 1, m, n);
408: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
409: }
410: v = 18.0;
411: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
412: }
413: }
414: PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
415: if (equal) { /* 27-point stencil, 3D */
416: PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
417: PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
418: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
419: for (Ii = Istart; Ii < Iend; Ii++) {
420: v = -1.0;
421: k = Ii / (m * n);
422: j = (Ii - k * m * n) / m;
423: i = (Ii - k * m * n - j * m);
424: if (k > 0) {
425: if (j > 0) {
426: if (i > 0) {
427: J = global_index(i - 1, j - 1, k - 1, m, n);
428: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
429: }
430: J = global_index(i, j - 1, k - 1, m, n);
431: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
432: if (i < m - 1) {
433: J = global_index(i + 1, j - 1, k - 1, m, n);
434: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
435: }
436: }
437: {
438: if (i > 0) {
439: J = global_index(i - 1, j, k - 1, m, n);
440: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
441: }
442: J = global_index(i, j, k - 1, m, n);
443: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
444: if (i < m - 1) {
445: J = global_index(i + 1, j, k - 1, m, n);
446: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
447: }
448: }
449: if (j < n - 1) {
450: if (i > 0) {
451: J = global_index(i - 1, j + 1, k - 1, m, n);
452: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
453: }
454: J = global_index(i, j + 1, k - 1, m, n);
455: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
456: if (i < m - 1) {
457: J = global_index(i + 1, j + 1, k - 1, m, n);
458: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
459: }
460: }
461: }
462: {
463: if (j > 0) {
464: if (i > 0) {
465: J = global_index(i - 1, j - 1, k, m, n);
466: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
467: }
468: J = global_index(i, j - 1, k, m, n);
469: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
470: if (i < m - 1) {
471: J = global_index(i + 1, j - 1, k, m, n);
472: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
473: }
474: }
475: {
476: if (i > 0) {
477: J = global_index(i - 1, j, k, m, n);
478: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
479: }
480: J = global_index(i, j, k, m, n);
481: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
482: if (i < m - 1) {
483: J = global_index(i + 1, j, k, m, n);
484: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
485: }
486: }
487: if (j < n - 1) {
488: if (i > 0) {
489: J = global_index(i - 1, j + 1, k, m, n);
490: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
491: }
492: J = global_index(i, j + 1, k, m, n);
493: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
494: if (i < m - 1) {
495: J = global_index(i + 1, j + 1, k, m, n);
496: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
497: }
498: }
499: }
500: if (k < o - 1) {
501: if (j > 0) {
502: if (i > 0) {
503: J = global_index(i - 1, j - 1, k + 1, m, n);
504: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
505: }
506: J = global_index(i, j - 1, k + 1, m, n);
507: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
508: if (i < m - 1) {
509: J = global_index(i + 1, j - 1, k + 1, m, n);
510: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
511: }
512: }
513: {
514: if (i > 0) {
515: J = global_index(i - 1, j, k + 1, m, n);
516: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
517: }
518: J = global_index(i, j, k + 1, m, n);
519: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
520: if (i < m - 1) {
521: J = global_index(i + 1, j, k + 1, m, n);
522: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
523: }
524: }
525: if (j < n - 1) {
526: if (i > 0) {
527: J = global_index(i - 1, j + 1, k + 1, m, n);
528: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
529: }
530: J = global_index(i, j + 1, k + 1, m, n);
531: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
532: if (i < m - 1) {
533: J = global_index(i + 1, j + 1, k + 1, m, n);
534: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
535: }
536: }
537: }
538: v = 26.0;
539: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
540: }
541: }
542: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
543: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
545: /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
546: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
548: PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
550: /* Test C = A*B */
551: PetscCall(PetscLogStagePush(fullMatMatMultStage));
552: PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
554: /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
555: PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP));
556: PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
557: PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared));
559: PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
560: PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
562: PetscCall(MatDestroy(&PtAP_squared));
563: PetscCall(MatDestroy(&PtAP_copy));
564: PetscCall(MatDestroy(&PtAP));
565: PetscCall(MatDestroy(&C));
566: PetscCall(MatDestroy(&B));
567: PetscCall(MatDestroy(&A));
568: PetscCall(PetscFinalize());
569: return 0;
570: }
572: /*TEST
574: test:
575: suffix: 1
576: nsize: 1
577: args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
579: test:
580: suffix: 2
581: nsize: 1
582: args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
584: test:
585: suffix: 3
586: nsize: 4
587: args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
589: TEST*/