Actual source code: ex226.c

  1: static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";

  3: #include <petscmat.h>

  5: /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
  6: int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
  7: {
  8:   return i + j * m + k * m * n;
  9: }

 11: int main(int argc, char **argv)
 12: {
 13:   Mat         A, B, C, PtAP, PtAP_copy, PtAP_squared;
 14:   PetscInt    i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
 15:   PetscScalar v;
 16:   PetscBool   equal = PETSC_FALSE, mat_view = PETSC_FALSE;
 17:   char        stencil[PETSC_MAX_PATH_LEN];
 18: #if defined(PETSC_USE_LOG)
 19:   PetscLogStage fullMatMatMultStage;
 20: #endif

 22:   PetscFunctionBeginUser;
 23:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 24:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 25:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 26:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
 27:   PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
 28:   PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));

 30:   /* Create a aij matrix A */
 31:   M = N = m * n * o;
 32:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 33:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
 34:   PetscCall(MatSetType(A, MATAIJ));
 35:   PetscCall(MatSetFromOptions(A));

 37:   /* Consistency checks */
 38:   PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");

 40:   /************ 2D stencils ***************/
 41:   PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
 42:   if (equal) { /* 5-point stencil, 2D */
 43:     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
 44:     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
 45:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
 46:     for (Ii = Istart; Ii < Iend; Ii++) {
 47:       v = -1.0;
 48:       k = Ii / (m * n);
 49:       j = (Ii - k * m * n) / m;
 50:       i = (Ii - k * m * n - j * m);
 51:       if (i > 0) {
 52:         J = global_index(i - 1, j, k, m, n);
 53:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 54:       }
 55:       if (i < m - 1) {
 56:         J = global_index(i + 1, j, k, m, n);
 57:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 58:       }
 59:       if (j > 0) {
 60:         J = global_index(i, j - 1, k, m, n);
 61:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 62:       }
 63:       if (j < n - 1) {
 64:         J = global_index(i, j + 1, k, m, n);
 65:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 66:       }
 67:       v = 4.0;
 68:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
 69:     }
 70:   }
 71:   PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
 72:   if (equal) { /* 9-point stencil, 2D */
 73:     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
 74:     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
 75:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
 76:     for (Ii = Istart; Ii < Iend; Ii++) {
 77:       v = -1.0;
 78:       k = Ii / (m * n);
 79:       j = (Ii - k * m * n) / m;
 80:       i = (Ii - k * m * n - j * m);
 81:       if (i > 0) {
 82:         J = global_index(i - 1, j, k, m, n);
 83:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 84:       }
 85:       if (i > 0 && j > 0) {
 86:         J = global_index(i - 1, j - 1, k, m, n);
 87:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 88:       }
 89:       if (j > 0) {
 90:         J = global_index(i, j - 1, k, m, n);
 91:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 92:       }
 93:       if (i < m - 1 && j > 0) {
 94:         J = global_index(i + 1, j - 1, k, m, n);
 95:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
 96:       }
 97:       if (i < m - 1) {
 98:         J = global_index(i + 1, j, k, m, n);
 99:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
100:       }
101:       if (i < m - 1 && j < n - 1) {
102:         J = global_index(i + 1, j + 1, k, m, n);
103:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
104:       }
105:       if (j < n - 1) {
106:         J = global_index(i, j + 1, k, m, n);
107:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
108:       }
109:       if (i > 0 && j < n - 1) {
110:         J = global_index(i - 1, j + 1, k, m, n);
111:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
112:       }
113:       v = 8.0;
114:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
115:     }
116:   }
117:   PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
118:   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
119:     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
120:     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
121:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
122:     for (Ii = Istart; Ii < Iend; Ii++) {
123:       v = -1.0;
124:       k = Ii / (m * n);
125:       j = (Ii - k * m * n) / m;
126:       i = (Ii - k * m * n - j * m);
127:       if (i > 0) {
128:         J = global_index(i - 1, j, k, m, n);
129:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
130:       }
131:       if (i > 1) {
132:         J = global_index(i - 2, j, k, m, n);
133:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
134:       }
135:       if (i < m - 1) {
136:         J = global_index(i + 1, j, k, m, n);
137:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
138:       }
139:       if (i < m - 2) {
140:         J = global_index(i + 2, j, k, m, n);
141:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
142:       }
143:       if (j > 0) {
144:         J = global_index(i, j - 1, k, m, n);
145:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
146:       }
147:       if (j > 1) {
148:         J = global_index(i, j - 2, k, m, n);
149:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
150:       }
151:       if (j < n - 1) {
152:         J = global_index(i, j + 1, k, m, n);
153:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
154:       }
155:       if (j < n - 2) {
156:         J = global_index(i, j + 2, k, m, n);
157:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
158:       }
159:       v = 8.0;
160:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
161:     }
162:   }
163:   PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
164:   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
165:     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
166:     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
167:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
168:     for (Ii = Istart; Ii < Iend; Ii++) {
169:       v = -1.0;
170:       k = Ii / (m * n);
171:       j = (Ii - k * m * n) / m;
172:       i = (Ii - k * m * n - j * m);
173:       if (i > 0) {
174:         J = global_index(i - 1, j, k, m, n);
175:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
176:       }
177:       if (i > 1) {
178:         J = global_index(i - 2, j, k, m, n);
179:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
180:       }
181:       if (i > 2) {
182:         J = global_index(i - 3, j, k, m, n);
183:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
184:       }
185:       if (i < m - 1) {
186:         J = global_index(i + 1, j, k, m, n);
187:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
188:       }
189:       if (i < m - 2) {
190:         J = global_index(i + 2, j, k, m, n);
191:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
192:       }
193:       if (i < m - 3) {
194:         J = global_index(i + 3, j, k, m, n);
195:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
196:       }
197:       if (j > 0) {
198:         J = global_index(i, j - 1, k, m, n);
199:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
200:       }
201:       if (j > 1) {
202:         J = global_index(i, j - 2, k, m, n);
203:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
204:       }
205:       if (j > 2) {
206:         J = global_index(i, j - 3, k, m, n);
207:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
208:       }
209:       if (j < n - 1) {
210:         J = global_index(i, j + 1, k, m, n);
211:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
212:       }
213:       if (j < n - 2) {
214:         J = global_index(i, j + 2, k, m, n);
215:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
216:       }
217:       if (j < n - 3) {
218:         J = global_index(i, j + 3, k, m, n);
219:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
220:       }
221:       v = 12.0;
222:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
223:     }
224:   }
225:   /************ 3D stencils ***************/
226:   PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
227:   if (equal) { /* 7-point stencil, 3D */
228:     PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
229:     PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
230:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
231:     for (Ii = Istart; Ii < Iend; Ii++) {
232:       v = -1.0;
233:       k = Ii / (m * n);
234:       j = (Ii - k * m * n) / m;
235:       i = (Ii - k * m * n - j * m);
236:       if (i > 0) {
237:         J = global_index(i - 1, j, k, m, n);
238:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
239:       }
240:       if (i < m - 1) {
241:         J = global_index(i + 1, j, k, m, n);
242:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
243:       }
244:       if (j > 0) {
245:         J = global_index(i, j - 1, k, m, n);
246:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
247:       }
248:       if (j < n - 1) {
249:         J = global_index(i, j + 1, k, m, n);
250:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
251:       }
252:       if (k > 0) {
253:         J = global_index(i, j, k - 1, m, n);
254:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
255:       }
256:       if (k < o - 1) {
257:         J = global_index(i, j, k + 1, m, n);
258:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
259:       }
260:       v = 6.0;
261:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
262:     }
263:   }
264:   PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
265:   if (equal) { /* 13-point stencil, 3D */
266:     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
267:     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
268:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
269:     for (Ii = Istart; Ii < Iend; Ii++) {
270:       v = -1.0;
271:       k = Ii / (m * n);
272:       j = (Ii - k * m * n) / m;
273:       i = (Ii - k * m * n - j * m);
274:       if (i > 0) {
275:         J = global_index(i - 1, j, k, m, n);
276:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
277:       }
278:       if (i > 1) {
279:         J = global_index(i - 2, j, k, m, n);
280:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
281:       }
282:       if (i < m - 1) {
283:         J = global_index(i + 1, j, k, m, n);
284:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
285:       }
286:       if (i < m - 2) {
287:         J = global_index(i + 2, j, k, m, n);
288:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
289:       }
290:       if (j > 0) {
291:         J = global_index(i, j - 1, k, m, n);
292:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
293:       }
294:       if (j > 1) {
295:         J = global_index(i, j - 2, k, m, n);
296:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
297:       }
298:       if (j < n - 1) {
299:         J = global_index(i, j + 1, k, m, n);
300:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
301:       }
302:       if (j < n - 2) {
303:         J = global_index(i, j + 2, k, m, n);
304:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
305:       }
306:       if (k > 0) {
307:         J = global_index(i, j, k - 1, m, n);
308:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
309:       }
310:       if (k > 1) {
311:         J = global_index(i, j, k - 2, m, n);
312:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
313:       }
314:       if (k < o - 1) {
315:         J = global_index(i, j, k + 1, m, n);
316:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
317:       }
318:       if (k < o - 2) {
319:         J = global_index(i, j, k + 2, m, n);
320:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
321:       }
322:       v = 12.0;
323:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
324:     }
325:   }
326:   PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
327:   if (equal) { /* 19-point stencil, 3D */
328:     PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
329:     PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
330:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
331:     for (Ii = Istart; Ii < Iend; Ii++) {
332:       v = -1.0;
333:       k = Ii / (m * n);
334:       j = (Ii - k * m * n) / m;
335:       i = (Ii - k * m * n - j * m);
336:       /* one hop */
337:       if (i > 0) {
338:         J = global_index(i - 1, j, k, m, n);
339:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
340:       }
341:       if (i < m - 1) {
342:         J = global_index(i + 1, j, k, m, n);
343:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
344:       }
345:       if (j > 0) {
346:         J = global_index(i, j - 1, k, m, n);
347:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
348:       }
349:       if (j < n - 1) {
350:         J = global_index(i, j + 1, k, m, n);
351:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
352:       }
353:       if (k > 0) {
354:         J = global_index(i, j, k - 1, m, n);
355:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
356:       }
357:       if (k < o - 1) {
358:         J = global_index(i, j, k + 1, m, n);
359:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
360:       }
361:       /* two hops */
362:       if (i > 0 && j > 0) {
363:         J = global_index(i - 1, j - 1, k, m, n);
364:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
365:       }
366:       if (i > 0 && k > 0) {
367:         J = global_index(i - 1, j, k - 1, m, n);
368:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
369:       }
370:       if (i > 0 && j < n - 1) {
371:         J = global_index(i - 1, j + 1, k, m, n);
372:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
373:       }
374:       if (i > 0 && k < o - 1) {
375:         J = global_index(i - 1, j, k + 1, m, n);
376:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
377:       }
378:       if (i < m - 1 && j > 0) {
379:         J = global_index(i + 1, j - 1, k, m, n);
380:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
381:       }
382:       if (i < m - 1 && k > 0) {
383:         J = global_index(i + 1, j, k - 1, m, n);
384:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
385:       }
386:       if (i < m - 1 && j < n - 1) {
387:         J = global_index(i + 1, j + 1, k, m, n);
388:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
389:       }
390:       if (i < m - 1 && k < o - 1) {
391:         J = global_index(i + 1, j, k + 1, m, n);
392:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
393:       }
394:       if (j > 0 && k > 0) {
395:         J = global_index(i, j - 1, k - 1, m, n);
396:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
397:       }
398:       if (j > 0 && k < o - 1) {
399:         J = global_index(i, j - 1, k + 1, m, n);
400:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
401:       }
402:       if (j < n - 1 && k > 0) {
403:         J = global_index(i, j + 1, k - 1, m, n);
404:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
405:       }
406:       if (j < n - 1 && k < o - 1) {
407:         J = global_index(i, j + 1, k + 1, m, n);
408:         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
409:       }
410:       v = 18.0;
411:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
412:     }
413:   }
414:   PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
415:   if (equal) { /* 27-point stencil, 3D */
416:     PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
417:     PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
418:     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
419:     for (Ii = Istart; Ii < Iend; Ii++) {
420:       v = -1.0;
421:       k = Ii / (m * n);
422:       j = (Ii - k * m * n) / m;
423:       i = (Ii - k * m * n - j * m);
424:       if (k > 0) {
425:         if (j > 0) {
426:           if (i > 0) {
427:             J = global_index(i - 1, j - 1, k - 1, m, n);
428:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
429:           }
430:           J = global_index(i, j - 1, k - 1, m, n);
431:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
432:           if (i < m - 1) {
433:             J = global_index(i + 1, j - 1, k - 1, m, n);
434:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
435:           }
436:         }
437:         {
438:           if (i > 0) {
439:             J = global_index(i - 1, j, k - 1, m, n);
440:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
441:           }
442:           J = global_index(i, j, k - 1, m, n);
443:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
444:           if (i < m - 1) {
445:             J = global_index(i + 1, j, k - 1, m, n);
446:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
447:           }
448:         }
449:         if (j < n - 1) {
450:           if (i > 0) {
451:             J = global_index(i - 1, j + 1, k - 1, m, n);
452:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
453:           }
454:           J = global_index(i, j + 1, k - 1, m, n);
455:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
456:           if (i < m - 1) {
457:             J = global_index(i + 1, j + 1, k - 1, m, n);
458:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
459:           }
460:         }
461:       }
462:       {
463:         if (j > 0) {
464:           if (i > 0) {
465:             J = global_index(i - 1, j - 1, k, m, n);
466:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
467:           }
468:           J = global_index(i, j - 1, k, m, n);
469:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
470:           if (i < m - 1) {
471:             J = global_index(i + 1, j - 1, k, m, n);
472:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
473:           }
474:         }
475:         {
476:           if (i > 0) {
477:             J = global_index(i - 1, j, k, m, n);
478:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
479:           }
480:           J = global_index(i, j, k, m, n);
481:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
482:           if (i < m - 1) {
483:             J = global_index(i + 1, j, k, m, n);
484:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
485:           }
486:         }
487:         if (j < n - 1) {
488:           if (i > 0) {
489:             J = global_index(i - 1, j + 1, k, m, n);
490:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
491:           }
492:           J = global_index(i, j + 1, k, m, n);
493:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
494:           if (i < m - 1) {
495:             J = global_index(i + 1, j + 1, k, m, n);
496:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
497:           }
498:         }
499:       }
500:       if (k < o - 1) {
501:         if (j > 0) {
502:           if (i > 0) {
503:             J = global_index(i - 1, j - 1, k + 1, m, n);
504:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
505:           }
506:           J = global_index(i, j - 1, k + 1, m, n);
507:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
508:           if (i < m - 1) {
509:             J = global_index(i + 1, j - 1, k + 1, m, n);
510:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
511:           }
512:         }
513:         {
514:           if (i > 0) {
515:             J = global_index(i - 1, j, k + 1, m, n);
516:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
517:           }
518:           J = global_index(i, j, k + 1, m, n);
519:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
520:           if (i < m - 1) {
521:             J = global_index(i + 1, j, k + 1, m, n);
522:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
523:           }
524:         }
525:         if (j < n - 1) {
526:           if (i > 0) {
527:             J = global_index(i - 1, j + 1, k + 1, m, n);
528:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
529:           }
530:           J = global_index(i, j + 1, k + 1, m, n);
531:           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
532:           if (i < m - 1) {
533:             J = global_index(i + 1, j + 1, k + 1, m, n);
534:             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
535:           }
536:         }
537:       }
538:       v = 26.0;
539:       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
540:     }
541:   }
542:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
543:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

545:   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
546:   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

548:   PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));

550:   /* Test C = A*B */
551:   PetscCall(PetscLogStagePush(fullMatMatMultStage));
552:   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));

554:   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
555:   PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP));
556:   PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
557:   PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared));

559:   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
560:   PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));

562:   PetscCall(MatDestroy(&PtAP_squared));
563:   PetscCall(MatDestroy(&PtAP_copy));
564:   PetscCall(MatDestroy(&PtAP));
565:   PetscCall(MatDestroy(&C));
566:   PetscCall(MatDestroy(&B));
567:   PetscCall(MatDestroy(&A));
568:   PetscCall(PetscFinalize());
569:   return 0;
570: }

572: /*TEST

574:  test:
575:       suffix: 1
576:       nsize: 1
577:       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted

579:  test:
580:        suffix: 2
581:        nsize: 1
582:        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge

584:  test:
585:       suffix: 3
586:       nsize: 4
587:       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi

589: TEST*/