Actual source code: xmllogevent.c
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmlviewer.h"
14: #if defined(PETSC_USE_LOG)
16: /*
17: * Support for nested PetscTimers
18: *
19: * PetscTimers keep track of a lot of useful information: Wall clock times,
20: * message passing statistics, flop counts. Information about the nested structure
21: * of the timers is lost. Example:
22: *
23: * 7:30 Start: awake
24: * 7:30 Start: morning routine
25: * 7:40 Start: eat
26: * 7:49 Done: eat
27: * 7:43 Done: morning routine
28: * 8:15 Start: work
29: * 12:15 Start: eat
30: * 12:45 Done: eat
31: * 16:00 Done: work
32: * 16:30 Start: evening routine
33: * 18:30 Start: eat
34: * 19:15 Done: eat
35: * 22:00 Done: evening routine
36: * 22:00 Done: awake
37: *
38: * Petsc timers provide the following timer results:
39: *
40: * awake: 1 call 14:30 hours
41: * morning routine: 1 call 0:13 hours
42: * eat: 3 calls 1:24 hours
43: * work: 1 call 7:45 hours
44: * evening routine 1 call 5:30 hours
45: *
46: * Nested timers can be used to get the following table:
47: *
48: * [1 call]: awake 14:30 hours
49: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
50: * [1 call]: eat 0:09 hours (69 % of morning routine)
51: * rest (morning routine) 0:04 hours (31 % of morning routine)
52: * [1 call]: work 7:45 hours (53 % of awake)
53: * [1 call]: eat 0:30 hours ( 6 % of work)
54: * rest (work) 7:15 hours (94 % of work)
55: * [1 call]: evening routine 5:30 hours (38 % of awake)
56: * [1 call]: eat 0:45 hours (14 % of evening routine)
57: * rest (evening routine) 4:45 hours (86 % of morning routine)
58: *
59: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
60: * the nested timers make the stages unnecessary.
61: *
62: */
64: /*
65: * Data structures for keeping track of nested timers:
66: *
67: * nestedEvents: information about the timers that have actually been activated
68: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
69: *
70: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
71: * (one or more) default timers, where one of the default timers has the same event-id as the
72: * nested one.
73: *
74: * Because of the risk of confusion between nested timer ids and default timer ids, we
75: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
76: * only for default events. Also, all nested event variables are prepended with 'nst', and
77: * default timers with 'dft'.
78: */
80: #define DFT_ID_AWAKE -1
82: typedef PetscLogEvent NestedEventId;
83: typedef struct {
84: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
85: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
86: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
87: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
89: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
90: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
91: } PetscNestedEvent;
93: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
94: static int nNestedEvents = 0;
95: static int nNestedEventsAllocated = 0;
96: static PetscNestedEvent *nestedEvents = NULL;
97: static PetscLogDouble thresholdTime = 0.01; /* initial value was 0.1 */
99: #define THRESHOLD (thresholdTime / 100.0 + 1e-12)
101: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
102: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: PETSC_INTERN PetscErrorCode PetscLogView_Nested(PetscViewer);
104: PETSC_INTERN PetscErrorCode PetscLogView_Flamegraph(PetscViewer);
106: /*@C
107: PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
108: rates and object creation and should not slow programs down too much.
110: Logically Collective over `PETSC_COMM_WORLD`
112: Options Database Keys:
113: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
115: Usage:
116: .vb
117: PetscInitialize(...);
118: PetscLogNestedBegin();
119: ... code ...
120: PetscLogView(viewer);
121: PetscFinalize();
122: .ve
124: Level: advanced
126: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`, `PetscLogDefaultBegin()`
127: @*/
128: PetscErrorCode PetscLogNestedBegin(void)
129: {
130: PetscFunctionBegin;
131: PetscCheck(!nestedEvents, PETSC_COMM_SELF, PETSC_ERR_COR, "nestedEvents already allocated");
133: nNestedEventsAllocated = 10;
134: PetscCall(PetscMalloc1(nNestedEventsAllocated, &nestedEvents));
135: dftParentActive = DFT_ID_AWAKE;
136: nNestedEvents = 1;
138: /* 'Awake' is nested event 0. It has no parents */
139: nestedEvents[0].nstEvent = 0;
140: nestedEvents[0].nParents = 0;
141: nestedEvents[0].dftParentsSorted = NULL;
142: nestedEvents[0].dftEvents = NULL;
143: nestedEvents[0].dftParents = NULL;
144: nestedEvents[0].dftEventsSorted = NULL;
146: PetscCall(PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested));
147: PetscFunctionReturn(PETSC_SUCCESS);
148: }
150: /* Delete the data structures for the nested timers */
151: PetscErrorCode PetscLogNestedEnd(void)
152: {
153: int i;
155: PetscFunctionBegin;
156: if (!nestedEvents) PetscFunctionReturn(PETSC_SUCCESS);
157: for (i = 0; i < nNestedEvents; i++) PetscCall(PetscFree4(nestedEvents[i].dftParentsSorted, nestedEvents[i].dftEventsSorted, nestedEvents[i].dftParents, nestedEvents[i].dftEvents));
158: PetscCall(PetscFree(nestedEvents));
159: nestedEvents = NULL;
160: nNestedEvents = 0;
161: nNestedEventsAllocated = 0;
162: PetscFunctionReturn(PETSC_SUCCESS);
163: }
165: /*
166: UTILITIES: FIND STUFF IN SORTED ARRAYS
168: dftIndex - index to be found
169: dftArray - sorted array of PetscLogEvent-ids
170: narray - dimension of dftArray
171: entry - entry in the array where dftIndex may be found;
173: if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
174: In that case, the dftIndex can be inserted at this entry.
175: */
176: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, const PetscLogEvent *dftArray, int narray, int *entry)
177: {
178: PetscFunctionBegin;
179: if (narray == 0 || dftIndex <= dftArray[0]) {
180: *entry = 0;
181: } else if (dftIndex > dftArray[narray - 1]) {
182: *entry = narray;
183: } else {
184: int ihigh = narray - 1, ilow = 0;
185: while (ihigh > ilow) {
186: const int imiddle = (ihigh + ilow) / 2;
187: if (dftArray[imiddle] > dftIndex) {
188: ihigh = imiddle;
189: } else if (dftArray[imiddle] < dftIndex) {
190: ilow = imiddle + 1;
191: } else {
192: ihigh = imiddle;
193: ilow = imiddle;
194: }
195: }
196: *entry = ihigh;
197: }
198: PetscFunctionReturn(PETSC_SUCCESS);
199: }
201: /*
202: Utility: find the nested event with given identification
204: nstEvent - Nested event to be found
205: entry - entry in the nestedEvents where nstEvent may be found;
207: if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray
208: */
209: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, int *entry)
210: {
211: PetscFunctionBegin;
212: if (nNestedEvents == 0 || nstEvent <= nestedEvents[0].nstEvent) {
213: *entry = 0;
214: } else if (nstEvent > nestedEvents[nNestedEvents - 1].nstEvent) {
215: *entry = nNestedEvents;
216: } else {
217: int ihigh = nNestedEvents - 1, ilow = 0;
218: while (ihigh > ilow) {
219: const int imiddle = (ihigh + ilow) / 2;
220: if (nestedEvents[imiddle].nstEvent > nstEvent) {
221: ihigh = imiddle;
222: } else if (nestedEvents[imiddle].nstEvent < nstEvent) {
223: ilow = imiddle + 1;
224: } else {
225: ihigh = imiddle;
226: ilow = imiddle;
227: }
228: }
229: *entry = ihigh;
230: }
231: PetscFunctionReturn(PETSC_SUCCESS);
232: }
234: /*
235: Nested logging is not prepared yet to support user-defined logging stages, so for now we force logging on the main stage.
236: Using PetscLogStage{Push/Pop}() would be more appropriate, but these two calls do extra bookkeeping work we don't need.
237: */
239: #define MAINSTAGE 0
241: static PetscLogStage savedStage = 0;
243: static inline PetscErrorCode PetscLogStageOverride(void)
244: {
245: PetscStageLog stageLog = petsc_stageLog;
247: PetscFunctionBegin;
248: if (stageLog->curStage == MAINSTAGE) PetscFunctionReturn(PETSC_SUCCESS);
249: savedStage = stageLog->curStage;
250: stageLog->curStage = MAINSTAGE;
251: PetscCall(PetscIntStackPush(stageLog->stack, MAINSTAGE));
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: static inline PetscErrorCode PetscLogStageRestore(void)
256: {
257: PetscStageLog stageLog = petsc_stageLog;
259: PetscFunctionBegin;
260: if (savedStage == MAINSTAGE) PetscFunctionReturn(PETSC_SUCCESS);
261: stageLog->curStage = savedStage;
262: PetscCall(PetscIntStackPop(stageLog->stack, &savedStage));
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: /******************************************************************************************/
267: /* Start a nested event */
268: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
269: {
270: int entry, pentry, tentry, i;
271: PetscLogEvent dftEvent;
273: PetscFunctionBegin;
274: PetscCall(PetscLogEventFindNestedTimer(nstEvent, &entry));
275: if (entry >= nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
276: /* Nested event doesn't exist yet: create it */
278: if (nNestedEvents == nNestedEventsAllocated) {
279: /* Enlarge and re-allocate nestedEvents if needed */
280: PetscNestedEvent *tmp = nestedEvents;
281: PetscCall(PetscMalloc1(2 * nNestedEvents, &nestedEvents));
282: nNestedEventsAllocated *= 2;
283: PetscCall(PetscArraycpy(nestedEvents, tmp, nNestedEvents));
284: PetscCall(PetscFree(tmp));
285: }
287: /* Clear space in nestedEvents for new nested event */
288: nNestedEvents++;
289: for (i = nNestedEvents - 1; i > entry; i--) nestedEvents[i] = nestedEvents[i - 1];
291: /* Create event in nestedEvents */
292: nestedEvents[entry].nstEvent = nstEvent;
293: nestedEvents[entry].nParents = 1;
294: PetscCall(PetscMalloc4(1, &nestedEvents[entry].dftParentsSorted, 1, &nestedEvents[entry].dftEventsSorted, 1, &nestedEvents[entry].dftParents, 1, &nestedEvents[entry].dftEvents));
296: /* Fill in new event */
297: pentry = 0;
298: dftEvent = (PetscLogEvent)nstEvent;
300: nestedEvents[entry].nstEvent = nstEvent;
301: nestedEvents[entry].dftParents[pentry] = dftParentActive;
302: nestedEvents[entry].dftEvents[pentry] = dftEvent;
303: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
304: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
306: } else {
307: /* Nested event exists: find current dftParentActive among parents */
308: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
309: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
310: int nParents = nestedEvents[entry].nParents;
312: PetscCall(PetscLogEventFindDefaultTimer(dftParentActive, dftParentsSorted, nParents, &pentry));
314: if (pentry >= nParents || dftParentActive != dftParentsSorted[pentry]) {
315: /* dftParentActive not in the list: add it to the list */
316: int i;
317: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
318: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
319: char name[100];
321: /* Register a new default timer */
322: PetscCall(PetscSNPrintf(name, PETSC_STATIC_ARRAY_LENGTH(name), "%d -> %d", (int)dftParentActive, (int)nstEvent));
323: PetscCall(PetscLogEventRegister(name, 0, &dftEvent));
324: PetscCall(PetscLogEventFindDefaultTimer(dftEvent, dftEventsSorted, nParents, &tentry));
326: /* Reallocate parents and dftEvents to make space for new parent */
327: PetscCall(PetscMalloc4(1 + nParents, &nestedEvents[entry].dftParentsSorted, 1 + nParents, &nestedEvents[entry].dftEventsSorted, 1 + nParents, &nestedEvents[entry].dftParents, 1 + nParents, &nestedEvents[entry].dftEvents));
328: PetscCall(PetscArraycpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents));
329: PetscCall(PetscArraycpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents));
330: PetscCall(PetscArraycpy(nestedEvents[entry].dftParents, dftParents, nParents));
331: PetscCall(PetscArraycpy(nestedEvents[entry].dftEvents, dftEvents, nParents));
332: PetscCall(PetscFree4(dftParentsSorted, dftEventsSorted, dftParents, dftEvents));
334: dftParents = nestedEvents[entry].dftParents;
335: dftEvents = nestedEvents[entry].dftEvents;
336: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
337: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
339: nestedEvents[entry].nParents++;
340: nParents++;
342: for (i = nParents - 1; i > pentry; i--) {
343: dftParentsSorted[i] = dftParentsSorted[i - 1];
344: dftEvents[i] = dftEvents[i - 1];
345: }
346: for (i = nParents - 1; i > tentry; i--) {
347: dftParents[i] = dftParents[i - 1];
348: dftEventsSorted[i] = dftEventsSorted[i - 1];
349: }
351: /* Fill in the new default timer */
352: dftParentsSorted[pentry] = dftParentActive;
353: dftEvents[pentry] = dftEvent;
354: dftParents[tentry] = dftParentActive;
355: dftEventsSorted[tentry] = dftEvent;
357: } else {
358: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
359: dftEvent = nestedEvents[entry].dftEvents[pentry];
360: }
361: }
363: /* Start the default 'dftEvent'-timer and update the dftParentActive */
364: PetscCall(PetscLogStageOverride());
365: PetscCall(PetscLogEventBeginDefault(dftEvent, t, o1, o2, o3, o4));
366: PetscCall(PetscLogStageRestore());
367: dftParentActive = dftEvent;
368: PetscFunctionReturn(PETSC_SUCCESS);
369: }
371: /* End a nested event */
372: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
373: {
374: int entry, pentry, nParents;
375: PetscLogEvent *dftEventsSorted;
377: PetscFunctionBegin;
378: /* Find the nested event */
379: PetscCall(PetscLogEventFindNestedTimer(nstEvent, &entry));
380: PetscCheck(entry < nNestedEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d", entry, nNestedEvents);
381: PetscCheck(nestedEvents[entry].nstEvent == nstEvent, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d", entry, nstEvent);
382: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
383: nParents = nestedEvents[entry].nParents;
385: /* Find the current default timer among the 'dftEvents' of this event */
386: PetscCall(PetscLogEventFindDefaultTimer(dftParentActive, dftEventsSorted, nParents, &pentry));
388: PetscCheck(pentry < nParents, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d", pentry, nParents);
389: PetscCheck(dftEventsSorted[pentry] == dftParentActive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d", dftParentActive, dftEventsSorted[pentry]);
391: /* Stop the default timer and update the dftParentActive */
392: PetscCall(PetscLogStageOverride());
393: PetscCall(PetscLogEventEndDefault(dftParentActive, t, o1, o2, o3, o4));
394: PetscCall(PetscLogStageRestore());
395: dftParentActive = nestedEvents[entry].dftParents[pentry];
396: PetscFunctionReturn(PETSC_SUCCESS);
397: }
399: /*@
400: PetscLogSetThreshold - Set the threshold time for logging the events; this is a percentage out of 100, so 1. means any event
401: that takes 1 or more percent of the time.
403: Logically Collective over `PETSC_COMM_WORLD`
405: Input Parameter:
406: . newThresh - the threshold to use
408: Output Parameter:
409: . oldThresh - the previously set threshold value
411: Options Database Keys:
412: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
414: Usage:
415: .vb
416: PetscInitialize(...);
417: PetscLogNestedBegin();
418: PetscLogSetThreshold(0.1,&oldthresh);
419: ... code ...
420: PetscLogView(viewer);
421: PetscFinalize();
422: .ve
424: Level: advanced
426: .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`, `PetscLogDefaultBegin()`,
427: `PetscLogNestedBegin()`
428: @*/
429: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
430: {
431: PetscFunctionBegin;
432: if (oldThresh) *oldThresh = thresholdTime;
433: if (newThresh == (PetscLogDouble)PETSC_DECIDE) newThresh = 0.01;
434: if (newThresh == (PetscLogDouble)PETSC_DEFAULT) newThresh = 0.01;
435: thresholdTime = PetscMax(newThresh, 0.0);
436: PetscFunctionReturn(PETSC_SUCCESS);
437: }
439: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
440: {
441: char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
442: char version[256], buildoptions[128] = "";
443: PetscMPIInt size;
444: size_t len;
446: PetscFunctionBegin;
447: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
448: PetscCall(PetscGetArchType(arch, sizeof(arch)));
449: PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
450: PetscCall(PetscGetUserName(username, sizeof(username)));
451: PetscCall(PetscGetProgramName(pname, sizeof(pname)));
452: PetscCall(PetscGetDate(date, sizeof(date)));
453: PetscCall(PetscGetVersion(version, sizeof(version)));
455: PetscCall(PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification"));
456: PetscCall(PetscViewerXMLPutString(viewer, "executable", "Executable", pname));
457: PetscCall(PetscViewerXMLPutString(viewer, "architecture", "Architecture", arch));
458: PetscCall(PetscViewerXMLPutString(viewer, "hostname", "Host", hostname));
459: PetscCall(PetscViewerXMLPutInt(viewer, "nprocesses", "Number of processes", size));
460: PetscCall(PetscViewerXMLPutString(viewer, "user", "Run by user", username));
461: PetscCall(PetscViewerXMLPutString(viewer, "date", "Started at", date));
462: PetscCall(PetscViewerXMLPutString(viewer, "petscrelease", "Petsc Release", version));
464: if (PetscDefined(USE_DEBUG)) PetscCall(PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions)));
465: if (PetscDefined(USE_COMPLEX)) PetscCall(PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions)));
466: if (PetscDefined(USE_REAL_SINGLE)) {
467: PetscCall(PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions)));
468: } else if (PetscDefined(USE_REAL___FLOAT128)) {
469: PetscCall(PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions)));
470: } else if (PetscDefined(USE_REAL___FP16)) {
471: PetscCall(PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions)));
472: }
473: if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions)));
474: #if defined(__cplusplus)
475: PetscCall(PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions)));
476: #endif
477: PetscCall(PetscStrlen(buildoptions, &len));
478: if (len) PetscCall(PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions));
479: PetscCall(PetscViewerXMLEndSection(viewer, "runspecification"));
480: PetscFunctionReturn(PETSC_SUCCESS);
481: }
483: /* Print the global performance: max, max/min, average and total of
484: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
485: */
486: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total)
487: {
488: PetscLogDouble min, tot, ratio, avg;
489: MPI_Comm comm;
490: PetscMPIInt rank, size;
491: PetscLogDouble valrank[2], max[2];
493: PetscFunctionBegin;
494: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
495: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
496: PetscCallMPI(MPI_Comm_rank(comm, &rank));
498: valrank[0] = local_val;
499: valrank[1] = (PetscLogDouble)rank;
500: PetscCall(MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
501: PetscCall(MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
502: PetscCall(MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
503: avg = tot / ((PetscLogDouble)size);
504: if (min != 0.0) ratio = max[0] / min;
505: else ratio = 0.0;
507: PetscCall(PetscViewerXMLStartSection(viewer, name, desc));
508: PetscCall(PetscViewerXMLPutDouble(viewer, "max", NULL, max[0], "%e"));
509: PetscCall(PetscViewerXMLPutInt(viewer, "maxrank", "rank at which max was found", (PetscMPIInt)max[1]));
510: PetscCall(PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f"));
511: if (print_average) PetscCall(PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e"));
512: if (print_total) PetscCall(PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e"));
513: PetscCall(PetscViewerXMLEndSection(viewer, name));
514: PetscFunctionReturn(PETSC_SUCCESS);
515: }
517: /* Print the global performance: max, max/min, average and total of
518: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
519: */
520: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
521: {
522: PetscLogDouble flops, mem, red, mess;
523: const PetscBool print_total_yes = PETSC_TRUE, print_total_no = PETSC_FALSE, print_average_no = PETSC_FALSE, print_average_yes = PETSC_TRUE;
525: PetscFunctionBegin;
526: /* Must preserve reduction count before we go on */
527: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
529: /* Calculate summary information */
530: PetscCall(PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance"));
532: /* Time */
533: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no));
535: /* Objects */
536: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble)petsc_numObjects, print_average_yes, print_total_no));
538: /* Flop */
539: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops / 1.0E6, print_average_yes, print_total_yes));
541: /* Flop/sec -- Must talk to Barry here */
542: if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
543: else flops = 0.0;
544: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops / 1.0E6, print_average_yes, print_total_yes));
546: /* Memory */
547: PetscCall(PetscMallocGetMaximumUsage(&mem));
548: if (mem > 0.0) PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem / 1024.0 / 1024.0, print_average_yes, print_total_yes));
549: /* Messages */
550: mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
551: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes));
553: /* Message Volume */
554: mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
555: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess / 1024.0 / 1024.0, print_average_yes, print_total_yes));
557: /* Reductions */
558: PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red, print_average_no, print_total_no));
559: PetscCall(PetscViewerXMLEndSection(viewer, "globalperformance"));
560: PetscFunctionReturn(PETSC_SUCCESS);
561: }
563: typedef struct {
564: PetscLogEvent dftEvent;
565: NestedEventId nstEvent;
566: PetscLogEvent dftParent;
567: NestedEventId nstParent;
568: PetscBool own;
569: int depth;
570: NestedEventId *nstPath;
571: } PetscNestedEventTree;
573: /* Compare timers to sort them in the tree */
574: static int compareTreeItems(const void *item1_, const void *item2_)
575: {
576: int i;
577: PetscNestedEventTree *item1 = (PetscNestedEventTree *)item1_;
578: PetscNestedEventTree *item2 = (PetscNestedEventTree *)item2_;
580: for (i = 0; i < PetscMin(item1->depth, item2->depth); i++) {
581: if (item1->nstPath[i] < item2->nstPath[i]) return -1;
582: if (item1->nstPath[i] > item2->nstPath[i]) return +1;
583: }
584: if (item1->depth < item2->depth) return -1;
585: if (item1->depth > item2->depth) return 1;
586: return 0;
587: }
588: /*
589: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
590: * calls that happen in some processes, but not in others.
591: *
592: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
593: * The tree is sorted so that the timers can be printed in the order of appearance.
594: *
595: * For tree-items which appear in the trees of multiple processes (which will be most items), the
596: * following rule is followed:
597: * + if information from my own process is available, then that is the information stored in tree.
598: * otherwise it is some other process's information.
599: */
600: static PetscErrorCode PetscLogNestedTreeCreate(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
601: {
602: PetscNestedEventTree *tree = NULL, *newTree;
603: int *treeIndices;
604: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
605: int yesno;
606: PetscBool done;
607: int maxdepth;
608: int depth;
609: int illegalEvent;
610: int iextra;
611: NestedEventId *nstPath, *nstMyPath;
612: MPI_Comm comm;
614: PetscFunctionBegin;
615: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
617: /* Calculate memory needed to store everybody's information and allocate tree */
618: nTimers = 0;
619: for (i = 0; i < nNestedEvents; i++) nTimers += nestedEvents[i].nParents;
621: PetscCall(PetscMalloc1(nTimers, &tree));
623: /* Fill tree with readily available information */
624: iTimer0 = 0;
625: maxDefaultTimer = 0;
626: for (i = 0; i < nNestedEvents; i++) {
627: int nParents = nestedEvents[i].nParents;
628: NestedEventId nstEvent = nestedEvents[i].nstEvent;
629: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
630: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
631: for (j = 0; j < nParents; j++) {
632: maxDefaultTimer = PetscMax(dftEvents[j], maxDefaultTimer);
634: tree[iTimer0 + j].dftEvent = dftEvents[j];
635: tree[iTimer0 + j].nstEvent = nstEvent;
636: tree[iTimer0 + j].dftParent = dftParentsSorted[j];
637: tree[iTimer0 + j].own = PETSC_TRUE;
639: tree[iTimer0 + j].nstParent = 0;
640: tree[iTimer0 + j].depth = 0;
641: tree[iTimer0 + j].nstPath = NULL;
642: }
643: iTimer0 += nParents;
644: }
646: /* Calculate the global maximum for the default timer index, so array treeIndices can
647: * be allocated only once */
648: PetscCall(MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm));
649: maxDefaultTimer = j;
651: /* Find default timer's place in the tree */
652: PetscCall(PetscCalloc1(maxDefaultTimer + 1, &treeIndices));
653: treeIndices[0] = 0;
654: for (i = 0; i < nTimers; i++) {
655: PetscLogEvent dftEvent = tree[i].dftEvent;
656: treeIndices[dftEvent] = i;
657: }
659: /* Find each dftParent's nested identification */
660: for (i = 0; i < nTimers; i++) {
661: PetscLogEvent dftParent = tree[i].dftParent;
662: if (dftParent != DFT_ID_AWAKE) {
663: int j = treeIndices[dftParent];
664: tree[i].nstParent = tree[j].nstEvent;
665: }
666: }
668: /* Find depths for each timer path */
669: done = PETSC_FALSE;
670: maxdepth = 1;
671: while (!done) {
672: done = PETSC_TRUE;
673: for (i = 0; i < nTimers; i++) {
674: if (tree[i].dftParent == DFT_ID_AWAKE) {
675: tree[i].depth = 1;
676: maxdepth = PetscMax(1, maxdepth);
677: } else {
678: int j = treeIndices[tree[i].dftParent];
679: depth = 1 + tree[j].depth;
680: if (depth > tree[i].depth) {
681: done = PETSC_FALSE;
682: tree[i].depth = depth;
683: maxdepth = PetscMax(depth, maxdepth);
684: }
685: }
686: }
687: }
689: /* Allocate the paths in the entire tree */
690: for (i = 0; i < nTimers; i++) {
691: depth = tree[i].depth;
692: PetscCall(PetscCalloc1(depth, &tree[i].nstPath));
693: }
695: /* Calculate the paths for all timers */
696: for (depth = 1; depth <= maxdepth; depth++) {
697: for (i = 0; i < nTimers; i++) {
698: if (tree[i].depth == depth) {
699: if (depth > 1) {
700: int j = treeIndices[tree[i].dftParent];
701: PetscCall(PetscArraycpy(tree[i].nstPath, tree[j].nstPath, depth - 1));
702: }
703: tree[i].nstPath[depth - 1] = tree[i].nstEvent;
704: }
705: }
706: }
707: PetscCall(PetscFree(treeIndices));
709: /* Sort the tree on basis of the paths */
710: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
712: /* Allocate an array to store paths */
713: depth = maxdepth;
714: PetscCall(MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm));
715: PetscCall(PetscMalloc1(maxdepth + 1, &nstPath));
716: PetscCall(PetscMalloc1(maxdepth + 1, &nstMyPath));
718: /* Find an illegal nested event index (1+largest nested event index) */
719: illegalEvent = 1 + nestedEvents[nNestedEvents - 1].nstEvent;
720: i = illegalEvent;
721: PetscCall(MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm));
723: /* First, detect timers which are not available in this process, but are available in others
724: * Allocate a new tree, that can contain all timers
725: * Then, fill the new tree with all (own and not-own) timers */
726: newTree = NULL;
727: for (yesno = 0; yesno <= 1; yesno++) {
728: depth = 1;
729: i = 0;
730: iextra = 0;
731: while (depth > 0) {
732: int j;
733: PetscBool same;
735: /* Construct the next path in this process's tree:
736: * if necessary, supplement with invalid path entries */
737: depth++;
738: PetscCheck(depth <= maxdepth + 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Depth %d > maxdepth+1 %d", depth, maxdepth + 1);
739: if (i < nTimers) {
740: for (j = 0; j < tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
741: for (j = tree[i].depth; j < depth; j++) nstMyPath[j] = illegalEvent;
742: } else {
743: for (j = 0; j < depth; j++) nstMyPath[j] = illegalEvent;
744: }
746: /* Communicate with other processes to obtain the next path and its depth */
747: PetscCall(MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm));
748: for (j = depth - 1; (int)j >= 0; j--) {
749: if (nstPath[j] == illegalEvent) depth = j;
750: }
752: if (depth > 0) {
753: /* If the path exists */
755: /* check whether the next path is the same as this process's next path */
756: same = PETSC_TRUE;
757: for (j = 0; same && j < depth; j++) same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;
759: if (same) {
760: /* Register 'own path' */
761: if (newTree) newTree[i + iextra] = tree[i];
762: i++;
763: } else {
764: /* Register 'not an own path' */
765: if (newTree) {
766: newTree[i + iextra].nstEvent = nstPath[depth - 1];
767: newTree[i + iextra].own = PETSC_FALSE;
768: newTree[i + iextra].depth = depth;
769: PetscCall(PetscMalloc1(depth, &newTree[i + iextra].nstPath));
770: for (j = 0; j < depth; j++) newTree[i + iextra].nstPath[j] = nstPath[j];
772: newTree[i + iextra].dftEvent = 0;
773: newTree[i + iextra].dftParent = 0;
774: newTree[i + iextra].nstParent = 0;
775: }
776: iextra++;
777: }
778: }
779: }
781: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
782: totalNTimers = nTimers + iextra;
783: if (!newTree) PetscCall(PetscMalloc1(totalNTimers, &newTree));
784: }
785: PetscCall(PetscFree(nstPath));
786: PetscCall(PetscFree(nstMyPath));
787: PetscCall(PetscFree(tree));
788: tree = newTree;
789: newTree = NULL;
791: /* Set return value and return */
792: *p_tree = tree;
793: *p_nTimers = totalNTimers;
794: PetscFunctionReturn(PETSC_SUCCESS);
795: }
797: /*
798: * Delete the nested timer tree
799: */
800: static PetscErrorCode PetscLogNestedTreeDestroy(PetscNestedEventTree *tree, int nTimers)
801: {
802: int i;
804: PetscFunctionBegin;
805: for (i = 0; i < nTimers; i++) PetscCall(PetscFree(tree[i].nstPath));
806: PetscCall(PetscFree(tree));
807: PetscFunctionReturn(PETSC_SUCCESS);
808: }
810: /* Print the global performance: max, max/min, average and total of
811: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
812: */
813: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble value, PetscLogDouble minthreshold, PetscLogDouble maxthreshold, PetscLogDouble minmaxtreshold)
814: {
815: MPI_Comm comm; /* MPI communicator in reduction */
816: PetscMPIInt rank; /* rank of this process */
817: PetscLogDouble val_in[2], max[2], min[2];
818: PetscLogDouble minvalue, maxvalue, tot;
819: PetscMPIInt size;
820: PetscMPIInt minLoc, maxLoc;
822: PetscFunctionBegin;
823: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
824: PetscCallMPI(MPI_Comm_size(comm, &size));
825: PetscCallMPI(MPI_Comm_rank(comm, &rank));
826: val_in[0] = value;
827: val_in[1] = (PetscLogDouble)rank;
828: PetscCall(MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
829: PetscCall(MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm));
830: maxvalue = max[0];
831: maxLoc = (PetscMPIInt)max[1];
832: minvalue = min[0];
833: minLoc = (PetscMPIInt)min[1];
834: PetscCall(MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
836: if (maxvalue < maxthreshold && minvalue >= minthreshold) {
837: /* One call per parent or NO value: don't print */
838: } else {
839: PetscCall(PetscViewerXMLStartSection(viewer, name, NULL));
840: if (maxvalue > minvalue * minmaxtreshold) {
841: PetscCall(PetscViewerXMLPutDouble(viewer, "avgvalue", NULL, tot / size, "%g"));
842: PetscCall(PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%g"));
843: PetscCall(PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%g"));
844: PetscCall(PetscViewerXMLPutInt(viewer, "minloc", NULL, minLoc));
845: PetscCall(PetscViewerXMLPutInt(viewer, "maxloc", NULL, maxLoc));
846: } else {
847: PetscCall(PetscViewerXMLPutDouble(viewer, "value", NULL, tot / size, "%g"));
848: }
849: PetscCall(PetscViewerXMLEndSection(viewer, name));
850: }
851: PetscFunctionReturn(PETSC_SUCCESS);
852: }
854: #define N_COMM 8
855: static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer, PetscEventPerfInfo perfInfo, PetscLogDouble countsPerCall, int parentCount, int depth, const char *name, PetscLogDouble totalTime, PetscBool *isPrinted)
856: {
857: PetscLogDouble time = perfInfo.time;
858: PetscLogDouble timeMx;
859: MPI_Comm comm;
861: PetscFunctionBegin;
862: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
863: PetscCall(MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
864: *isPrinted = ((timeMx / totalTime) >= THRESHOLD) ? PETSC_TRUE : PETSC_FALSE;
865: if (*isPrinted) {
866: PetscCall(PetscViewerXMLStartSection(viewer, "event", NULL));
867: PetscCall(PetscViewerXMLPutString(viewer, "name", NULL, name));
868: PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "time", time / totalTime * 100.0, 0, 0, 1.02));
869: PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount > 0 ? countsPerCall : 0, 0.99, 1.01, 1.02));
870: PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time >= timeMx * 0.001 ? 1e-6 * perfInfo.flops / time : 0, 0, 0.01, 1.05));
871: PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mbps", time >= timeMx * 0.001 ? perfInfo.messageLength / (1024 * 1024 * time) : 0, 0, 0.01, 1.05));
872: PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time >= timeMx * 0.001 ? perfInfo.numReductions / time : 0, 0, 0.01, 1.05));
873: }
874: PetscFunctionReturn(PETSC_SUCCESS);
875: }
877: /* Count the number of times the parent event was called */
879: static int countParents(const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
880: {
881: if (tree[i].depth <= 1) {
882: return 1; /* Main event: only once */
883: } else if (!tree[i].own) {
884: return 1; /* This event didn't happen in this process, but did in another */
885: } else {
886: int iParent;
887: for (iParent = i - 1; iParent >= 0; iParent--) {
888: if (tree[iParent].depth == tree[i].depth - 1) break;
889: }
890: if (tree[iParent].depth != tree[i].depth - 1) {
891: /* ***** Internal error: cannot find parent */
892: return -2;
893: } else {
894: PetscLogEvent dftEvent = tree[iParent].dftEvent;
895: return eventPerfInfo[dftEvent].count;
896: }
897: }
898: }
900: typedef struct {
901: int id;
902: PetscLogDouble val;
903: } PetscSortItem;
905: static int compareSortItems(const void *item1_, const void *item2_)
906: {
907: PetscSortItem *item1 = (PetscSortItem *)item1_;
908: PetscSortItem *item2 = (PetscSortItem *)item2_;
909: if (item1->val > item2->val) return -1;
910: if (item1->val < item2->val) return +1;
911: return 0;
912: }
914: /*
915: * Find the number of child events.
916: */
917: static PetscErrorCode PetscLogNestedTreeGetChildrenCount(const PetscNestedEventTree *tree, int nTimers, int iStart, int depth, int *nChildren)
918: {
919: int n = 0;
921: PetscFunctionBegin;
922: for (int i = iStart + 1; i < nTimers; i++) {
923: if (tree[i].depth <= depth) break;
924: if (tree[i].depth == depth + 1) n++;
925: }
926: *nChildren = n;
927: PetscFunctionReturn(PETSC_SUCCESS);
928: }
930: /*
931: * Initialize child event sort items with ID and times.
932: */
933: static PetscErrorCode PetscLogNestedTreeSetChildrenSortItems(const PetscViewer viewer, const PetscNestedEventTree *tree, int nTimers, int iStart, int depth, int nChildren, PetscSortItem **children)
934: {
935: MPI_Comm comm;
936: PetscLogDouble *times, *maxTimes;
937: PetscStageLog stageLog;
938: PetscEventPerfInfo *eventPerfInfo;
939: const int stage = MAINSTAGE;
941: PetscFunctionBegin;
942: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
943: PetscCall(PetscLogGetStageLog(&stageLog));
944: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
946: if (nChildren > 0) {
947: /* Create an array for the id-s and maxTimes of the children,
948: * leaving 2 spaces for self-time and other-time */
950: PetscCall(PetscMalloc1(nChildren + 2, children));
951: nChildren = 0;
952: for (int i = iStart + 1; i < nTimers; i++) {
953: if (tree[i].depth <= depth) break;
954: if (tree[i].depth == depth + 1) {
955: (*children)[nChildren].id = i;
956: (*children)[nChildren].val = eventPerfInfo[tree[i].dftEvent].time;
957: nChildren++;
958: }
959: }
961: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
962: PetscCall(PetscMalloc1(nChildren, ×));
963: for (int i = 0; i < nChildren; i++) times[i] = (*children)[i].val;
965: PetscCall(PetscMalloc1(nChildren, &maxTimes));
966: PetscCall(MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
967: PetscCall(PetscFree(times));
969: for (int i = 0; i < nChildren; i++) (*children)[i].val = maxTimes[i];
970: PetscCall(PetscFree(maxTimes));
971: }
972: PetscFunctionReturn(PETSC_SUCCESS);
973: }
975: /*
976: * Set 'self' and 'other' performance info.
977: */
978: static PetscErrorCode PetscLogNestedTreeSetSelfOtherPerfInfo(const PetscNestedEventTree *tree, int iStart, PetscLogDouble totalTime, const PetscSortItem *children, int nChildren, PetscEventPerfInfo *myPerfInfo, PetscEventPerfInfo *selfPerfInfo, PetscEventPerfInfo *otherPerfInfo, int *parentCount, PetscLogDouble *countsPerCall)
979: {
980: const int stage = MAINSTAGE;
981: PetscStageLog stageLog;
982: PetscEventPerfInfo *eventPerfInfo;
984: PetscFunctionBegin;
985: PetscCall(PetscLogGetStageLog(&stageLog));
986: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
987: if (!tree[iStart].own) {
988: /* Set values for a timer that was not activated in this process
989: * (but was, in other processes of this run) */
990: PetscCall(PetscMemzero(myPerfInfo, sizeof(*myPerfInfo)));
992: *selfPerfInfo = *myPerfInfo;
993: *otherPerfInfo = *myPerfInfo;
995: *parentCount = 1;
996: *countsPerCall = 0;
997: } else {
998: /* Set the values for a timer that was activated in this process */
999: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1001: *parentCount = countParents(tree, eventPerfInfo, iStart);
1002: *myPerfInfo = eventPerfInfo[dftEvent];
1003: *countsPerCall = (PetscLogDouble)myPerfInfo->count / (PetscLogDouble)*parentCount;
1005: *selfPerfInfo = *myPerfInfo;
1006: otherPerfInfo->time = 0;
1007: otherPerfInfo->flops = 0;
1008: otherPerfInfo->numMessages = 0;
1009: otherPerfInfo->messageLength = 0;
1010: otherPerfInfo->numReductions = 0;
1012: for (int i = 0; i < nChildren; i++) {
1013: /* For all child counters: subtract the child values from self-timers */
1015: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1016: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1018: selfPerfInfo->time -= childPerfInfo.time;
1019: selfPerfInfo->flops -= childPerfInfo.flops;
1020: selfPerfInfo->numMessages -= childPerfInfo.numMessages;
1021: selfPerfInfo->messageLength -= childPerfInfo.messageLength;
1022: selfPerfInfo->numReductions -= childPerfInfo.numReductions;
1024: if ((children[i].val / totalTime) < THRESHOLD) {
1025: /* Add them to 'other' if the time is ignored in the output */
1026: otherPerfInfo->time += childPerfInfo.time;
1027: otherPerfInfo->flops += childPerfInfo.flops;
1028: otherPerfInfo->numMessages += childPerfInfo.numMessages;
1029: otherPerfInfo->messageLength += childPerfInfo.messageLength;
1030: otherPerfInfo->numReductions += childPerfInfo.numReductions;
1031: }
1032: }
1033: }
1034: PetscFunctionReturn(PETSC_SUCCESS);
1035: }
1037: /*
1038: * Set max times across ranks for 'self' and 'other'.
1039: */
1040: static PetscErrorCode PetscLogNestedTreeSetMaxTimes(MPI_Comm comm, int nChildren, const PetscEventPerfInfo selfPerfInfo, const PetscEventPerfInfo otherPerfInfo, PetscSortItem *children)
1041: {
1042: PetscLogDouble times[2], maxTimes[2];
1044: PetscFunctionBegin;
1045: times[0] = selfPerfInfo.time;
1046: times[1] = otherPerfInfo.time;
1048: PetscCall(MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1049: children[nChildren + 0].id = -1;
1050: children[nChildren + 0].val = maxTimes[0];
1051: children[nChildren + 1].id = -2;
1052: children[nChildren + 1].val = maxTimes[1];
1053: PetscFunctionReturn(PETSC_SUCCESS);
1054: }
1056: static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, int iStart, PetscLogDouble totalTime)
1057: {
1058: int depth = tree[iStart].depth;
1059: const char *name;
1060: int parentCount = 1, nChildren;
1061: PetscSortItem *children;
1062: PetscStageLog stageLog;
1063: PetscEventRegInfo *eventRegInfo;
1064: PetscEventPerfInfo myPerfInfo = {0}, selfPerfInfo = {0}, otherPerfInfo = {0};
1065: PetscLogDouble countsPerCall = 0;
1066: PetscBool wasPrinted;
1067: PetscBool childWasPrinted;
1068: MPI_Comm comm;
1070: PetscFunctionBegin;
1071: /* Look up the name of the event and its PerfInfo */
1072: PetscCall(PetscLogGetStageLog(&stageLog));
1073: eventRegInfo = stageLog->eventLog->eventInfo;
1074: name = eventRegInfo[(PetscLogEvent)tree[iStart].nstEvent].name;
1075: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1077: PetscCall(PetscLogNestedTreeGetChildrenCount(tree, nTimers, iStart, depth, &nChildren));
1078: PetscCall(PetscLogNestedTreeSetChildrenSortItems(viewer, tree, nTimers, iStart, depth, nChildren, &children));
1079: PetscCall(PetscLogNestedTreeSetSelfOtherPerfInfo(tree, iStart, totalTime, children, nChildren, &myPerfInfo, &selfPerfInfo, &otherPerfInfo, &parentCount, &countsPerCall));
1081: /* Main output for this timer */
1082: PetscCall(PetscLogNestedTreePrintLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted));
1084: /* Now print the lines for the children */
1085: if (nChildren > 0) {
1086: int i;
1088: /* Calculate max-times for 'self' and 'other' */
1089: PetscCall(PetscLogNestedTreeSetMaxTimes(comm, nChildren, selfPerfInfo, otherPerfInfo, children));
1091: /* Now sort the children (including 'self' and 'other') on total time */
1092: qsort(children, nChildren + 2, sizeof(PetscSortItem), compareSortItems);
1094: /* Print (or ignore) the children in ascending order of total time */
1095: PetscCall(PetscViewerXMLStartSection(viewer, "events", NULL));
1096: for (i = 0; i < nChildren + 2; i++) {
1097: if ((children[i].val / totalTime) < THRESHOLD) {
1098: /* ignored: no output */
1099: } else if (children[i].id == -1) {
1100: PetscCall(PetscLogNestedTreePrintLine(viewer, selfPerfInfo, 1, parentCount, depth + 1, "self", totalTime, &childWasPrinted));
1101: if (childWasPrinted) PetscCall(PetscViewerXMLEndSection(viewer, "event"));
1102: } else if (children[i].id == -2) {
1103: size_t len;
1104: char *otherName;
1106: PetscCall(PetscStrlen(name, &len));
1107: PetscCall(PetscMalloc1(len + 16, &otherName));
1108: PetscCall(PetscSNPrintf(otherName, len + 16, "%s: other-timed", name));
1109: PetscCall(PetscLogNestedTreePrintLine(viewer, otherPerfInfo, 1, 1, depth + 1, otherName, totalTime, &childWasPrinted));
1110: PetscCall(PetscFree(otherName));
1111: if (childWasPrinted) PetscCall(PetscViewerXMLEndSection(viewer, "event"));
1112: } else {
1113: /* Print the child with a recursive call to this function */
1114: PetscCall(PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime));
1115: }
1116: }
1117: PetscCall(PetscViewerXMLEndSection(viewer, "events"));
1118: PetscCall(PetscFree(children));
1119: }
1121: if (wasPrinted) PetscCall(PetscViewerXMLEndSection(viewer, "event"));
1122: PetscFunctionReturn(PETSC_SUCCESS);
1123: }
1125: static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, PetscLogDouble totalTime)
1126: {
1127: int i, nChildren;
1128: PetscSortItem *children;
1129: MPI_Comm comm;
1131: PetscFunctionBegin;
1132: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1134: PetscCall(PetscLogNestedTreeGetChildrenCount(tree, nTimers, -1, 0, &nChildren));
1135: PetscCall(PetscLogNestedTreeSetChildrenSortItems(viewer, tree, nTimers, -1, 0, nChildren, &children));
1137: if (nChildren > 0) {
1138: /* Now sort the children on total time */
1139: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1140: /* Print (or ignore) the children in ascending order of total time */
1141: PetscCall(PetscViewerXMLStartSection(viewer, "timertree", "Timings tree"));
1142: PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f"));
1143: PetscCall(PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, thresholdTime, "%f"));
1145: for (i = 0; i < nChildren; i++) {
1146: if ((children[i].val / totalTime) < THRESHOLD) {
1147: /* ignored: no output */
1148: } else {
1149: /* Print the child with a recursive call to this function */
1150: PetscCall(PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime));
1151: }
1152: }
1153: PetscCall(PetscViewerXMLEndSection(viewer, "timertree"));
1154: PetscCall(PetscFree(children));
1155: }
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
1159: typedef struct {
1160: char *name;
1161: PetscLogDouble time;
1162: PetscLogDouble flops;
1163: PetscLogDouble numMessages;
1164: PetscLogDouble messageLength;
1165: PetscLogDouble numReductions;
1166: } PetscSelfTimer;
1168: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1169: {
1170: const int stage = MAINSTAGE;
1171: PetscStageLog stageLog;
1172: PetscEventRegInfo *eventRegInfo;
1173: PetscEventPerfInfo *eventPerfInfo;
1174: PetscSelfTimer *selftimes;
1175: PetscSelfTimer *totaltimes;
1176: NestedEventId *nstEvents;
1177: int i, j, maxDefaultTimer;
1178: NestedEventId nst;
1179: PetscLogEvent dft;
1180: int nstMax, nstMax_local;
1181: MPI_Comm comm;
1183: PetscFunctionBegin;
1184: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1185: PetscCall(PetscLogGetStageLog(&stageLog));
1186: eventRegInfo = stageLog->eventLog->eventInfo;
1187: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1189: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1190: maxDefaultTimer = 0;
1191: for (i = 0; i < nNestedEvents; i++) {
1192: int nParents = nestedEvents[i].nParents;
1193: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1194: for (j = 0; j < nParents; j++) maxDefaultTimer = PetscMax(dftEvents[j], maxDefaultTimer);
1195: }
1196: PetscCall(PetscMalloc1(maxDefaultTimer + 1, &nstEvents));
1197: for (dft = 0; dft < maxDefaultTimer; dft++) nstEvents[dft] = 0;
1198: for (i = 0; i < nNestedEvents; i++) {
1199: int nParents = nestedEvents[i].nParents;
1200: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1201: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1202: for (j = 0; j < nParents; j++) nstEvents[dftEvents[j]] = nstEvent;
1203: }
1205: /* Calculate largest nested event-ID */
1206: nstMax_local = 0;
1207: for (i = 0; i < nNestedEvents; i++) nstMax_local = PetscMax(nestedEvents[i].nstEvent, nstMax_local);
1208: PetscCall(MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm));
1210: /* Initialize all total-times with zero */
1211: PetscCall(PetscMalloc1(nstMax + 1, &selftimes));
1212: PetscCall(PetscMalloc1(nstMax + 1, &totaltimes));
1213: for (nst = 0; nst <= nstMax; nst++) {
1214: totaltimes[nst].time = 0;
1215: totaltimes[nst].flops = 0;
1216: totaltimes[nst].numMessages = 0;
1217: totaltimes[nst].messageLength = 0;
1218: totaltimes[nst].numReductions = 0;
1219: totaltimes[nst].name = NULL;
1220: }
1222: /* Calculate total-times */
1223: for (i = 0; i < nNestedEvents; i++) {
1224: const int nParents = nestedEvents[i].nParents;
1225: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1226: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1227: for (j = 0; j < nParents; j++) {
1228: const PetscLogEvent dftEvent = dftEvents[j];
1229: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1230: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1231: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1232: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1233: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1234: }
1235: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1236: }
1238: /* Initialize: self-times := totaltimes */
1239: for (nst = 0; nst <= nstMax; nst++) selftimes[nst] = totaltimes[nst];
1241: /* Subtract timed subprocesses from self-times */
1242: for (i = 0; i < nNestedEvents; i++) {
1243: const int nParents = nestedEvents[i].nParents;
1244: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1245: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1246: for (j = 0; j < nParents; j++) {
1247: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1248: const PetscLogEvent dftEvent = dftEvents[j];
1249: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1250: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1251: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1252: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1253: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1254: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1255: }
1256: }
1257: }
1259: PetscCall(PetscFree(nstEvents));
1260: PetscCall(PetscFree(totaltimes));
1262: /* Set outputs */
1263: *p_self = selftimes;
1264: *p_nstMax = nstMax;
1265: PetscFunctionReturn(PETSC_SUCCESS);
1266: }
1268: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1269: {
1270: int i;
1271: NestedEventId nst;
1272: PetscSortItem *sortSelfTimes;
1273: PetscLogDouble *times, *maxTimes;
1274: PetscStageLog stageLog;
1275: PetscEventRegInfo *eventRegInfo;
1276: const int dum_depth = 1, dum_count = 1, dum_parentcount = 1;
1277: PetscBool wasPrinted;
1278: MPI_Comm comm;
1280: PetscFunctionBegin;
1281: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1282: PetscCall(PetscLogGetStageLog(&stageLog));
1283: eventRegInfo = stageLog->eventLog->eventInfo;
1285: PetscCall(PetscMalloc1(nstMax + 1, ×));
1286: PetscCall(PetscMalloc1(nstMax + 1, &maxTimes));
1287: for (nst = 0; nst <= nstMax; nst++) times[nst] = selftimes[nst].time;
1288: PetscCall(MPIU_Allreduce(times, maxTimes, nstMax + 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1289: PetscCall(PetscFree(times));
1291: PetscCall(PetscMalloc1(nstMax + 1, &sortSelfTimes));
1293: /* Sort the self-timers on basis of the largest time needed */
1294: for (nst = 0; nst <= nstMax; nst++) {
1295: sortSelfTimes[nst].id = nst;
1296: sortSelfTimes[nst].val = maxTimes[nst];
1297: }
1298: PetscCall(PetscFree(maxTimes));
1299: qsort(sortSelfTimes, nstMax + 1, sizeof(PetscSortItem), compareSortItems);
1301: PetscCall(PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings"));
1302: PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f"));
1304: for (i = 0; i <= nstMax; i++) {
1305: if ((sortSelfTimes[i].val / totalTime) >= THRESHOLD) {
1306: NestedEventId nstEvent = sortSelfTimes[i].id;
1307: const char *name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1308: PetscEventPerfInfo selfPerfInfo;
1310: selfPerfInfo.time = selftimes[nstEvent].time;
1311: selfPerfInfo.flops = selftimes[nstEvent].flops;
1312: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1313: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1314: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1316: PetscCall(PetscLogNestedTreePrintLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted));
1317: if (wasPrinted) PetscCall(PetscViewerXMLEndSection(viewer, "event"));
1318: }
1319: }
1320: PetscCall(PetscViewerXMLEndSection(viewer, "selftimertable"));
1321: PetscCall(PetscFree(sortSelfTimes));
1322: PetscFunctionReturn(PETSC_SUCCESS);
1323: }
1325: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1326: {
1327: PetscLogDouble locTotalTime, globTotalTime;
1328: PetscNestedEventTree *tree = NULL;
1329: PetscSelfTimer *selftimers = NULL;
1330: int nTimers = 0, nstMax = 0;
1331: MPI_Comm comm;
1333: PetscFunctionBegin;
1334: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1335: PetscCall(PetscViewerInitASCII_XML(viewer));
1336: PetscCall(PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n"));
1337: PetscCall(PetscViewerXMLStartSection(viewer, "petscroot", NULL));
1339: /* Get the total elapsed time, local and global maximum */
1340: PetscCall(PetscTime(&locTotalTime));
1341: locTotalTime -= petsc_BaseTime;
1342: PetscCall(MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1344: /* Print global information about this run */
1345: PetscCall(PetscPrintExeSpecs(viewer));
1346: PetscCall(PetscPrintGlobalPerformance(viewer, locTotalTime));
1347: /* Collect nested timer tree info from all processes */
1348: PetscCall(PetscLogNestedTreeCreate(viewer, &tree, &nTimers));
1349: PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nTimers, globTotalTime));
1350: PetscCall(PetscLogNestedTreeDestroy(tree, nTimers));
1352: /* Calculate self-time for all (not-nested) events */
1353: PetscCall(PetscCalcSelfTime(viewer, &selftimers, &nstMax));
1354: PetscCall(PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime));
1355: PetscCall(PetscFree(selftimers));
1357: PetscCall(PetscViewerXMLEndSection(viewer, "petscroot"));
1358: PetscCall(PetscViewerFinalASCII_XML(viewer));
1359: PetscFunctionReturn(PETSC_SUCCESS);
1360: }
1362: /*
1363: * Get the name of a nested event.
1364: */
1365: static PetscErrorCode PetscGetNestedEventName(const PetscNestedEventTree *tree, int id, char **name)
1366: {
1367: PetscStageLog stageLog;
1369: PetscFunctionBegin;
1370: PetscCall(PetscLogGetStageLog(&stageLog));
1371: *name = stageLog->eventLog->eventInfo[(PetscLogEvent)tree[id].nstEvent].name;
1372: PetscFunctionReturn(PETSC_SUCCESS);
1373: }
1375: /*
1376: * Get the total time elapsed.
1377: */
1378: static PetscErrorCode PetscGetTotalTime(const PetscViewer viewer, PetscLogDouble *totalTime)
1379: {
1380: PetscLogDouble locTotalTime;
1381: MPI_Comm comm;
1383: PetscFunctionBegin;
1384: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1385: PetscCall(PetscTime(&locTotalTime));
1386: locTotalTime -= petsc_BaseTime;
1387: PetscCall(MPIU_Allreduce(&locTotalTime, totalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1388: PetscFunctionReturn(PETSC_SUCCESS);
1389: }
1391: /*
1392: * Write a line to the flame graph output and then recurse into child events.
1393: */
1394: static PetscErrorCode PetscLogNestedTreePrintFlamegraph(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, int iStart, PetscLogDouble totalTime, PetscIntStack eventStack)
1395: {
1396: int depth = tree[iStart].depth, parentCount = 1, i, nChildren;
1397: char *name = NULL;
1398: PetscEventPerfInfo myPerfInfo = {0}, selfPerfInfo = {0}, otherPerfInfo = {0};
1399: PetscLogDouble countsPerCall = 0, locTime, globTime;
1400: PetscSortItem *children;
1401: PetscStageLog stageLog;
1402: MPI_Comm comm;
1404: PetscFunctionBegin;
1405: PetscCall(PetscLogGetStageLog(&stageLog));
1406: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1408: /* Determine information about the child events as well as 'self' and 'other' */
1409: PetscCall(PetscLogNestedTreeGetChildrenCount(tree, nTimers, iStart, depth, &nChildren));
1410: PetscCall(PetscLogNestedTreeSetChildrenSortItems(viewer, tree, nTimers, iStart, depth, nChildren, &children));
1411: PetscCall(PetscLogNestedTreeSetSelfOtherPerfInfo(tree, iStart, totalTime, children, nChildren, &myPerfInfo, &selfPerfInfo, &otherPerfInfo, &parentCount, &countsPerCall));
1413: /* Write line to the file. The time shown is 'self' + 'other' because each entry in the output
1414: * is the total time spent in the event minus the amount spent in child events. */
1415: locTime = selfPerfInfo.time + otherPerfInfo.time;
1416: PetscCall(MPIU_Allreduce(&locTime, &globTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1417: if (globTime / totalTime > THRESHOLD && tree[iStart].own) {
1418: /* Iterate over parent events in the stack and write them */
1419: for (i = 0; i <= eventStack->top; i++) {
1420: PetscCall(PetscGetNestedEventName(tree, eventStack->stack[i], &name));
1421: PetscCall(PetscViewerASCIIPrintf(viewer, "%s;", name));
1422: }
1423: PetscCall(PetscGetNestedEventName(tree, iStart, &name));
1424: /* The output is given as an integer in microseconds because otherwise the file cannot be read
1425: * by apps such as speedscope (https://speedscope.app/). */
1426: PetscCall(PetscViewerASCIIPrintf(viewer, "%s %" PetscInt64_FMT "\n", name, (PetscInt64)(globTime * 1e6)));
1427: }
1429: /* Add the current event to the parent stack and write the child events */
1430: PetscCall(PetscIntStackPush(eventStack, iStart));
1431: for (i = 0; i < nChildren; i++) PetscCall(PetscLogNestedTreePrintFlamegraph(viewer, tree, nTimers, children[i].id, totalTime, eventStack));
1432: /* Pop the top item from the stack and immediately discard it */
1433: {
1434: int tmp;
1435: PetscCall(PetscIntStackPop(eventStack, &tmp));
1436: }
1437: PetscFunctionReturn(PETSC_SUCCESS);
1438: }
1440: /*
1441: * Print nested logging information to a file suitable for reading into a Flame Graph.
1442: *
1443: * The format consists of a semicolon-separated list of events and the event duration in microseconds (which must be an integer).
1444: * An example output would look like:
1445: * MatAssemblyBegin 1
1446: * MatAssemblyEnd 10
1447: * MatView 302
1448: * KSPSetUp 98
1449: * KSPSetUp;VecSet 5
1450: * KSPSolve 150
1451: *
1452: * This option may be requested from the command line by passing in the flag `-log_view :<somefile>.txt:ascii_flamegraph`.
1453: */
1454: PetscErrorCode PetscLogView_Flamegraph(PetscViewer viewer)
1455: {
1456: int nTimers = 0, i, nChildren;
1457: PetscIntStack eventStack;
1458: PetscLogDouble totalTime;
1459: PetscNestedEventTree *tree = NULL;
1460: PetscSortItem *children;
1462: PetscFunctionBegin;
1463: PetscCall(PetscGetTotalTime(viewer, &totalTime));
1464: PetscCall(PetscLogNestedTreeCreate(viewer, &tree, &nTimers));
1465: /* We use an integer stack to keep track of parent event IDs */
1466: PetscCall(PetscIntStackCreate(&eventStack));
1468: /* Initialize the child events and write them recursively */
1469: PetscCall(PetscLogNestedTreeGetChildrenCount(tree, nTimers, -1, 0, &nChildren));
1470: PetscCall(PetscLogNestedTreeSetChildrenSortItems(viewer, tree, nTimers, -1, 0, nChildren, &children));
1471: for (i = 0; i < nChildren; i++) PetscCall(PetscLogNestedTreePrintFlamegraph(viewer, tree, nTimers, children[i].id, totalTime, eventStack));
1473: PetscCall(PetscLogNestedTreeDestroy(tree, nTimers));
1474: PetscCall(PetscIntStackDestroy(eventStack));
1475: PetscFunctionReturn(PETSC_SUCCESS);
1476: }
1478: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1479: {
1480: PetscFunctionBegin;
1481: PetscCall(PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1));
1482: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO)
1483: PetscCall(PetscMPITypeSize(count, MPI_Type_f2c((MPI_Fint)datatype), &petsc_send_len, &petsc_send_len_th));
1484: #endif
1485: PetscFunctionReturn(PETSC_SUCCESS);
1486: }
1488: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1489: {
1490: PetscFunctionBegin;
1491: PetscCall(PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1));
1492: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO)
1493: PetscCall(PetscMPITypeSize(count, MPI_Type_f2c((MPI_Fint)datatype), &petsc_recv_len, &petsc_recv_len_th));
1494: #endif
1495: PetscFunctionReturn(PETSC_SUCCESS);
1496: }
1498: PETSC_EXTERN PetscErrorCode PetscAReduce(void)
1499: {
1500: PetscFunctionBegin;
1501: PetscCall(PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, 1));
1502: PetscFunctionReturn(PETSC_SUCCESS);
1503: }
1505: #endif