Actual source code: plog.c
2: /*
3: PETSc code to log object creation and destruction and PETSc events.
5: This provides the public API used by the rest of PETSc and by users.
7: These routines use a private API that is not used elsewhere in PETSc and is not
8: accessible to users. The private API is defined in logimpl.h and the utils directory.
10: */
11: #include <petsc/private/logimpl.h>
12: #include <petsctime.h>
13: #include <petscviewer.h>
14: #include <petscdevice.h>
15: #include <petsc/private/deviceimpl.h>
16: #if defined(PETSC_HAVE_TAU_PERFSTUBS)
17: #include <../src/sys/perfstubs/timer.h>
18: #endif
20: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;
22: #if defined(PETSC_USE_LOG)
23: #include <petscmachineinfo.h>
24: #include <petscconfiginfo.h>
26: #if defined(PETSC_HAVE_THREADSAFETY)
28: PetscInt petsc_log_gid = -1; /* Global threadId counter */
29: PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */
31: /* shared variables */
32: PetscSpinlock PetscLogSpinLock;
33: PetscHMapEvent eventInfoMap_th = NULL;
35: #endif
37: /* used in the MPI_XXX() count macros in petsclog.h */
39: /* Action and object logging variables */
40: Action *petsc_actions = NULL;
41: Object *petsc_objects = NULL;
42: PetscBool petsc_logActions = PETSC_FALSE;
43: PetscBool petsc_logObjects = PETSC_FALSE;
44: int petsc_numActions = 0, petsc_maxActions = 100;
45: int petsc_numObjects = 0, petsc_maxObjects = 100;
46: int petsc_numObjectsDestroyed = 0;
48: /* Global counters */
49: PetscLogDouble petsc_BaseTime = 0.0;
50: PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */
51: PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */
52: PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */
53: PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */
54: PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */
55: PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */
56: PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */
57: PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */
58: PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */
59: PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */
60: PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */
61: PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */
62: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
63: PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */
64: PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */
65: PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */
67: /* Thread Local storage */
68: PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0;
69: PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0;
70: PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0;
71: PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0;
72: PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0;
73: PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0;
74: PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0;
75: PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0;
76: PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0;
77: PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0;
78: PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0;
79: PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0;
80: PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0;
81: PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0;
82: PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0;
83: PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0;
85: #if defined(PETSC_HAVE_DEVICE)
86: PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */
87: PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */
88: PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */
89: PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */
90: PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */
91: PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */
92: PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */
93: PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */
94: PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */
95: PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */
97: PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0;
98: PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0;
99: PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0;
100: PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0;
101: PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0;
102: PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0;
103: PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0;
104: PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0;
105: PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0;
106: PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0;
107: #endif
109: #if defined(PETSC_HAVE_THREADSAFETY)
110: PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp)
111: {
112: *tot_th += tmp;
113: PetscCall(PetscSpinlockLock(&PetscLogSpinLock));
114: *tot += tmp;
115: PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock));
116: return PETSC_SUCCESS;
117: }
119: PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp)
120: {
121: *cnt_th = *cnt_th + 1;
122: *tot_th += tmp;
123: PetscCall(PetscSpinlockLock(&PetscLogSpinLock));
124: *tot += (PetscLogDouble)(tmp);
125: *cnt += *cnt + 1;
126: PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock));
127: return PETSC_SUCCESS;
128: }
130: PetscInt PetscLogGetTid(void)
131: {
132: if (petsc_log_tid < 0) {
133: PetscCall(PetscSpinlockLock(&PetscLogSpinLock));
134: petsc_log_tid = ++petsc_log_gid;
135: PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock));
136: }
137: return petsc_log_tid;
138: }
140: #endif
142: /* Logging functions */
143: PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL;
144: PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL;
145: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
146: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
148: /* Tracing event logging variables */
149: FILE *petsc_tracefile = NULL;
150: int petsc_tracelevel = 0;
151: const char *petsc_traceblanks = " ";
152: char petsc_tracespace[128] = " ";
153: PetscLogDouble petsc_tracetime = 0.0;
154: static PetscBool PetscLogInitializeCalled = PETSC_FALSE;
156: static PetscIntStack current_log_event_stack = NULL;
158: PETSC_INTERN PetscErrorCode PetscLogInitialize(void)
159: {
160: int stage;
161: PetscBool opt;
163: PetscFunctionBegin;
164: if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS);
165: PetscLogInitializeCalled = PETSC_TRUE;
167: PetscCall(PetscIntStackCreate(¤t_log_event_stack));
168: PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt));
169: if (opt) petsc_logActions = PETSC_FALSE;
170: PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt));
171: if (opt) petsc_logObjects = PETSC_FALSE;
172: if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions));
173: if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects));
174: PetscLogPHC = PetscLogObjCreateDefault;
175: PetscLogPHD = PetscLogObjDestroyDefault;
176: /* Setup default logging structures */
177: PetscCall(PetscStageLogCreate(&petsc_stageLog));
178: PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage));
180: PetscCall(PetscSpinlockCreate(&PetscLogSpinLock));
181: #if defined(PETSC_HAVE_THREADSAFETY)
182: petsc_log_tid = 0;
183: petsc_log_gid = 0;
184: PetscCall(PetscHMapEventCreate(&eventInfoMap_th));
185: #endif
187: /* All processors sync here for more consistent logging */
188: PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
189: PetscCall(PetscTime(&petsc_BaseTime));
190: PetscCall(PetscLogStagePush(stage));
191: #if defined(PETSC_HAVE_TAU_PERFSTUBS)
192: PetscStackCallExternalVoid("ps_initialize_", ps_initialize_());
193: #endif
194: PetscFunctionReturn(PETSC_SUCCESS);
195: }
197: PETSC_INTERN PetscErrorCode PetscLogFinalize(void)
198: {
199: PetscStageLog stageLog;
201: PetscFunctionBegin;
202: #if defined(PETSC_HAVE_THREADSAFETY)
203: if (eventInfoMap_th) {
204: PetscEventPerfInfo **array;
205: PetscInt n, off = 0;
207: PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n));
208: PetscCall(PetscMalloc1(n, &array));
209: PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array));
210: for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i]));
211: PetscCall(PetscFree(array));
212: PetscCall(PetscHMapEventDestroy(&eventInfoMap_th));
213: }
214: #endif
215: PetscCall(PetscFree(petsc_actions));
216: PetscCall(PetscFree(petsc_objects));
217: PetscCall(PetscLogNestedEnd());
218: PetscCall(PetscLogSet(NULL, NULL));
220: /* Resetting phase */
221: PetscCall(PetscLogGetStageLog(&stageLog));
222: PetscCall(PetscStageLogDestroy(stageLog));
223: PetscCall(PetscIntStackDestroy(current_log_event_stack));
224: current_log_event_stack = NULL;
226: petsc_TotalFlops = 0.0;
227: petsc_numActions = 0;
228: petsc_numObjects = 0;
229: petsc_numObjectsDestroyed = 0;
230: petsc_maxActions = 100;
231: petsc_maxObjects = 100;
232: petsc_actions = NULL;
233: petsc_objects = NULL;
234: petsc_logActions = PETSC_FALSE;
235: petsc_logObjects = PETSC_FALSE;
236: petsc_BaseTime = 0.0;
237: petsc_TotalFlops = 0.0;
238: petsc_send_ct = 0.0;
239: petsc_recv_ct = 0.0;
240: petsc_send_len = 0.0;
241: petsc_recv_len = 0.0;
242: petsc_isend_ct = 0.0;
243: petsc_irecv_ct = 0.0;
244: petsc_isend_len = 0.0;
245: petsc_irecv_len = 0.0;
246: petsc_wait_ct = 0.0;
247: petsc_wait_any_ct = 0.0;
248: petsc_wait_all_ct = 0.0;
249: petsc_sum_of_waits_ct = 0.0;
250: petsc_allreduce_ct = 0.0;
251: petsc_gather_ct = 0.0;
252: petsc_scatter_ct = 0.0;
253: petsc_TotalFlops_th = 0.0;
254: petsc_send_ct_th = 0.0;
255: petsc_recv_ct_th = 0.0;
256: petsc_send_len_th = 0.0;
257: petsc_recv_len_th = 0.0;
258: petsc_isend_ct_th = 0.0;
259: petsc_irecv_ct_th = 0.0;
260: petsc_isend_len_th = 0.0;
261: petsc_irecv_len_th = 0.0;
262: petsc_wait_ct_th = 0.0;
263: petsc_wait_any_ct_th = 0.0;
264: petsc_wait_all_ct_th = 0.0;
265: petsc_sum_of_waits_ct_th = 0.0;
266: petsc_allreduce_ct_th = 0.0;
267: petsc_gather_ct_th = 0.0;
268: petsc_scatter_ct_th = 0.0;
270: #if defined(PETSC_HAVE_DEVICE)
271: petsc_ctog_ct = 0.0;
272: petsc_gtoc_ct = 0.0;
273: petsc_ctog_sz = 0.0;
274: petsc_gtoc_sz = 0.0;
275: petsc_gflops = 0.0;
276: petsc_gtime = 0.0;
277: petsc_ctog_ct_th = 0.0;
278: petsc_gtoc_ct_th = 0.0;
279: petsc_ctog_sz_th = 0.0;
280: petsc_gtoc_sz_th = 0.0;
281: petsc_gflops_th = 0.0;
282: petsc_gtime_th = 0.0;
283: #endif
285: PETSC_LARGEST_EVENT = PETSC_EVENT;
286: PetscLogPHC = NULL;
287: PetscLogPHD = NULL;
288: petsc_tracefile = NULL;
289: petsc_tracelevel = 0;
290: petsc_traceblanks = " ";
291: petsc_tracespace[0] = ' ';
292: petsc_tracespace[1] = 0;
293: petsc_tracetime = 0.0;
294: PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
295: PETSC_OBJECT_CLASSID = 0;
296: petsc_stageLog = NULL;
297: PetscLogInitializeCalled = PETSC_FALSE;
298: PetscFunctionReturn(PETSC_SUCCESS);
299: }
301: /*@C
302: PetscLogSet - Sets the logging functions called at the beginning and ending of every event.
304: Not Collective
306: Input Parameters:
307: + b - The function called at beginning of event
308: - e - The function called at end of event
310: Level: developer
312: Developer Note:
313: The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`.
315: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()`
316: @*/
317: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
318: {
319: PetscFunctionBegin;
320: PetscLogPLB = b;
321: PetscLogPLE = e;
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
325: /*@C
326: PetscLogIsActive - Check if logging is currently in progress.
328: Not Collective
330: Output Parameter:
331: . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise
333: Level: beginner
335: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()`
336: @*/
337: PetscErrorCode PetscLogIsActive(PetscBool *isActive)
338: {
339: PetscFunctionBegin;
340: *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE;
341: PetscFunctionReturn(PETSC_SUCCESS);
342: }
344: /*@C
345: PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop
346: rates and object creation and should not slow programs down too much.
347: This routine may be called more than once.
349: Logically Collective over `PETSC_COMM_WORLD`
351: Options Database Key:
352: . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the
353: screen (for code configured with --with-log=1 (which is the default))
355: Usage:
356: .vb
357: PetscInitialize(...);
358: PetscLogDefaultBegin();
359: ... code ...
360: PetscLogView(viewer); or PetscLogDump();
361: PetscFinalize();
362: .ve
364: Level: advanced
366: Note:
367: `PetscLogView()` or `PetscLogDump()` actually cause the printing of
368: the logging information.
370: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`
371: @*/
372: PetscErrorCode PetscLogDefaultBegin(void)
373: {
374: PetscFunctionBegin;
375: PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault));
376: PetscFunctionReturn(PETSC_SUCCESS);
377: }
379: /*@C
380: PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
381: all events. This creates large log files and slows the program down.
383: Logically Collective on `PETSC_COMM_WORLD`
385: Options Database Key:
386: . -log_all - Prints extensive log information
388: Usage:
389: .vb
390: PetscInitialize(...);
391: PetscLogAllBegin();
392: ... code ...
393: PetscLogDump(filename);
394: PetscFinalize();
395: .ve
397: Level: advanced
399: Note:
400: A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
401: intended for production runs since it logs only flop rates and object
402: creation (and shouldn't significantly slow the programs).
404: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()`
405: @*/
406: PetscErrorCode PetscLogAllBegin(void)
407: {
408: PetscFunctionBegin;
409: PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete));
410: PetscFunctionReturn(PETSC_SUCCESS);
411: }
413: /*@C
414: PetscLogTraceBegin - Activates trace logging. Every time a PETSc event
415: begins or ends, the event name is printed.
417: Logically Collective on `PETSC_COMM_WORLD`
419: Input Parameter:
420: . file - The file to print trace in (e.g. stdout)
422: Options Database Key:
423: . -log_trace [filename] - Activates `PetscLogTraceBegin()`
425: Level: intermediate
427: Notes:
428: `PetscLogTraceBegin()` prints the processor number, the execution time (sec),
429: then "Event begin:" or "Event end:" followed by the event name.
431: `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful
432: to determine where a program is hanging without running in the
433: debugger. Can be used in conjunction with the -info option.
435: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()`
436: @*/
437: PetscErrorCode PetscLogTraceBegin(FILE *file)
438: {
439: PetscFunctionBegin;
440: petsc_tracefile = file;
442: PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace));
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: /*@
447: PetscLogActions - Determines whether actions are logged for the graphical viewer.
449: Not Collective
451: Input Parameter:
452: . flag - `PETSC_TRUE` if actions are to be logged
454: Options Database Key:
455: . -log_exclude_actions - Turns off actions logging
457: Level: intermediate
459: Note:
460: Logging of actions continues to consume more memory as the program
461: runs. Long running programs should consider turning this feature off.
462: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
463: @*/
464: PetscErrorCode PetscLogActions(PetscBool flag)
465: {
466: PetscFunctionBegin;
467: petsc_logActions = flag;
468: PetscFunctionReturn(PETSC_SUCCESS);
469: }
471: /*@
472: PetscLogObjects - Determines whether objects are logged for the graphical viewer.
474: Not Collective
476: Input Parameter:
477: . flag - `PETSC_TRUE` if objects are to be logged
479: Options Database Key:
480: . -log_exclude_objects - Turns off objects logging
482: Level: intermediate
484: Note:
485: Logging of objects continues to consume more memory as the program
486: runs. Long running programs should consider turning this feature off.
488: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
489: @*/
490: PetscErrorCode PetscLogObjects(PetscBool flag)
491: {
492: PetscFunctionBegin;
493: petsc_logObjects = flag;
494: PetscFunctionReturn(PETSC_SUCCESS);
495: }
497: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
498: /*@C
499: PetscLogStageRegister - Attaches a character string name to a logging stage.
501: Not Collective
503: Input Parameter:
504: . sname - The name to associate with that stage
506: Output Parameter:
507: . stage - The stage number
509: Level: intermediate
511: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
512: @*/
513: PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage)
514: {
515: PetscStageLog stageLog;
516: PetscLogEvent event;
518: PetscFunctionBegin;
519: PetscCall(PetscLogGetStageLog(&stageLog));
520: PetscCall(PetscStageLogRegister(stageLog, sname, stage));
521: /* Copy events already changed in the main stage, this sucks */
522: PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents));
523: for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event]));
524: PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses));
525: #if defined(PETSC_HAVE_TAU_PERFSTUBS)
526: if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname));
527: #endif
528: PetscFunctionReturn(PETSC_SUCCESS);
529: }
531: /*@C
532: PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage
534: Not Collective
536: Input Parameter:
537: . stage - The stage on which to log
539: Usage:
540: If the option -log_view is used to run the program containing the
541: following code, then 2 sets of summary data will be printed during
542: PetscFinalize().
543: .vb
544: PetscInitialize(int *argc,char ***args,0,0);
545: [stage 0 of code]
546: PetscLogStagePush(1);
547: [stage 1 of code]
548: PetscLogStagePop();
549: PetscBarrier(...);
550: [more stage 0 of code]
551: PetscFinalize();
552: .ve
554: Level: intermediate
556: Note:
557: Use `PetscLogStageRegister()` to register a stage.
559: .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()`
560: @*/
561: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
562: {
563: PetscStageLog stageLog;
565: PetscFunctionBegin;
566: PetscCall(PetscLogGetStageLog(&stageLog));
567: PetscCall(PetscStageLogPush(stageLog, stage));
568: #if defined(PETSC_HAVE_TAU_PERFSTUBS)
569: if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer));
570: #endif
571: PetscFunctionReturn(PETSC_SUCCESS);
572: }
574: /*@C
575: PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()`
577: Not Collective
579: Usage:
580: If the option -log_view is used to run the program containing the
581: following code, then 2 sets of summary data will be printed during
582: PetscFinalize().
583: .vb
584: PetscInitialize(int *argc,char ***args,0,0);
585: [stage 0 of code]
586: PetscLogStagePush(1);
587: [stage 1 of code]
588: PetscLogStagePop();
589: PetscBarrier(...);
590: [more stage 0 of code]
591: PetscFinalize();
592: .ve
594: Level: intermediate
596: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()`
597: @*/
598: PetscErrorCode PetscLogStagePop(void)
599: {
600: PetscStageLog stageLog;
602: PetscFunctionBegin;
603: PetscCall(PetscLogGetStageLog(&stageLog));
604: #if defined(PETSC_HAVE_TAU_PERFSTUBS)
605: if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer));
606: #endif
607: PetscCall(PetscStageLogPop(stageLog));
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
611: /*@
612: PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.
614: Not Collective
616: Input Parameters:
617: + stage - The stage
618: - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
620: Level: intermediate
622: Note:
623: If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist
625: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
626: @*/
627: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
628: {
629: PetscStageLog stageLog;
631: PetscFunctionBegin;
632: PetscCall(PetscLogGetStageLog(&stageLog));
633: PetscCall(PetscStageLogSetActive(stageLog, stage, isActive));
634: PetscFunctionReturn(PETSC_SUCCESS);
635: }
637: /*@
638: PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.
640: Not Collective
642: Input Parameter:
643: . stage - The stage
645: Output Parameter:
646: . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
648: Level: intermediate
650: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
651: @*/
652: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
653: {
654: PetscStageLog stageLog;
656: PetscFunctionBegin;
657: PetscCall(PetscLogGetStageLog(&stageLog));
658: PetscCall(PetscStageLogGetActive(stageLog, stage, isActive));
659: PetscFunctionReturn(PETSC_SUCCESS);
660: }
662: /*@
663: PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()`
665: Not Collective
667: Input Parameters:
668: + stage - The stage
669: - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
671: Level: intermediate
673: Developer Note:
674: What does visible mean, needs to be documented.
676: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
677: @*/
678: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
679: {
680: PetscStageLog stageLog;
682: PetscFunctionBegin;
683: PetscCall(PetscLogGetStageLog(&stageLog));
684: PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible));
685: PetscFunctionReturn(PETSC_SUCCESS);
686: }
688: /*@
689: PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()`
691: Not Collective
693: Input Parameter:
694: . stage - The stage
696: Output Parameter:
697: . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
699: Level: intermediate
701: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
702: @*/
703: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
704: {
705: PetscStageLog stageLog;
707: PetscFunctionBegin;
708: PetscCall(PetscLogGetStageLog(&stageLog));
709: PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible));
710: PetscFunctionReturn(PETSC_SUCCESS);
711: }
713: /*@C
714: PetscLogStageGetId - Returns the stage id when given the stage name.
716: Not Collective
718: Input Parameter:
719: . name - The stage name
721: Output Parameter:
722: . stage - The stage, , or -1 if no stage with that name exists
724: Level: intermediate
726: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
727: @*/
728: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
729: {
730: PetscStageLog stageLog;
732: PetscFunctionBegin;
733: PetscCall(PetscLogGetStageLog(&stageLog));
734: PetscCall(PetscStageLogGetStage(stageLog, name, stage));
735: PetscFunctionReturn(PETSC_SUCCESS);
736: }
738: /*------------------------------------------------ Event Functions --------------------------------------------------*/
740: /*@C
741: PetscLogEventRegister - Registers an event name for logging operations
743: Not Collective
745: Input Parameters:
746: + name - The name associated with the event
747: - classid - The classid associated to the class for this event, obtain either with
748: `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones
749: are only available in C code
751: Output Parameter:
752: . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`.
754: Example of Usage:
755: .vb
756: PetscLogEvent USER_EVENT;
757: PetscClassId classid;
758: PetscLogDouble user_event_flops;
759: PetscClassIdRegister("class name",&classid);
760: PetscLogEventRegister("User event name",classid,&USER_EVENT);
761: PetscLogEventBegin(USER_EVENT,0,0,0,0);
762: [code segment to monitor]
763: PetscLogFlops(user_event_flops);
764: PetscLogEventEnd(USER_EVENT,0,0,0,0);
765: .ve
767: Level: intermediate
769: Notes:
770: PETSc automatically logs library events if the code has been
771: configured with --with-log (which is the default) and
772: -log_view or -log_all is specified. `PetscLogEventRegister()` is
773: intended for logging user events to supplement this PETSc
774: information.
776: PETSc can gather data for use with the utilities Jumpshot
777: (part of the MPICH distribution). If PETSc has been compiled
778: with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
779: MPICH), the user can employ another command line option, -log_mpe,
780: to create a logfile, "mpe.log", which can be visualized
781: Jumpshot.
783: The classid is associated with each event so that classes of events
784: can be disabled simultaneously, such as all matrix events. The user
785: can either use an existing classid, such as `MAT_CLASSID`, or create
786: their own as shown in the example.
788: If an existing event with the same name exists, its event handle is
789: returned instead of creating a new event.
791: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`,
792: `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()`
793: @*/
794: PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event)
795: {
796: PetscStageLog stageLog;
797: int stage;
799: PetscFunctionBegin;
800: *event = PETSC_DECIDE;
801: PetscCall(PetscLogGetStageLog(&stageLog));
802: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event));
803: if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS);
804: PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event));
805: for (stage = 0; stage < stageLog->numStages; stage++) {
806: PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents));
807: PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses));
808: }
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
812: /*@
813: PetscLogEventSetCollective - Indicates that a particular event is collective.
815: Not Collective
817: Input Parameters:
818: + event - The event id
819: - collective - Boolean flag indicating whether a particular event is collective
821: Level: developer
823: Notes:
824: New events returned from `PetscLogEventRegister()` are collective by default.
826: Collective events are handled specially if the -log_sync is used. In that case the logging saves information about
827: two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication
828: to be performed. This option is useful to debug imbalance within the computations or communications
830: .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()`
831: @*/
832: PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective)
833: {
834: PetscStageLog stageLog;
835: PetscEventRegLog eventRegLog;
837: PetscFunctionBegin;
838: PetscCall(PetscLogGetStageLog(&stageLog));
839: PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog));
840: PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id");
841: eventRegLog->eventInfo[event].collective = collective;
842: PetscFunctionReturn(PETSC_SUCCESS);
843: }
845: /*@
846: PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage.
848: Not Collective
850: Input Parameter:
851: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
853: Level: developer
855: .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
856: @*/
857: PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid)
858: {
859: PetscStageLog stageLog;
860: int stage;
862: PetscFunctionBegin;
863: PetscCall(PetscLogGetStageLog(&stageLog));
864: for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid));
865: PetscFunctionReturn(PETSC_SUCCESS);
866: }
868: /*@
869: PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage.
871: Not Collective
873: Input Parameter:
874: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
876: Level: developer
878: Note:
879: If a class is excluded then events associated with that class are not logged.
881: .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()`
882: @*/
883: PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid)
884: {
885: PetscStageLog stageLog;
886: int stage;
888: PetscFunctionBegin;
889: PetscCall(PetscLogGetStageLog(&stageLog));
890: for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid));
891: PetscFunctionReturn(PETSC_SUCCESS);
892: }
894: /*@
895: PetscLogEventActivate - Indicates that a particular event should be logged.
897: Not Collective
899: Input Parameter:
900: . event - The event id
902: Usage:
903: .vb
904: PetscLogEventDeactivate(VEC_SetValues);
905: [code where you do not want to log VecSetValues()]
906: PetscLogEventActivate(VEC_SetValues);
907: [code where you do want to log VecSetValues()]
908: .ve
910: Level: advanced
912: Note:
913: The event may be either a pre-defined PETSc event (found in include/petsclog.h)
914: or an event number obtained with `PetscLogEventRegister()`.
916: .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
917: @*/
918: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
919: {
920: PetscStageLog stageLog;
921: int stage;
923: PetscFunctionBegin;
924: PetscCall(PetscLogGetStageLog(&stageLog));
925: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
926: PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event));
927: PetscFunctionReturn(PETSC_SUCCESS);
928: }
930: /*@
931: PetscLogEventDeactivate - Indicates that a particular event should not be logged.
933: Not Collective
935: Input Parameter:
936: . event - The event id
938: Usage:
939: .vb
940: PetscLogEventDeactivate(VEC_SetValues);
941: [code where you do not want to log VecSetValues()]
942: PetscLogEventActivate(VEC_SetValues);
943: [code where you do want to log VecSetValues()]
944: .ve
946: Level: advanced
948: Note:
949: The event may be either a pre-defined PETSc event (found in
950: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
952: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
953: @*/
954: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
955: {
956: PetscStageLog stageLog;
957: int stage;
959: PetscFunctionBegin;
960: PetscCall(PetscLogGetStageLog(&stageLog));
961: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
962: PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event));
963: PetscFunctionReturn(PETSC_SUCCESS);
964: }
966: /*@
967: PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called
969: Not Collective
971: Input Parameter:
972: . event - The event id
974: Usage:
975: .vb
976: PetscLogEventDeactivatePush(VEC_SetValues);
977: [code where you do not want to log VecSetValues()]
978: PetscLogEventDeactivatePop(VEC_SetValues);
979: [code where you do want to log VecSetValues()]
980: .ve
982: Level: advanced
984: Note:
985: The event may be either a pre-defined PETSc event (found in
986: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
988: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()`
989: @*/
990: PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event)
991: {
992: PetscStageLog stageLog;
993: int stage;
995: PetscFunctionBegin;
996: PetscCall(PetscLogGetStageLog(&stageLog));
997: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
998: PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event));
999: PetscFunctionReturn(PETSC_SUCCESS);
1000: }
1002: /*@
1003: PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()`
1005: Not Collective
1007: Input Parameter:
1008: . event - The event id
1010: Usage:
1011: .vb
1012: PetscLogEventDeactivatePush(VEC_SetValues);
1013: [code where you do not want to log VecSetValues()]
1014: PetscLogEventDeactivatePop(VEC_SetValues);
1015: [code where you do want to log VecSetValues()]
1016: .ve
1018: Level: advanced
1020: Note:
1021: The event may be either a pre-defined PETSc event (found in
1022: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
1024: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`
1025: @*/
1026: PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event)
1027: {
1028: PetscStageLog stageLog;
1029: int stage;
1031: PetscFunctionBegin;
1032: PetscCall(PetscLogGetStageLog(&stageLog));
1033: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
1034: PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event));
1035: PetscFunctionReturn(PETSC_SUCCESS);
1036: }
1038: /*@
1039: PetscLogEventSetActiveAll - Turns on logging of all events
1041: Not Collective
1043: Input Parameters:
1044: + event - The event id
1045: - isActive - The activity flag determining whether the event is logged
1047: Level: advanced
1049: .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()`
1050: @*/
1051: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
1052: {
1053: PetscStageLog stageLog;
1054: int stage;
1056: PetscFunctionBegin;
1057: PetscCall(PetscLogGetStageLog(&stageLog));
1058: for (stage = 0; stage < stageLog->numStages; stage++) {
1059: if (isActive) {
1060: PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event));
1061: } else {
1062: PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event));
1063: }
1064: }
1065: PetscFunctionReturn(PETSC_SUCCESS);
1066: }
1068: /*@
1069: PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage
1071: Not Collective
1073: Input Parameter:
1074: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
1076: Level: developer
1078: .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
1079: @*/
1080: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
1081: {
1082: PetscStageLog stageLog;
1083: int stage;
1085: PetscFunctionBegin;
1086: PetscCall(PetscLogGetStageLog(&stageLog));
1087: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
1088: PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid));
1089: PetscFunctionReturn(PETSC_SUCCESS);
1090: }
1092: /*@
1093: PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage
1095: Not Collective
1097: Input Parameter:
1098: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
1100: Level: developer
1102: .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
1103: @*/
1104: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
1105: {
1106: PetscStageLog stageLog;
1107: int stage;
1109: PetscFunctionBegin;
1110: PetscCall(PetscLogGetStageLog(&stageLog));
1111: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
1112: PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid));
1113: PetscFunctionReturn(PETSC_SUCCESS);
1114: }
1116: /*MC
1117: PetscLogEventSync - Synchronizes the beginning of a user event.
1119: Synopsis:
1120: #include <petsclog.h>
1121: PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm)
1123: Collective
1125: Input Parameters:
1126: + e - integer associated with the event obtained from PetscLogEventRegister()
1127: - comm - an MPI communicator
1129: Usage:
1130: .vb
1131: PetscLogEvent USER_EVENT;
1132: PetscLogEventRegister("User event",0,&USER_EVENT);
1133: PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD);
1134: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1135: [code segment to monitor]
1136: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1137: .ve
1139: Level: developer
1141: Note:
1142: This routine should be called only if there is not a
1143: `PetscObject` available to pass to `PetscLogEventBegin()`.
1145: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`
1146: M*/
1148: /*MC
1149: PetscLogEventBegin - Logs the beginning of a user event.
1151: Synopsis:
1152: #include <petsclog.h>
1153: PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
1155: Not Collective
1157: Input Parameters:
1158: + e - integer associated with the event obtained from PetscLogEventRegister()
1159: - o1,o2,o3,o4 - objects associated with the event, or 0
1161: Fortran Synopsis:
1162: void PetscLogEventBegin(int e,PetscErrorCode ierr)
1164: Usage:
1165: .vb
1166: PetscLogEvent USER_EVENT;
1167: PetscLogDouble user_event_flops;
1168: PetscLogEventRegister("User event",0,&USER_EVENT);
1169: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1170: [code segment to monitor]
1171: PetscLogFlops(user_event_flops);
1172: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1173: .ve
1175: Level: intermediate
1177: Developer Note:
1178: `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the
1179: errors that occur in the macro directly because other packages that use this macros have used them in their
1180: own functions or methods that do not return error codes and it would be disruptive to change the current
1181: behavior.
1183: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()`
1184: M*/
1186: /*MC
1187: PetscLogEventEnd - Log the end of a user event.
1189: Synopsis:
1190: #include <petsclog.h>
1191: PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
1193: Not Collective
1195: Input Parameters:
1196: + e - integer associated with the event obtained with PetscLogEventRegister()
1197: - o1,o2,o3,o4 - objects associated with the event, or 0
1199: Fortran Synopsis:
1200: void PetscLogEventEnd(int e,PetscErrorCode ierr)
1202: Usage:
1203: .vb
1204: PetscLogEvent USER_EVENT;
1205: PetscLogDouble user_event_flops;
1206: PetscLogEventRegister("User event",0,&USER_EVENT,);
1207: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1208: [code segment to monitor]
1209: PetscLogFlops(user_event_flops);
1210: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1211: .ve
1213: Level: intermediate
1215: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()`
1216: M*/
1218: /*@C
1219: PetscLogEventGetId - Returns the event id when given the event name.
1221: Not Collective
1223: Input Parameter:
1224: . name - The event name
1226: Output Parameter:
1227: . event - The event, or -1 if no event with that name exists
1229: Level: intermediate
1231: .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()`
1232: @*/
1233: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1234: {
1235: PetscStageLog stageLog;
1237: PetscFunctionBegin;
1238: PetscCall(PetscLogGetStageLog(&stageLog));
1239: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event));
1240: PetscFunctionReturn(PETSC_SUCCESS);
1241: }
1243: PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event)
1244: {
1245: PetscFunctionBegin;
1246: if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event));
1247: PetscFunctionReturn(PETSC_SUCCESS);
1248: }
1250: PetscErrorCode PetscLogPopCurrentEvent_Internal(void)
1251: {
1252: PetscFunctionBegin;
1253: if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL));
1254: PetscFunctionReturn(PETSC_SUCCESS);
1255: }
1257: PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event)
1258: {
1259: PetscBool empty;
1261: PetscFunctionBegin;
1263: *event = PETSC_DECIDE;
1264: PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty));
1265: if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event));
1266: PetscFunctionReturn(PETSC_SUCCESS);
1267: }
1269: PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event)
1270: {
1271: PetscFunctionBegin;
1272: if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL));
1273: PetscFunctionReturn(PETSC_SUCCESS);
1274: }
1276: PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event)
1277: {
1278: PetscStageLog stageLog;
1279: PetscEventPerfLog eventLog;
1280: int stage;
1282: PetscFunctionBegin;
1283: if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS);
1284: PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL));
1285: PetscCall(PetscLogGetStageLog(&stageLog));
1286: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
1287: PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog));
1288: eventLog->eventInfo[event].count--;
1289: PetscFunctionReturn(PETSC_SUCCESS);
1290: }
1292: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1293: /*@C
1294: PetscLogDump - Dumps logs of objects to a file. This file is intended to
1295: be read by bin/petscview. This program no longer exists.
1297: Collective on `PETSC_COMM_WORLD`
1299: Input Parameter:
1300: . name - an optional file name
1302: Usage:
1303: .vb
1304: PetscInitialize(...);
1305: PetscLogDefaultBegin(); or PetscLogAllBegin();
1306: ... code ...
1307: PetscLogDump(filename);
1308: PetscFinalize();
1309: .ve
1311: Level: advanced
1313: Note:
1314: The default file name is Log.<rank> where <rank> is the MPI process rank. If no name is specified,
1315: this file will be used.
1317: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()`
1318: @*/
1319: PetscErrorCode PetscLogDump(const char sname[])
1320: {
1321: PetscStageLog stageLog;
1322: PetscEventPerfInfo *eventInfo;
1323: FILE *fd;
1324: char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1325: PetscLogDouble flops, _TotalTime;
1326: PetscMPIInt rank;
1327: int action, object, curStage;
1328: PetscLogEvent event;
1330: PetscFunctionBegin;
1331: /* Calculate the total elapsed time */
1332: PetscCall(PetscTime(&_TotalTime));
1333: _TotalTime -= petsc_BaseTime;
1334: /* Open log file */
1335: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
1336: PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank));
1337: PetscCall(PetscFixFilename(file, fname));
1338: PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd));
1339: PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1340: /* Output totals */
1341: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime));
1342: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0));
1343: /* Output actions */
1344: if (petsc_logActions) {
1345: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions));
1346: for (action = 0; action < petsc_numActions; action++) {
1347: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1348: petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem));
1349: }
1350: }
1351: /* Output objects */
1352: if (petsc_logObjects) {
1353: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed));
1354: for (object = 0; object < petsc_numObjects; object++) {
1355: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem));
1356: if (!petsc_objects[object].name[0]) {
1357: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n"));
1358: } else {
1359: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name));
1360: }
1361: if (petsc_objects[object].info[0] != 0) {
1362: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n"));
1363: } else {
1364: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info));
1365: }
1366: }
1367: }
1368: /* Output events */
1369: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n"));
1370: PetscCall(PetscLogGetStageLog(&stageLog));
1371: PetscCall(PetscIntStackTop(stageLog->stack, &curStage));
1372: eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1373: for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1374: if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time;
1375: else flops = 0.0;
1376: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops));
1377: }
1378: PetscCall(PetscFClose(PETSC_COMM_WORLD, fd));
1379: PetscFunctionReturn(PETSC_SUCCESS);
1380: }
1382: /*
1383: PetscLogView_Detailed - Each process prints the times for its own events
1385: */
1386: PetscErrorCode PetscLogView_Detailed(PetscViewer viewer)
1387: {
1388: PetscStageLog stageLog;
1389: PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL;
1390: PetscLogDouble locTotalTime, numRed, maxMem;
1391: int numStages, numEvents, stage, event;
1392: MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
1393: PetscMPIInt rank, size;
1395: PetscFunctionBegin;
1396: PetscCallMPI(MPI_Comm_size(comm, &size));
1397: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1398: /* Must preserve reduction count before we go on */
1399: numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1400: /* Get the total elapsed time */
1401: PetscCall(PetscTime(&locTotalTime));
1402: locTotalTime -= petsc_BaseTime;
1403: PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size));
1404: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n"));
1405: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n"));
1406: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n"));
1407: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n"));
1408: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n"));
1409: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n"));
1410: PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n"));
1411: PetscCall(PetscLogGetStageLog(&stageLog));
1412: PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm));
1413: PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n"));
1414: for (stage = 0; stage < numStages; stage++) {
1415: PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name));
1416: PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name));
1417: PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm));
1418: for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name));
1419: }
1420: PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1421: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1422: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime));
1423: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct)));
1424: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len)));
1425: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed));
1426: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops));
1427: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects));
1428: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem));
1429: PetscCall(PetscViewerFlush(viewer));
1430: for (stage = 0; stage < numStages; stage++) {
1431: stageInfo = &stageLog->stageInfo[stage].perfInfo;
1432: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time,
1433: stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops));
1434: PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm));
1435: for (event = 0; event < numEvents; event++) {
1436: eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1437: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g",
1438: stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions,
1439: eventInfo->flops));
1440: if (eventInfo->dof[0] >= 0.) {
1441: PetscInt d, e;
1443: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : ["));
1444: for (d = 0; d < 8; ++d) {
1445: if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1446: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]));
1447: }
1448: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1449: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : ["));
1450: for (e = 0; e < 8; ++e) {
1451: if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1452: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]));
1453: }
1454: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1455: }
1456: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n"));
1457: }
1458: }
1459: PetscCall(PetscViewerFlush(viewer));
1460: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1461: PetscFunctionReturn(PETSC_SUCCESS);
1462: }
1464: /*
1465: PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1466: */
1467: PetscErrorCode PetscLogView_CSV(PetscViewer viewer)
1468: {
1469: PetscStageLog stageLog;
1470: PetscEventPerfInfo *eventInfo = NULL;
1471: PetscLogDouble locTotalTime, maxMem;
1472: int numStages, numEvents, stage, event;
1473: MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
1474: PetscMPIInt rank, size;
1476: PetscFunctionBegin;
1477: PetscCallMPI(MPI_Comm_size(comm, &size));
1478: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1479: /* Must preserve reduction count before we go on */
1480: /* Get the total elapsed time */
1481: PetscCall(PetscTime(&locTotalTime));
1482: locTotalTime -= petsc_BaseTime;
1483: PetscCall(PetscLogGetStageLog(&stageLog));
1484: PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm));
1485: PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1486: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1487: PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size));
1488: PetscCall(PetscViewerFlush(viewer));
1489: for (stage = 0; stage < numStages; stage++) {
1490: PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo;
1492: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops));
1493: PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm));
1494: for (event = 0; event < numEvents; event++) {
1495: eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1496: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength,
1497: eventInfo->numReductions, eventInfo->flops));
1498: if (eventInfo->dof[0] >= 0.) {
1499: PetscInt d, e;
1501: for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]));
1502: for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]));
1503: }
1504: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1505: }
1506: }
1507: PetscCall(PetscViewerFlush(viewer));
1508: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1509: PetscFunctionReturn(PETSC_SUCCESS);
1510: }
1512: static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1513: {
1514: PetscFunctionBegin;
1515: if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS);
1516: PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1517: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n"));
1518: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1519: PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n"));
1520: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1521: PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n"));
1522: PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n"));
1523: PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n"));
1524: PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n"));
1525: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1526: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n"));
1527: PetscFunctionReturn(PETSC_SUCCESS);
1528: }
1530: static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1531: {
1532: PetscFunctionBegin;
1533: if (PetscDefined(USE_DEBUG)) {
1534: PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1535: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n"));
1536: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1537: PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n"));
1538: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1539: PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n"));
1540: PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n"));
1541: PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n"));
1542: PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n"));
1543: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1544: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n"));
1545: }
1546: PetscFunctionReturn(PETSC_SUCCESS);
1547: }
1549: static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1550: {
1551: #if defined(PETSC_HAVE_DEVICE)
1552: PetscMPIInt size;
1553: PetscBool deviceInitialized = PETSC_FALSE;
1555: PetscFunctionBegin;
1556: PetscCallMPI(MPI_Comm_size(comm, &size));
1557: for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1558: const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1559: if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1560: deviceInitialized = PETSC_TRUE;
1561: break;
1562: }
1563: }
1564: /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1565: if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS);
1566: PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1567: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n"));
1568: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1569: PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n"));
1570: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1571: PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n"));
1572: PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n"));
1573: PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n"));
1574: PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n"));
1575: PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n"));
1576: PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n"));
1577: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1578: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n"));
1579: PetscFunctionReturn(PETSC_SUCCESS);
1580: #else
1581: return PETSC_SUCCESS;
1582: #endif
1583: }
1585: static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1586: {
1587: #if defined(PETSC_HAVE_DEVICE)
1589: PetscFunctionBegin;
1590: if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS);
1591: PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1592: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n"));
1593: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1594: PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n"));
1595: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1596: PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n"));
1597: PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n"));
1598: PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n"));
1599: PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n"));
1600: PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n"));
1601: PetscCall(PetscFPrintf(comm, fd, " # #\n"));
1602: PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n"));
1603: PetscFunctionReturn(PETSC_SUCCESS);
1604: #else
1605: return PETSC_SUCCESS;
1606: #endif
1607: }
1609: PetscErrorCode PetscLogView_Default(PetscViewer viewer)
1610: {
1611: FILE *fd;
1612: PetscLogDouble zero = 0.0;
1613: PetscStageLog stageLog;
1614: PetscStageInfo *stageInfo = NULL;
1615: PetscEventPerfInfo *eventInfo = NULL;
1616: PetscClassPerfInfo *classInfo;
1617: char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1618: const char *name;
1619: PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1620: PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1621: PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1622: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1623: PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1624: PetscLogDouble min, max, tot, ratio, avg, x, y;
1625: PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1626: #if defined(PETSC_HAVE_DEVICE)
1627: PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1628: PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1629: #endif
1630: PetscMPIInt minC, maxC;
1631: PetscMPIInt size, rank;
1632: PetscBool *localStageUsed, *stageUsed;
1633: PetscBool *localStageVisible, *stageVisible;
1634: int numStages, localNumEvents, numEvents;
1635: int stage, oclass;
1636: PetscLogEvent event;
1637: char version[256];
1638: MPI_Comm comm;
1639: #if defined(PETSC_HAVE_DEVICE)
1640: PetscLogEvent eventid;
1641: PetscInt64 nas = 0x7FF0000000000002;
1642: #endif
1644: PetscFunctionBegin;
1645: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1646: PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1647: PetscCall(PetscViewerASCIIGetPointer(viewer, &fd));
1648: PetscCallMPI(MPI_Comm_size(comm, &size));
1649: PetscCallMPI(MPI_Comm_rank(comm, &rank));
1650: /* Get the total elapsed time */
1651: PetscCall(PetscTime(&locTotalTime));
1652: locTotalTime -= petsc_BaseTime;
1654: PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1655: PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n"));
1656: PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1657: PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n"));
1658: PetscCall(PetscLogViewWarnSync(comm, fd));
1659: PetscCall(PetscLogViewWarnDebugging(comm, fd));
1660: PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
1661: PetscCall(PetscLogViewWarnGpuTime(comm, fd));
1662: PetscCall(PetscGetArchType(arch, sizeof(arch)));
1663: PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
1664: PetscCall(PetscGetUserName(username, sizeof(username)));
1665: PetscCall(PetscGetProgramName(pname, sizeof(pname)));
1666: PetscCall(PetscGetDate(date, sizeof(date)));
1667: PetscCall(PetscGetVersion(version, sizeof(version)));
1668: if (size == 1) {
1669: PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date));
1670: } else {
1671: PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date));
1672: }
1673: #if defined(PETSC_HAVE_OPENMP)
1674: PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads));
1675: #endif
1676: PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version));
1678: /* Must preserve reduction count before we go on */
1679: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1681: /* Calculate summary information */
1682: PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n"));
1683: /* Time */
1684: PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1685: PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1686: PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1687: avg = tot / ((PetscLogDouble)size);
1688: if (min != 0.0) ratio = max / min;
1689: else ratio = 0.0;
1690: PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg));
1691: TotalTime = tot;
1692: /* Objects */
1693: avg = (PetscLogDouble)petsc_numObjects;
1694: PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1695: PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1696: PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1697: avg = tot / ((PetscLogDouble)size);
1698: if (min != 0.0) ratio = max / min;
1699: else ratio = 0.0;
1700: PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg));
1701: /* Flops */
1702: PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1703: PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1704: PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1705: avg = tot / ((PetscLogDouble)size);
1706: if (min != 0.0) ratio = max / min;
1707: else ratio = 0.0;
1708: PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1709: TotalFlops = tot;
1710: /* Flops/sec -- Must talk to Barry here */
1711: if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1712: else flops = 0.0;
1713: PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1714: PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1715: PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1716: avg = tot / ((PetscLogDouble)size);
1717: if (min != 0.0) ratio = max / min;
1718: else ratio = 0.0;
1719: PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1720: /* Memory */
1721: PetscCall(PetscMallocGetMaximumUsage(&mem));
1722: if (mem > 0.0) {
1723: PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1724: PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1725: PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1726: avg = tot / ((PetscLogDouble)size);
1727: if (min != 0.0) ratio = max / min;
1728: else ratio = 0.0;
1729: PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1730: }
1731: /* Messages */
1732: mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1733: PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1734: PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1735: PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1736: avg = tot / ((PetscLogDouble)size);
1737: if (min != 0.0) ratio = max / min;
1738: else ratio = 0.0;
1739: PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1740: numMessages = tot;
1741: /* Message Lengths */
1742: mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1743: PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1744: PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1745: PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1746: if (numMessages != 0) avg = tot / numMessages;
1747: else avg = 0.0;
1748: if (min != 0.0) ratio = max / min;
1749: else ratio = 0.0;
1750: PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1751: messageLength = tot;
1752: /* Reductions */
1753: PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1754: PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1755: PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1756: if (min != 0.0) ratio = max / min;
1757: else ratio = 0.0;
1758: PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio));
1759: numReductions = red; /* wrong because uses count from process zero */
1760: PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n"));
1761: PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n"));
1762: PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n"));
1764: /* Get total number of stages --
1765: Currently, a single processor can register more stages than another, but stages must all be registered in order.
1766: We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1767: This seems best accomplished by assoicating a communicator with each stage.
1768: */
1769: PetscCall(PetscLogGetStageLog(&stageLog));
1770: PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm));
1771: PetscCall(PetscMalloc1(numStages, &localStageUsed));
1772: PetscCall(PetscMalloc1(numStages, &stageUsed));
1773: PetscCall(PetscMalloc1(numStages, &localStageVisible));
1774: PetscCall(PetscMalloc1(numStages, &stageVisible));
1775: if (numStages > 0) {
1776: stageInfo = stageLog->stageInfo;
1777: for (stage = 0; stage < numStages; stage++) {
1778: if (stage < stageLog->numStages) {
1779: localStageUsed[stage] = stageInfo[stage].used;
1780: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1781: } else {
1782: localStageUsed[stage] = PETSC_FALSE;
1783: localStageVisible[stage] = PETSC_TRUE;
1784: }
1785: }
1786: PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm));
1787: PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm));
1788: for (stage = 0; stage < numStages; stage++) {
1789: if (stageUsed[stage]) {
1790: PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n"));
1791: PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n"));
1792: break;
1793: }
1794: }
1795: for (stage = 0; stage < numStages; stage++) {
1796: if (!stageUsed[stage]) continue;
1797: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1798: if (localStageUsed[stage]) {
1799: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1800: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1801: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1802: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1803: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1804: name = stageInfo[stage].name;
1805: } else {
1806: PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1807: PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1808: PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1809: PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1810: PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1811: name = "";
1812: }
1813: mess *= 0.5;
1814: messLen *= 0.5;
1815: red /= size;
1816: if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1817: else fracTime = 0.0;
1818: if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1819: else fracFlops = 0.0;
1820: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1821: if (numMessages != 0.0) fracMessages = mess / numMessages;
1822: else fracMessages = 0.0;
1823: if (mess != 0.0) avgMessLen = messLen / mess;
1824: else avgMessLen = 0.0;
1825: if (messageLength != 0.0) fracLength = messLen / messageLength;
1826: else fracLength = 0.0;
1827: if (numReductions != 0.0) fracReductions = red / numReductions;
1828: else fracReductions = 0.0;
1829: PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions));
1830: }
1831: }
1833: PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n"));
1834: PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n"));
1835: PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n"));
1836: PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n"));
1837: PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n"));
1838: PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n"));
1839: PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n"));
1840: PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n"));
1841: PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n"));
1842: PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n"));
1843: PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n"));
1844: PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n"));
1845: PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n"));
1846: PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n"));
1847: PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n"));
1848: if (PetscLogMemory) {
1849: PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n"));
1850: PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n"));
1851: PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n"));
1852: PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n"));
1853: PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n"));
1854: }
1855: #if defined(PETSC_HAVE_DEVICE)
1856: PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n"));
1857: PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n"));
1858: PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n"));
1859: PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n"));
1860: PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n"));
1861: PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n"));
1862: #endif
1863: PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n"));
1865: PetscCall(PetscLogViewWarnDebugging(comm, fd));
1867: /* Report events */
1868: PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total"));
1869: if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI"));
1870: #if defined(PETSC_HAVE_DEVICE)
1871: PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU"));
1872: #endif
1873: PetscCall(PetscFPrintf(comm, fd, "\n"));
1874: PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s"));
1875: if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes"));
1876: #if defined(PETSC_HAVE_DEVICE)
1877: PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F"));
1878: #endif
1879: PetscCall(PetscFPrintf(comm, fd, "\n"));
1880: PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
1881: if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
1882: #if defined(PETSC_HAVE_DEVICE)
1883: PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
1884: #endif
1885: PetscCall(PetscFPrintf(comm, fd, "\n"));
1887: #if defined(PETSC_HAVE_DEVICE)
1888: /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1889: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve));
1890: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step));
1891: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve));
1892: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve));
1893: #endif
1895: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1896: for (stage = 0; stage < numStages; stage++) {
1897: if (!stageVisible[stage]) continue;
1898: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1899: if (localStageUsed[stage]) {
1900: PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name));
1901: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1902: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1903: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1904: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1905: PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1906: } else {
1907: PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage));
1908: PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1909: PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1910: PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1911: PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1912: PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1913: }
1914: mess *= 0.5;
1915: messLen *= 0.5;
1916: red /= size;
1918: /* Get total number of events in this stage --
1919: Currently, a single processor can register more events than another, but events must all be registered in order,
1920: just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1921: on the event ID. This seems best accomplished by associating a communicator with each stage.
1923: Problem: If the event did not happen on proc 1, its name will not be available.
1924: Problem: Event visibility is not implemented
1925: */
1926: if (localStageUsed[stage]) {
1927: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1928: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1929: } else localNumEvents = 0;
1930: PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm));
1931: for (event = 0; event < numEvents; event++) {
1932: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1933: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1934: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops;
1935: else flopr = 0.0;
1936: PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1937: PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1938: PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1939: PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1940: PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1941: PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1942: PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1943: PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1944: PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1945: PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm));
1946: PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm));
1947: if (PetscLogMemory) {
1948: PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1949: PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1950: PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1951: PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1952: }
1953: #if defined(PETSC_HAVE_DEVICE)
1954: PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1955: PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1956: PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1957: PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1958: PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1959: PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1960: #endif
1961: name = stageLog->eventLog->eventInfo[event].name;
1962: } else {
1963: int ierr = 0;
1965: flopr = 0.0;
1966: PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1967: PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1968: PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1969: PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1970: PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1971: PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1972: PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1973: PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1974: PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1975: PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm));
1976: PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm));
1977: if (PetscLogMemory) {
1978: PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1979: PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1980: PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1981: PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1982: }
1983: #if defined(PETSC_HAVE_DEVICE)
1984: PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1985: PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1986: PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1987: PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1988: PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1989: PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1990: #endif
1991: name = "";
1992: }
1993: if (mint < 0.0) {
1994: PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name));
1995: mint = 0;
1996: }
1997: PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name);
1998: /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1999: #if defined(PETSC_HAVE_DEVICE)
2000: if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
2001: memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
2002: PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid));
2003: if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) {
2004: memcpy(&mint, &nas, sizeof(PetscLogDouble));
2005: memcpy(&maxt, &nas, sizeof(PetscLogDouble));
2006: }
2007: }
2008: #endif
2009: totm *= 0.5;
2010: totml *= 0.5;
2011: totr /= size;
2013: if (maxC != 0) {
2014: if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
2015: else ratC = 0.0;
2016: if (mint != 0.0) ratt = maxt / mint;
2017: else ratt = 0.0;
2018: if (minf != 0.0) ratf = maxf / minf;
2019: else ratf = 0.0;
2020: if (TotalTime != 0.0) fracTime = tott / TotalTime;
2021: else fracTime = 0.0;
2022: if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
2023: else fracFlops = 0.0;
2024: if (stageTime != 0.0) fracStageTime = tott / stageTime;
2025: else fracStageTime = 0.0;
2026: if (flops != 0.0) fracStageFlops = totf / flops;
2027: else fracStageFlops = 0.0;
2028: if (numMessages != 0.0) fracMess = totm / numMessages;
2029: else fracMess = 0.0;
2030: if (messageLength != 0.0) fracMessLen = totml / messageLength;
2031: else fracMessLen = 0.0;
2032: if (numReductions != 0.0) fracRed = totr / numReductions;
2033: else fracRed = 0.0;
2034: if (mess != 0.0) fracStageMess = totm / mess;
2035: else fracStageMess = 0.0;
2036: if (messLen != 0.0) fracStageMessLen = totml / messLen;
2037: else fracStageMessLen = 0.0;
2038: if (red != 0.0) fracStageRed = totr / red;
2039: else fracStageRed = 0.0;
2040: if (totm != 0.0) totml /= totm;
2041: else totml = 0.0;
2042: if (maxt != 0.0) flopr = totf / maxt;
2043: else flopr = 0.0;
2044: if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
2045: PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6));
2046: else
2047: PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6));
2048: if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6));
2049: #if defined(PETSC_HAVE_DEVICE)
2050: if (totf != 0.0) fracgflops = gflops / totf;
2051: else fracgflops = 0.0;
2052: if (gmaxt != 0.0) gflopr = gflops / gmaxt;
2053: else gflopr = 0.0;
2054: PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops));
2055: #endif
2056: PetscCall(PetscFPrintf(comm, fd, "\n"));
2057: }
2058: }
2059: }
2061: /* Memory usage and object creation */
2062: PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
2063: if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
2064: #if defined(PETSC_HAVE_DEVICE)
2065: PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
2066: #endif
2067: PetscCall(PetscFPrintf(comm, fd, "\n"));
2068: PetscCall(PetscFPrintf(comm, fd, "\n"));
2070: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
2071: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
2072: stats for stages local to processor sets.
2073: */
2074: /* We should figure out the longest object name here (now 20 characters) */
2075: PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n"));
2076: for (stage = 0; stage < numStages; stage++) {
2077: if (localStageUsed[stage]) {
2078: classInfo = stageLog->stageInfo[stage].classLog->classInfo;
2079: PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name));
2080: for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
2081: if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
2082: PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions));
2083: }
2084: }
2085: } else {
2086: if (!localStageVisible[stage]) continue;
2087: PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage));
2088: }
2089: }
2091: PetscCall(PetscFree(localStageUsed));
2092: PetscCall(PetscFree(stageUsed));
2093: PetscCall(PetscFree(localStageVisible));
2094: PetscCall(PetscFree(stageVisible));
2096: /* Information unrelated to this particular run */
2097: PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n"));
2098: PetscCall(PetscTime(&y));
2099: PetscCall(PetscTime(&x));
2100: PetscCall(PetscTime(&y));
2101: PetscCall(PetscTime(&y));
2102: PetscCall(PetscTime(&y));
2103: PetscCall(PetscTime(&y));
2104: PetscCall(PetscTime(&y));
2105: PetscCall(PetscTime(&y));
2106: PetscCall(PetscTime(&y));
2107: PetscCall(PetscTime(&y));
2108: PetscCall(PetscTime(&y));
2109: PetscCall(PetscTime(&y));
2110: PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0));
2111: /* MPI information */
2112: if (size > 1) {
2113: MPI_Status status;
2114: PetscMPIInt tag;
2115: MPI_Comm newcomm;
2117: PetscCallMPI(MPI_Barrier(comm));
2118: PetscCall(PetscTime(&x));
2119: PetscCallMPI(MPI_Barrier(comm));
2120: PetscCallMPI(MPI_Barrier(comm));
2121: PetscCallMPI(MPI_Barrier(comm));
2122: PetscCallMPI(MPI_Barrier(comm));
2123: PetscCallMPI(MPI_Barrier(comm));
2124: PetscCall(PetscTime(&y));
2125: PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0));
2126: PetscCall(PetscCommDuplicate(comm, &newcomm, &tag));
2127: PetscCallMPI(MPI_Barrier(comm));
2128: if (rank) {
2129: PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status));
2130: PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm));
2131: } else {
2132: PetscCall(PetscTime(&x));
2133: PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm));
2134: PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status));
2135: PetscCall(PetscTime(&y));
2136: PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size));
2137: }
2138: PetscCall(PetscCommDestroy(&newcomm));
2139: }
2140: PetscCall(PetscOptionsView(NULL, viewer));
2142: /* Machine and compile information */
2143: #if defined(PETSC_USE_FORTRAN_KERNELS)
2144: PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n"));
2145: #else
2146: PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n"));
2147: #endif
2148: #if defined(PETSC_USE_64BIT_INDICES)
2149: PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n"));
2150: #elif defined(PETSC_USE___FLOAT128)
2151: PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n"));
2152: #endif
2153: #if defined(PETSC_USE_REAL_SINGLE)
2154: PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n"));
2155: #elif defined(PETSC_USE___FLOAT128)
2156: PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n"));
2157: #endif
2158: #if defined(PETSC_USE_REAL_MAT_SINGLE)
2159: PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n"));
2160: #else
2161: PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n"));
2162: #endif
2163: PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt)));
2165: PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions));
2166: PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo));
2167: PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo));
2168: PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo));
2169: PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo));
2171: /* Cleanup */
2172: PetscCall(PetscFPrintf(comm, fd, "\n"));
2173: PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
2174: PetscCall(PetscLogViewWarnDebugging(comm, fd));
2175: PetscCall(PetscFPTrapPop());
2176: PetscFunctionReturn(PETSC_SUCCESS);
2177: }
2179: /*@C
2180: PetscLogView - Prints a summary of the logging.
2182: Collective over MPI_Comm
2184: Input Parameter:
2185: . viewer - an ASCII viewer
2187: Options Database Keys:
2188: + -log_view [:filename] - Prints summary of log information
2189: . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file
2190: . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it)
2191: . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it)
2192: . -log_view_memory - Also display memory usage in each event
2193: . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation)
2194: . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation
2195: - -log_trace [filename] - Displays a trace of what each process is doing
2197: Level: beginner
2199: Notes:
2200: It is possible to control the logging programmatically but we recommend using the options database approach whenever possible
2201: By default the summary is printed to stdout.
2203: Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin()
2205: If PETSc is configured with --with-logging=0 then this functionality is not available
2207: To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current
2208: directory then open filename.xml with your browser. Specific notes for certain browsers
2209: $ Firefox and Internet explorer - simply open the file
2210: $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files
2211: $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access
2212: or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with
2213: your browser.
2214: Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser
2215: window and render the XML log file contents.
2217: The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS
2219: The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph)
2220: or using speedscope (https://www.speedscope.app).
2221: Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py.
2223: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()`
2224: @*/
2225: PetscErrorCode PetscLogView(PetscViewer viewer)
2226: {
2227: PetscBool isascii;
2228: PetscViewerFormat format;
2229: int stage, lastStage;
2230: PetscStageLog stageLog;
2232: PetscFunctionBegin;
2233: PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine");
2234: /* Pop off any stages the user forgot to remove */
2235: lastStage = 0;
2236: PetscCall(PetscLogGetStageLog(&stageLog));
2237: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
2238: while (stage >= 0) {
2239: lastStage = stage;
2240: PetscCall(PetscStageLogPop(stageLog));
2241: PetscCall(PetscStageLogGetCurrent(stageLog, &stage));
2242: }
2243: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2244: PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII");
2245: PetscCall(PetscViewerGetFormat(viewer, &format));
2246: if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
2247: PetscCall(PetscLogView_Default(viewer));
2248: } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2249: PetscCall(PetscLogView_Detailed(viewer));
2250: } else if (format == PETSC_VIEWER_ASCII_CSV) {
2251: PetscCall(PetscLogView_CSV(viewer));
2252: } else if (format == PETSC_VIEWER_ASCII_XML) {
2253: PetscCall(PetscLogView_Nested(viewer));
2254: } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) {
2255: PetscCall(PetscLogView_Flamegraph(viewer));
2256: }
2257: PetscCall(PetscStageLogPush(stageLog, lastStage));
2258: PetscFunctionReturn(PETSC_SUCCESS);
2259: }
2261: /*@C
2262: PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed.
2264: Collective on `PETSC_COMM_WORLD`
2266: Level: developer
2268: .seealso: [](ch_profiling), `PetscLogView()`
2269: @*/
2270: PetscErrorCode PetscLogViewFromOptions(void)
2271: {
2272: PetscViewer viewer;
2273: PetscBool flg;
2274: PetscViewerFormat format;
2276: PetscFunctionBegin;
2277: PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg));
2278: if (flg) {
2279: PetscCall(PetscViewerPushFormat(viewer, format));
2280: PetscCall(PetscLogView(viewer));
2281: PetscCall(PetscViewerPopFormat(viewer));
2282: PetscCall(PetscViewerDestroy(&viewer));
2283: }
2284: PetscFunctionReturn(PETSC_SUCCESS);
2285: }
2287: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
2288: /*@C
2289: PetscGetFlops - Returns the number of flops used on this processor
2290: since the program began.
2292: Not Collective
2294: Output Parameter:
2295: flops - number of floating point operations
2297: Level: intermediate
2299: Notes:
2300: A global counter logs all PETSc flop counts. The user can use
2301: `PetscLogFlops()` to increment this counter to include flops for the
2302: application code.
2304: A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank
2306: .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()`
2307: @*/
2308: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
2309: {
2310: PetscFunctionBegin;
2311: *flops = petsc_TotalFlops;
2312: PetscFunctionReturn(PETSC_SUCCESS);
2313: }
2315: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2316: {
2317: size_t fullLength;
2318: va_list Argp;
2320: PetscFunctionBegin;
2321: if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS);
2322: va_start(Argp, format);
2323: PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp));
2324: va_end(Argp);
2325: PetscFunctionReturn(PETSC_SUCCESS);
2326: }
2328: /*MC
2329: PetscLogFlops - Adds floating point operations to the global counter.
2331: Synopsis:
2332: #include <petsclog.h>
2333: PetscErrorCode PetscLogFlops(PetscLogDouble f)
2335: Not Collective
2337: Input Parameter:
2338: . f - flop counter
2340: Usage:
2341: .vb
2342: PetscLogEvent USER_EVENT;
2343: PetscLogEventRegister("User event",0,&USER_EVENT);
2344: PetscLogEventBegin(USER_EVENT,0,0,0,0);
2345: [code segment to monitor]
2346: PetscLogFlops(user_flops)
2347: PetscLogEventEnd(USER_EVENT,0,0,0,0);
2348: .ve
2350: Level: intermediate
2352: Note:
2353: A global counter logs all PETSc flop counts. The user can use
2354: PetscLogFlops() to increment this counter to include flops for the
2355: application code.
2357: .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()`
2358: M*/
2360: /*MC
2361: PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2362: to get accurate timings
2364: Synopsis:
2365: #include <petsclog.h>
2366: void PetscPreLoadBegin(PetscBool flag,char *name);
2368: Not Collective
2370: Input Parameters:
2371: + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden
2372: with command line option -preload true or -preload false
2373: - name - name of first stage (lines of code timed separately with `-log_view`) to
2374: be preloaded
2376: Usage:
2377: .vb
2378: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2379: lines of code
2380: PetscPreLoadStage("second stage");
2381: lines of code
2382: PetscPreLoadEnd();
2383: .ve
2385: Level: intermediate
2387: Note:
2388: Only works in C/C++, not Fortran
2390: Flags available within the macro.
2391: + PetscPreLoadingUsed - true if we are or have done preloading
2392: . PetscPreLoadingOn - true if it is CURRENTLY doing preload
2393: . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2394: - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2395: The first two variables are available throughout the program, the second two only between the `PetscPreLoadBegin()`
2396: and `PetscPreLoadEnd()`
2398: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
2399: M*/
2401: /*MC
2402: PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2403: to get accurate timings
2405: Synopsis:
2406: #include <petsclog.h>
2407: void PetscPreLoadEnd(void);
2409: Not Collective
2411: Usage:
2412: .vb
2413: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2414: lines of code
2415: PetscPreLoadStage("second stage");
2416: lines of code
2417: PetscPreLoadEnd();
2418: .ve
2420: Level: intermediate
2422: Note:
2423: Only works in C/C++ not fortran
2425: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()`
2426: M*/
2428: /*MC
2429: PetscPreLoadStage - Start a new segment of code to be timed separately.
2430: to get accurate timings
2432: Synopsis:
2433: #include <petsclog.h>
2434: void PetscPreLoadStage(char *name);
2436: Not Collective
2438: Usage:
2439: .vb
2440: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2441: lines of code
2442: PetscPreLoadStage("second stage");
2443: lines of code
2444: PetscPreLoadEnd();
2445: .ve
2447: Level: intermediate
2449: Note:
2450: Only works in C/C++ not fortran
2452: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`
2453: M*/
2455: #if PetscDefined(HAVE_DEVICE)
2456: #include <petsc/private/deviceimpl.h>
2458: PetscBool PetscLogGpuTimeFlag = PETSC_FALSE;
2460: /*
2461: This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code
2462: because it will result in timing results that cannot be interpreted.
2463: */
2464: static PetscErrorCode PetscLogGpuTime_Off(void)
2465: {
2466: PetscLogGpuTimeFlag = PETSC_FALSE;
2467: return PETSC_SUCCESS;
2468: }
2470: /*@C
2471: PetscLogGpuTime - turn on the logging of GPU time for GPU kernels
2473: Options Database Key:
2474: . -log_view_gpu_time - provide the GPU times in the -log_view output
2476: Level: advanced
2478: Notes:
2479: Turning on the timing of the
2480: GPU kernels can slow down the entire computation and should only be used when studying the performance
2481: of operations on GPU such as vector operations and matrix-vector operations.
2483: This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off.
2485: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()`
2486: @*/
2487: PetscErrorCode PetscLogGpuTime(void)
2488: {
2489: if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off));
2490: PetscLogGpuTimeFlag = PETSC_TRUE;
2491: return PETSC_SUCCESS;
2492: }
2494: /*@C
2495: PetscLogGpuTimeBegin - Start timer for device
2497: Level: intermediate
2499: Notes:
2500: When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times).
2502: When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times).
2504: There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()`
2506: This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space.
2508: The regular logging captures the time for data transfers and any CPU activities during the event
2510: It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel.
2512: Developer Notes:
2513: The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`.
2515: `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the
2516: event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution,
2517: but not continued GPU execution, until the timer event is recorded.
2519: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()`
2520: @*/
2521: PetscErrorCode PetscLogGpuTimeBegin(void)
2522: {
2523: PetscFunctionBegin;
2524: if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS);
2525: if (PetscDefined(HAVE_DEVICE)) {
2526: PetscDeviceContext dctx;
2528: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
2529: PetscCall(PetscDeviceContextBeginTimer_Internal(dctx));
2530: } else {
2531: PetscCall(PetscTimeSubtract(&petsc_gtime));
2532: }
2533: PetscFunctionReturn(PETSC_SUCCESS);
2534: }
2536: /*@C
2537: PetscLogGpuTimeEnd - Stop timer for device
2539: Level: intermediate
2541: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()`
2542: @*/
2543: PetscErrorCode PetscLogGpuTimeEnd(void)
2544: {
2545: PetscFunctionBegin;
2546: if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS);
2547: if (PetscDefined(HAVE_DEVICE)) {
2548: PetscDeviceContext dctx;
2549: PetscLogDouble elapsed;
2551: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
2552: PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed));
2553: petsc_gtime += (elapsed / 1000.0);
2554: } else {
2555: PetscCall(PetscTimeAdd(&petsc_gtime));
2556: }
2557: PetscFunctionReturn(PETSC_SUCCESS);
2558: }
2560: #endif /* end of PETSC_HAVE_DEVICE */
2562: #else /* end of -DPETSC_USE_LOG section */
2564: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2565: {
2566: PetscFunctionBegin;
2567: PetscFunctionReturn(PETSC_SUCCESS);
2568: }
2570: #endif /* PETSC_USE_LOG*/
2572: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2573: PetscClassId PETSC_OBJECT_CLASSID = 0;
2575: /*@C
2576: PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.
2578: Not Collective
2580: Input Parameter:
2581: . name - The class name
2583: Output Parameter:
2584: . oclass - The class id or classid
2586: Level: developer
2588: .seealso: [](ch_profiling), `PetscLogEventRegister()`
2589: @*/
2590: PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass)
2591: {
2592: #if defined(PETSC_USE_LOG)
2593: PetscStageLog stageLog;
2594: PetscInt stage;
2595: #endif
2597: PetscFunctionBegin;
2598: *oclass = ++PETSC_LARGEST_CLASSID;
2599: #if defined(PETSC_USE_LOG)
2600: PetscCall(PetscLogGetStageLog(&stageLog));
2601: PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass));
2602: for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses));
2603: #endif
2604: PetscFunctionReturn(PETSC_SUCCESS);
2605: }
2607: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE)
2608: #include <mpe.h>
2610: PetscBool PetscBeganMPE = PETSC_FALSE;
2612: PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2613: PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2615: /*@C
2616: PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files
2617: and slows the program down.
2619: Collective over `PETSC_COMM_WORLD`
2621: Options Database Key:
2622: . -log_mpe - Prints extensive log information
2624: Level: advanced
2626: Note:
2627: A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
2628: intended for production runs since it logs only flop rates and object
2629: creation (and should not significantly slow the programs).
2631: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`,
2632: `PetscLogEventDeactivate()`
2633: @*/
2634: PetscErrorCode PetscLogMPEBegin(void)
2635: {
2636: PetscFunctionBegin;
2637: /* Do MPE initialization */
2638: if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */
2639: PetscCall(PetscInfo(0, "Initializing MPE.\n"));
2640: PetscCall(MPE_Init_log());
2642: PetscBeganMPE = PETSC_TRUE;
2643: } else {
2644: PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n"));
2645: }
2646: PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE));
2647: PetscFunctionReturn(PETSC_SUCCESS);
2648: }
2650: /*@C
2651: PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot.
2653: Collective over `PETSC_COMM_WORLD`
2655: Level: advanced
2657: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()`
2658: @*/
2659: PetscErrorCode PetscLogMPEDump(const char sname[])
2660: {
2661: char name[PETSC_MAX_PATH_LEN];
2663: PetscFunctionBegin;
2664: if (PetscBeganMPE) {
2665: PetscCall(PetscInfo(0, "Finalizing MPE.\n"));
2666: if (sname) {
2667: PetscCall(PetscStrncpy(name, sname, sizeof(name)));
2668: } else {
2669: PetscCall(PetscGetProgramName(name, sizeof(name)));
2670: }
2671: PetscCall(MPE_Finish_log(name));
2672: } else {
2673: PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n"));
2674: }
2675: PetscFunctionReturn(PETSC_SUCCESS);
2676: }
2678: #define PETSC_RGB_COLORS_MAX 39
2679: static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ",
2680: "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ",
2681: "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ",
2682: "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ",
2683: "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "};
2685: /*@C
2686: PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()`
2688: Not collective. Maybe it should be?
2690: Output Parameter:
2691: . str - character string representing the color
2693: Level: developer
2695: .seealso: [](ch_profiling), `PetscLogEventRegister()`
2696: @*/
2697: PetscErrorCode PetscLogMPEGetRGBColor(const char *str[])
2698: {
2699: static int idx = 0;
2701: PetscFunctionBegin;
2702: *str = PetscLogMPERGBColors[idx];
2703: idx = (idx + 1) % PETSC_RGB_COLORS_MAX;
2704: PetscFunctionReturn(PETSC_SUCCESS);
2705: }
2707: #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */