Actual source code: dcontext.cxx
1: #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/cpp/object_pool.hpp>
5: #include <petsc/private/cpp/utility.hpp>
6: #include <petsc/private/cpp/array.hpp>
8: #include <vector>
9: #include <string> // std::to_string among other things
11: /* Define the allocator */
12: class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13: public:
14: PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15: {
16: PetscFunctionBegin;
17: PetscCall(PetscArrayzero(dctx, 1));
18: PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView));
19: PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData());
20: PetscCall(underlying().reset(dctx, false));
21: PetscFunctionReturn(PETSC_SUCCESS);
22: }
24: static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
25: {
26: PetscFunctionBegin;
27: PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
28: PetscTryTypeMethod(dctx, destroy);
29: PetscCall(PetscDeviceDestroy(&dctx->device));
30: PetscCall(PetscFree(dctx->childIDs));
31: delete CxxDataCast(dctx);
32: PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
33: PetscFunctionReturn(PETSC_SUCCESS);
34: }
36: static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
37: {
38: PetscFunctionBegin;
39: if (zero) {
40: // reset the device if the user set it
41: if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
42: PetscTryTypeMethod(dctx, destroy);
43: PetscCall(PetscDeviceDestroy(&dctx->device));
44: PetscCall(PetscArrayzero(dctx->ops, 1));
45: dctx->data = nullptr;
46: }
47: PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
48: dctx->numChildren = 0;
49: dctx->setup = PETSC_FALSE;
50: // don't deallocate the child array, rather just zero it out
51: PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
52: PetscCall(CxxDataCast(dctx)->clear());
53: }
54: dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
58: static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; }
59: };
61: static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
63: /*@C
64: PetscDeviceContextCreate - Creates a `PetscDeviceContext`
66: Not Collective
68: Output Parameter:
69: . dctx - The `PetscDeviceContext`
71: Level: beginner
73: Note:
74: Unlike almost every other PETSc class it is advised that most users use
75: `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
76: different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
77: ensures compatible types.
79: DAG representation:
80: .vb
81: time ->
83: |= CALL =| - dctx ->
84: .ve
86: .N ASYNC_API
88: .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
89: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
90: `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
91: @*/
92: PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
93: {
94: PetscFunctionBegin;
96: PetscCall(PetscDeviceInitializePackage());
97: PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
98: PetscCall(contextPool.allocate(dctx));
99: PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: /*@C
104: PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
106: Not Collective
108: Input Parameter:
109: . dctx - The `PetscDeviceContext`
111: Level: beginner
113: Notes:
114: No implicit synchronization occurs due to this routine, all resources are released completely
115: asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
116: `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
117: calling this routine.
119: DAG representation:
120: .vb
121: time ->
123: -> dctx - |= CALL =|
124: .ve
126: Developer Notes:
127: `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
128: growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
129: this should perhaps be implemented.
131: .N ASYNC_API
133: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
134: `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
135: @*/
136: PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
137: {
138: PetscFunctionBegin;
140: if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
141: PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
142: if (--(PetscObjectCast(*dctx)->refct) <= 0) {
143: PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
144: PetscCall(contextPool.deallocate(dctx));
145: }
146: PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
147: *dctx = nullptr;
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
151: /*@C
152: PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
153: `PetscDeviceContext`
155: Not Collective
157: Input Parameters:
158: + dctx - The `PetscDeviceContext`
159: - type - The `PetscStreamType`
161: Level: beginner
163: Note:
164: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
165: types and their interactions. If the `PetscDeviceContext` was previously set up and stream
166: type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
168: .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
169: `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
170: @*/
171: PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
172: {
173: PetscFunctionBegin;
174: // do not use getoptionalnullcontext here since we do not want the user to change the stream
175: // type
178: // only need to do complex swapping if the object has already been setup
179: if (dctx->setup && (dctx->streamType != type)) {
180: dctx->setup = PETSC_FALSE;
181: PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
182: PetscUseTypeMethod(dctx, changestreamtype, type);
183: PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
184: }
185: dctx->streamType = type;
186: PetscFunctionReturn(PETSC_SUCCESS);
187: }
189: /*@C
190: PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
191: `PetscDeviceContext`
193: Not Collective
195: Input Parameter:
196: . dctx - The `PetscDeviceContext`
198: Output Parameter:
199: . type - The `PetscStreamType`
201: Level: beginner
203: Note:
204: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
205: types and their interactions
207: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
208: `PetscDeviceContextSetFromOptions()`
209: @*/
210: PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
211: {
212: PetscFunctionBegin;
213: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
215: *type = dctx->streamType;
216: PetscFunctionReturn(PETSC_SUCCESS);
217: }
219: /*
220: Actual function to set the device.
222: 1. Repeatedly destroying and recreating internal data structures (like streams and events)
223: for recycled PetscDeviceContexts is not free. If done often, it does add up.
224: 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
225: default contexts. The default contexts *never* change type, and the children are extremely
226: unlikely to (chances are if you fork once, you will fork again very soon).
227: 3. The only time this calculus changes is if the user themselves sets the device type. In
228: this case we do not know what the user has changed, so must always wipe the slate clean.
230: Thus we need to keep track whether the user explicitly sets the device contexts device.
231: */
232: static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
233: {
234: PetscFunctionBegin;
235: // do not use getoptionalnullcontext here since we do not want the user to change its device
238: if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
239: PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
240: PetscTryTypeMethod(dctx, destroy);
241: PetscCall(PetscDeviceDestroy(&dctx->device));
242: PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
243: PetscCall(PetscDeviceReference_Internal(device));
244: // set it before calling the method
245: dctx->device = device;
246: PetscCall((*device->ops->createcontext)(dctx));
247: PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
248: dctx->setup = PETSC_FALSE;
249: dctx->usersetdevice = user_set;
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
254: {
255: PetscDevice device;
257: PetscFunctionBegin;
258: PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
259: PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: /*@C
264: PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
266: Not Collective
268: Input Parameters:
269: + dctx - The `PetscDeviceContext`
270: - device - The `PetscDevice`
272: Level: intermediate
274: Notes:
275: This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
276: also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
277: necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
278: always come equipped with the "default" device.
280: This routine is a no-op if `device` is already attached to `dctx`.
282: This routine may (but is very unlikely to) initialize the backend device and may incur
283: synchronization.
285: .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
286: `PetscDeviceContextGetDeviceType()`
287: @*/
288: PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
289: {
290: PetscFunctionBegin;
291: PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
292: PetscFunctionReturn(PETSC_SUCCESS);
293: }
295: /*@C
296: PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
298: Not Collective
300: Input Parameter:
301: . dctx - the `PetscDeviceContext`
303: Output Parameter:
304: . device - The `PetscDevice`
306: Level: intermediate
308: Note:
309: This is a borrowed reference, the user should not destroy `device`.
311: .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
312: @*/
313: PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
314: {
315: PetscFunctionBegin;
316: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
318: PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
319: *device = dctx->device;
320: PetscFunctionReturn(PETSC_SUCCESS);
321: }
323: /*@C
324: PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
326: Not Collective
328: Input Parameter:
329: . dctx - The `PetscDeviceContext`
331: Output Parameter:
332: . type - The `PetscDeviceType`
334: Level: beginner
336: Note:
337: This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
338: `PetscDeviceGetType()`.
340: .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
341: @*/
342: PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
343: {
344: PetscDevice device = nullptr;
346: PetscFunctionBegin;
347: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
349: PetscCall(PetscDeviceContextGetDevice(dctx, &device));
350: PetscCall(PetscDeviceGetType(device, type));
351: PetscFunctionReturn(PETSC_SUCCESS);
352: }
354: /*@C
355: PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
357: Not Collective
359: Input Parameter:
360: . dctx - The `PetscDeviceContext`
362: Level: beginner
364: Developer Note:
365: This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
366: structures such as streams, events, and (possibly) handles.
368: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
369: `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
370: @*/
371: PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
372: {
373: PetscFunctionBegin;
374: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
375: if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
376: if (!dctx->device) {
377: const auto default_dtype = PETSC_DEVICE_DEFAULT();
379: PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
380: PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
381: }
382: PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
383: PetscUseTypeMethod(dctx, setup);
384: PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
385: dctx->setup = PETSC_TRUE;
386: PetscFunctionReturn(PETSC_SUCCESS);
387: }
389: static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
390: {
391: PetscFunctionBegin;
392: PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
393: PetscCall(PetscDeviceContextCreate(dctxdup));
394: PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
395: if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
396: PetscCall(PetscDeviceContextSetUp(*dctxdup));
397: PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }
401: /*@C
402: PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
404: Not Collective
406: Input Parameter:
407: . dctx - The `PetscDeviceContext` to duplicate
409: Output Parameter:
410: . dctxdup - The duplicated `PetscDeviceContext`
412: Level: beginner
414: Notes:
415: This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
416: another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
417: (including its current stream-state) they are completely separate objects.
419: There is no implied ordering between `dctx` or `dctxdup`.
421: DAG representation:
422: .vb
423: time ->
425: -> dctx - |= CALL =| - dctx ---->
426: - dctxdup ->
427: .ve
429: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
430: `PetscDeviceContextSetStreamType()`
431: @*/
432: PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
433: {
434: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
436: PetscFunctionBegin;
437: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
439: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
440: PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
444: /*@C
445: PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
447: Not Collective
449: Input Parameter:
450: . dctx - The `PetscDeviceContext`
452: Output Parameter:
453: . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
455: Level: intermediate
457: Note:
458: This routine only refers a singular context and does NOT take any of its children into
459: account. That is, if `dctx` is idle but has dependents who do have work this routine still
460: returns `PETSC_TRUE`.
462: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
463: @*/
464: PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
465: {
466: PetscFunctionBegin;
467: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
469: PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
470: PetscUseTypeMethod(dctx, query, idle);
471: PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
472: PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
473: PetscFunctionReturn(PETSC_SUCCESS);
474: }
476: /*@C
477: PetscDeviceContextWaitForContext - Make one context wait for another context to finish
479: Not Collective
481: Input Parameters:
482: + dctxa - The `PetscDeviceContext` object that is waiting
483: - dctxb - The `PetscDeviceContext` object that is being waited on
485: Level: beginner
487: Notes:
488: Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
489: does not wait for the serialization to actually occur.
491: This routine uses only the state of `dctxb` at the moment this routine was called, so any
492: future work queued will not affect `dctxa`. It is safe to pass the same context to both
493: arguments (in which case this routine does nothing).
495: DAG representation:
496: .vb
497: time ->
499: -> dctxa ---/- |= CALL =| - dctxa ->
500: /
501: -> dctxb -/------------------------>
502: .ve
504: .N ASYNC_API
506: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
507: @*/
508: PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
509: {
510: PetscObject aobj;
512: PetscFunctionBegin;
513: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
514: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
515: PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
516: if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
517: aobj = PetscObjectCast(dctxa);
518: PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
519: PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
520: PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb));
521: PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
522: PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id));
523: PetscCall(PetscObjectStateIncrease(aobj));
524: PetscFunctionReturn(PETSC_SUCCESS);
525: }
527: /*@C
528: PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
529: context with a prescribed `PetscStreamType`
531: Not Collective, Asynchronous
533: Input Parameters:
534: + dctx - The parent `PetscDeviceContext`
535: . stype - The prescribed `PetscStreamType`
536: - n - The number of children to create
538: Output Parameter:
539: . dsub - The created child context(s)
541: Level: intermediate
543: Notes:
544: This routine creates `n` edges of a DAG from a source node which are causally dependent on the
545: source node. This causal dependency is established as-if by calling
546: `PetscDeviceContextWaitForContext()` on every child.
548: `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
549: expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
551: This routine only accounts for work queued on `dctx` up until calling this routine, any
552: subsequent work enqueued on `dctx` has no effect on `dsub`.
554: The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
555: that they are different. This is useful in cases where a routine can locally exploit stream
556: parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
557: carries.
559: DAG representation:
560: .vb
561: time ->
563: -> dctx - |= CALL =| -\----> dctx ------>
564: \---> dsub[0] --->
565: \--> ... ------->
566: \-> dsub[n-1] ->
567: .ve
569: .N ASYNC_API
571: .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
572: `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
573: @*/
574: PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
575: {
576: // debugging only
577: std::string idList;
578: auto ninput = n;
580: PetscFunctionBegin;
581: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
582: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
584: *dsub = nullptr;
585: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
586: if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
587: PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
588: /* update child totals */
589: dctx->numChildren += n;
590: /* now to find out if we have room */
591: if (dctx->numChildren > dctx->maxNumChildren) {
592: const auto numChildren = dctx->numChildren;
593: auto &maxNumChildren = dctx->maxNumChildren;
594: auto numAllocated = numChildren;
596: /* no room, either from having too many kids or not having any */
597: if (auto &childIDs = dctx->childIDs) {
598: // the difference is backwards because we have not updated maxNumChildren yet
599: numAllocated -= maxNumChildren;
600: /* have existing children, must reallocate them */
601: PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
602: /* clear the extra memory since realloc doesn't do it for us */
603: PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
604: } else {
605: /* have no children */
606: PetscCall(PetscCalloc1(numChildren, &childIDs));
607: }
608: /* update total number of children */
609: maxNumChildren = numChildren;
610: }
611: PetscCall(PetscMalloc1(n, dsub));
612: for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
613: auto &childID = dctx->childIDs[i];
614: /* empty child slot */
615: if (!childID) {
616: auto &childctx = (*dsub)[i];
618: /* create the child context in the image of its parent */
619: PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
620: PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
621: /* register the child with its parent */
622: PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
623: if (PetscDefined(USE_DEBUG_AND_INFO)) {
624: PetscCallCXX(idList += std::to_string(childID));
625: if (ninput != 1) PetscCallCXX(idList += ", ");
626: }
627: --ninput;
628: }
629: }
630: PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
631: PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
635: /*@C
636: PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
638: Not Collective, Asynchronous
640: Input Parameters:
641: + dctx - The parent `PetscDeviceContext`
642: - n - The number of children to create
644: Output Parameter:
645: . dsub - The created child context(s)
647: Level: beginner
649: Notes:
650: Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
651: `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
653: .vb
654: PetscStreamType stype;
656: PetscDeviceContextGetStreamType(dctx, &stype);
657: PetscDeviceContextForkWithStreamType(dctx, stype, ...);
658: .ve
660: .N ASYNC_API
662: .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
663: `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
664: @*/
665: PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
666: {
667: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
669: PetscFunctionBegin;
670: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
671: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
672: PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
673: PetscFunctionReturn(PETSC_SUCCESS);
674: }
676: /*@C
677: PetscDeviceContextJoin - Converge a set of child contexts
679: Not Collective, Asynchronous
681: Input Parameters:
682: + dctx - A `PetscDeviceContext` to converge on
683: . n - The number of sub contexts to converge
684: . joinMode - The type of join to perform
685: - dsub - The sub contexts to converge
687: Level: beginner
689: Notes:
690: If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
691: node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
692: which receives `n` edges (and optionally destroys them) which is dependent on the completion
693: of all incoming edges.
695: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
696: destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
697: passed to this routine.
699: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
700: `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
701: sync suffix does NOT refer to the host, i.e. this routine does NOT call
702: `PetscDeviceSynchronize()`.
704: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
705: the sub contexts do not wait for one another or `dctx` afterwards.
707: DAG representations:
708: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
709: .vb
710: time ->
712: -> dctx ---------/- |= CALL =| - dctx ->
713: -> dsub[0] -----/
714: -> ... -------/
715: -> dsub[n-1] -/
716: .ve
717: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
718: .vb
719: time ->
721: -> dctx ---------/- |= CALL =| -\----> dctx ------>
722: -> dsub[0] -----/ \---> dsub[0] --->
723: -> ... -------/ \--> ... ------->
724: -> dsub[n-1] -/ \-> dsub[n-1] ->
725: .ve
726: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
727: .vb
728: time ->
730: -> dctx ----------/- |= CALL =| - dctx ->
731: -> dsub[0] ------/----------------------->
732: -> ... --------/------------------------>
733: -> dsub[n-1] --/------------------------->
734: .ve
736: .N ASYNC_API
738: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
739: `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
740: @*/
741: PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
742: {
743: // debugging only
744: std::string idList;
746: PetscFunctionBegin;
747: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
748: /* validity of dctx is checked in the wait-for loop */
750: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
751: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
752: if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
753: /* first dctx waits on all the incoming edges */
754: PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
755: for (PetscInt i = 0; i < n; ++i) {
756: PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
757: PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
758: if (PetscDefined(USE_DEBUG_AND_INFO)) {
759: PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
760: if (i + 1 < n) PetscCallCXX(idList += ", ");
761: }
762: }
764: /* now we handle the aftermath */
765: switch (joinMode) {
766: case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
767: const auto children = dctx->childIDs;
768: const auto maxchild = dctx->maxNumChildren;
769: auto &nchild = dctx->numChildren;
770: PetscInt j = 0;
772: PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
773: /* update child count while it's still fresh in memory */
774: nchild -= n;
775: for (PetscInt i = 0; i < maxchild; ++i) {
776: if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
777: /* child is one of ours, can destroy it */
778: PetscCall(PetscDeviceContextDestroy((*dsub) + j));
779: /* reset the child slot */
780: children[i] = 0;
781: if (++j == n) break;
782: }
783: }
784: /* gone through the loop but did not find every child */
785: PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
786: PetscCall(PetscFree(*dsub));
787: } break;
788: case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
789: for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
790: case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
791: break;
792: default:
793: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
794: }
795: PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
797: PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
798: PetscFunctionReturn(PETSC_SUCCESS);
799: }
801: /*@C
802: PetscDeviceContextSynchronize - Block the host until all work queued on a
803: `PetscDeviceContext` has finished
805: Not Collective
807: Input Parameter:
808: . dctx - The `PetscDeviceContext` to synchronize
810: Level: beginner
812: Notes:
813: The host will not return from this routine until `dctx` is idle. Any and all memory
814: operations queued on or otherwise associated with (either explicitly or implicitly via
815: dependencies) are guaranteed to have finished and be globally visible on return.
817: In effect, this routine serves as memory and execution barrier.
819: DAG representation:
820: .vb
821: time ->
823: -> dctx - |= CALL =| - dctx ->
824: .ve
826: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
827: @*/
828: PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
829: {
830: PetscFunctionBegin;
831: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
832: PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
833: /* if it isn't setup there is nothing to sync on */
834: if (dctx->setup) {
835: PetscUseTypeMethod(dctx, synchronize);
836: PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
837: }
838: PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
839: PetscFunctionReturn(PETSC_SUCCESS);
840: }
842: /* every device type has a vector of null PetscDeviceContexts -- one for each device */
843: static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
844: static auto nullContextsFinalizer = false;
846: static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
847: {
848: PetscInt devid;
849: PetscDeviceType dtype;
851: PetscFunctionBegin;
854: if (PetscUnlikely(!nullContextsFinalizer)) {
855: const auto finalizer = [] {
856: PetscFunctionBegin;
857: for (auto &&dvec : nullContexts) {
858: for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
859: PetscCallCXX(dvec.clear());
860: }
861: nullContextsFinalizer = false;
862: PetscFunctionReturn(PETSC_SUCCESS);
863: };
865: nullContextsFinalizer = true;
866: PetscCall(PetscRegisterFinalize(std::move(finalizer)));
867: }
868: PetscCall(PetscDeviceGetDeviceId(device, &devid));
869: PetscCall(PetscDeviceGetType(device, &dtype));
870: {
871: auto &ctxlist = nullContexts[dtype];
873: PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
874: // need to resize the container if not big enough because incrementing the iterator in
875: // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
876: // current size of the container.
877: if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
878: if (PetscUnlikely(!ctxlist[devid])) {
879: // we have not seen this device before
880: PetscCall(PetscDeviceContextCreate(dctx));
881: PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
882: {
883: const auto pobj = PetscObjectCast(*dctx);
884: const auto name = "null context " + std::to_string(devid);
885: const auto prefix = "null_context_" + std::to_string(devid) + '_';
887: PetscCall(PetscObjectSetName(pobj, name.c_str()));
888: PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
889: }
890: PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
891: PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
892: PetscCall(PetscDeviceContextSetUp(*dctx));
893: // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
894: PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
895: } else *dctx = ctxlist[devid];
896: }
897: PetscFunctionReturn(PETSC_SUCCESS);
898: }
900: /*
901: Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
902: guaranteed to always be globally blocking.
903: */
904: PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
905: {
906: PetscDeviceContext gctx;
907: PetscDevice gdev = nullptr;
909: PetscFunctionBegin;
911: PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
912: PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
913: PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
914: PetscFunctionReturn(PETSC_SUCCESS);
915: }
917: /*@C
918: PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
920: Collective on `comm` or `dctx`
922: Input Parameters:
923: + comm - MPI communicator on which to query the options database (optional)
924: - dctx - The `PetscDeviceContext` to configure
926: Output Parameter:
927: . dctx - The `PetscDeviceContext`
929: Options Database Keys:
930: + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
931: `PetscDeviceContextSetStreamType()`
932: - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
934: Level: beginner
936: Note:
937: The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
938: used (which is always `PETSC_COMM_SELF`).
940: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
941: `PetscDeviceContextView()`
942: @*/
943: PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
944: {
945: const auto pobj = PetscObjectCast(dctx);
946: auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
947: auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
948: MPI_Comm old_comm = PETSC_COMM_SELF;
950: PetscFunctionBegin;
951: // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
953: /* set the device type first */
954: if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
955: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
957: if (comm == MPI_COMM_NULL) {
958: PetscCall(PetscObjectGetComm(pobj, &comm));
959: } else {
960: // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
961: // PetscObjectOptionsBegin() behaves as if dctx had comm
962: old_comm = Petsc::util::exchange(pobj->comm, comm);
963: }
965: PetscObjectOptionsBegin(pobj);
966: PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
967: PetscOptionsEnd();
968: // reset the comm (should be PETSC_COMM_SELF)
969: if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
970: if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
971: if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
972: PetscCall(PetscDeviceContextSetUp(dctx));
973: PetscFunctionReturn(PETSC_SUCCESS);
974: }
976: /*@C
977: PetscDeviceContextView - View a `PetscDeviceContext`
979: Collective on `viewer`
981: Input Parameters:
982: + dctx - The `PetscDeviceContext`
983: - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
985: Level: beginner
987: Note:
988: If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
989: routine is collective on `PETSC_COMM_WORLD`.
991: .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
992: @*/
993: PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
994: {
995: PetscBool iascii;
997: PetscFunctionBegin;
998: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
999: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1001: PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1002: if (iascii) {
1003: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
1004: PetscViewer sub;
1006: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1007: PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1008: PetscCall(PetscViewerASCIIPushTab(sub));
1009: PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1010: PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1011: PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1012: if (const auto nchild = dctx->numChildren) {
1013: PetscCall(PetscViewerASCIIPushTab(sub));
1014: for (PetscInt i = 0; i < nchild; ++i) {
1015: if (i == nchild - 1) {
1016: PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1017: } else {
1018: PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1019: }
1020: }
1021: }
1022: PetscCall(PetscViewerASCIIPopTab(sub));
1023: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1024: PetscCall(PetscViewerFlush(viewer));
1025: PetscCall(PetscViewerASCIIPushTab(viewer));
1026: }
1027: if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1028: if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1029: PetscFunctionReturn(PETSC_SUCCESS);
1030: }
1032: /*@C
1033: PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1035: Input Parameters:
1036: + dctx - The `PetscDeviceContext` to view
1037: . obj - Optional `PetscObject` to associate (may be `NULL`)
1038: - name - The command line option
1040: Level: beginner
1042: .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1043: @*/
1044: PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1045: {
1046: PetscFunctionBegin;
1047: PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1050: PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1051: PetscFunctionReturn(PETSC_SUCCESS);
1052: }