Actual source code: mpits.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL};
9: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
11: /*@
12: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
14: Logically Collective
16: Input Parameters:
17: + comm - communicator (the setting is currently global, so this is normally `PETSC_COMM_WORLD`)
18: - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()`
20: Level: developer
22: Note:
23: This option is currently global, but could be made per-communicator.
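24: Example Usage (an illustrative sketch, not taken from the PETSc test suite):
.vb
  PetscCall(PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD, PETSC_BUILDTWOSIDED_IBARRIER));
.ve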
25: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType`
26: @*/
27: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided)
28: {
29: PetscFunctionBegin;
30: if (PetscDefined(USE_DEBUG)) {
31: PetscMPIInt b1[2], b2[2];
32: b1[0] = -(PetscMPIInt)twosided;
33: b1[1] = (PetscMPIInt)twosided;
34: PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm));
35: PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be the same on all processes");
36: }
37: _twosided_type = twosided;
38: PetscFunctionReturn(PETSC_SUCCESS);
39: }
41: /*@
42: PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication
44: Logically Collective
46: Input Parameter:
47: . comm - communicator on which to query the algorithm
Output Parameter:
48: . twosided - algorithm that will be used by `PetscCommBuildTwoSided()`
50: Level: developer
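51: Example Usage (an illustrative sketch):
.vb
  PetscBuildTwoSidedType t;

  PetscCall(PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD, &t));
.ve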
52: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType`
53: @*/
54: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided)
55: {
56: PetscMPIInt size;
58: PetscFunctionBegin;
59: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
60: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
61: PetscCallMPI(MPI_Comm_size(comm, &size));
62: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
63: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
64: if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
65: #endif
66: PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL));
67: }
68: *twosided = _twosided_type;
69: PetscFunctionReturn(PETSC_SUCCESS);
70: }
72: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
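/* Summary of the flow below (the NBX / dynamic sparse data exchange algorithm of Hoefler et al.):
 *   1. Post a synchronous send (MPI_Issend) of the constant-sized data to every target rank.
 *   2. Loop: probe for incoming messages with MPI_Iprobe and receive any that arrive.
 *   3. Once all local synchronous sends have completed, start a nonblocking barrier (MPI_Ibarrier).
 *   4. Keep probing until the barrier completes; at that point every message has been delivered. */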
73: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
74: {
75: PetscMPIInt nrecvs, tag, done, i;
76: MPI_Aint lb, unitbytes;
77: char *tdata;
78: MPI_Request *sendreqs, barrier;
79: PetscSegBuffer segrank, segdata;
80: PetscBool barrier_started;
82: PetscFunctionBegin;
83: PetscCall(PetscCommDuplicate(comm, &comm, &tag));
84: PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
85: PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
86: tdata = (char *)todata;
87: PetscCall(PetscMalloc1(nto, &sendreqs));
88: for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
89: PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
90: PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));
92: nrecvs = 0;
93: barrier = MPI_REQUEST_NULL;
94: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
95: * but we need to work around it. */
96: barrier_started = PETSC_FALSE;
97: for (done = 0; !done;) {
98: PetscMPIInt flag;
99: MPI_Status status;
100: PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
101: if (flag) { /* incoming message */
102: PetscMPIInt *recvrank;
103: void *buf;
104: PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
105: PetscCall(PetscSegBufferGet(segdata, count, &buf));
106: *recvrank = status.MPI_SOURCE;
107: PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
108: nrecvs++;
109: }
110: if (!barrier_started) {
111: PetscMPIInt sent, nsends;
112: PetscCall(PetscMPIIntCast(nto, &nsends));
113: PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
114: if (sent) {
115: PetscCallMPI(MPI_Ibarrier(comm, &barrier));
116: barrier_started = PETSC_TRUE;
117: PetscCall(PetscFree(sendreqs));
118: }
119: } else {
120: PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
121: }
122: }
123: *nfrom = nrecvs;
124: PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
125: PetscCall(PetscSegBufferDestroy(&segrank));
126: PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
127: PetscCall(PetscSegBufferDestroy(&segdata));
128: PetscCall(PetscCommDestroy(&comm));
129: PetscFunctionReturn(PETSC_SUCCESS);
130: }
131: #endif
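/* Summary of the flow below: each rank marks its targets in a length-`size` flag array, a sum
 * allreduce then tells rank r (in entry r) how many messages it will receive, and the data itself
 * moves with ordinary MPI_Isend/MPI_Irecv on MPI_ANY_SOURCE. Memory use scales with the
 * communicator size because of the flag array. */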
133: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
134: {
135: PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg;
136: MPI_Aint lb, unitbytes;
137: char *tdata, *fdata;
138: MPI_Request *reqs, *sendreqs;
139: MPI_Status *statuses;
140: PetscCommCounter *counter;
142: PetscFunctionBegin;
143: PetscCallMPI(MPI_Comm_size(comm, &size));
144: PetscCallMPI(MPI_Comm_rank(comm, &rank));
145: PetscCall(PetscCommDuplicate(comm, &comm, &tag));
146: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
147: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
148: if (!counter->iflags) {
149: PetscCall(PetscCalloc1(size, &counter->iflags));
150: iflags = counter->iflags;
151: } else {
152: iflags = counter->iflags;
153: PetscCall(PetscArrayzero(iflags, size));
154: }
155: for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
156: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm));
157: nrecvs = iflags[rank];
158: PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
159: PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
160: PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
161: tdata = (char *)todata;
162: PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
163: sendreqs = reqs + nrecvs;
164: for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
165: for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
166: PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
167: PetscCall(PetscMalloc1(nrecvs, &franks));
168: for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
169: PetscCall(PetscFree2(reqs, statuses));
170: PetscCall(PetscCommDestroy(&comm));
172: *nfrom = nrecvs;
173: *fromranks = franks;
174: *(void **)fromdata = fdata;
175: PetscFunctionReturn(PETSC_SUCCESS);
176: }
178: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
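/* Same idea as the Allreduce variant, except MPI_Reduce_scatter_block sums the flag array and
 * delivers only this rank's receive count, avoiding the fully reduced vector on every rank. */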
179: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
180: {
181: PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg;
182: MPI_Aint lb, unitbytes;
183: char *tdata, *fdata;
184: MPI_Request *reqs, *sendreqs;
185: MPI_Status *statuses;
186: PetscCommCounter *counter;
188: PetscFunctionBegin;
189: PetscCallMPI(MPI_Comm_size(comm, &size));
190: PetscCall(PetscCommDuplicate(comm, &comm, &tag));
191: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
192: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
193: if (!counter->iflags) {
194: PetscCall(PetscCalloc1(size, &counter->iflags));
195: iflags = counter->iflags;
196: } else {
197: iflags = counter->iflags;
198: PetscCall(PetscArrayzero(iflags, size));
199: }
200: for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
201: PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm));
202: PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
203: PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
204: PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
205: tdata = (char *)todata;
206: PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
207: sendreqs = reqs + nrecvs;
208: for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
209: for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
210: PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
211: PetscCall(PetscMalloc1(nrecvs, &franks));
212: for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
213: PetscCall(PetscFree2(reqs, statuses));
214: PetscCall(PetscCommDestroy(&comm));
216: *nfrom = nrecvs;
217: *fromranks = franks;
218: *(void **)fromdata = fdata;
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
221: #endif
223: /*@C
224: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
226: Collective
228: Input Parameters:
229: + comm - communicator
230: . count - number of entries to send/receive (must match on all ranks)
231: . dtype - datatype to send/receive from each rank (must match on all ranks)
232: . nto - number of ranks to send data to
233: . toranks - ranks to send to (array of length nto)
234: - todata - data to send to each rank (packed)
236: Output Parameters:
237: + nfrom - number of ranks from which messages will be received
238: . fromranks - ranks from which messages will be received (length `nfrom`; caller should `PetscFree()`)
239: - fromdata - packed data from each rank, `count` entries of type `dtype` per rank (length `nfrom`; caller responsible for `PetscFree()`)
241: Options Database Key:
242: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks,
243: otherwise ibarrier (when the MPI implementation provides nonblocking collectives).
245: Level: developer
247: Notes:
248: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
249: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data.
251: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
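252: Example Usage (an illustrative sketch, not taken from the PETSc test suite; the ring pattern and message lengths are made up):
.vb
  PetscMPIInt  size, rank, toranks[2], lengths[2], nfrom;
  PetscMPIInt *fromranks, *fromlengths;

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  toranks[0] = (rank + 1) % size;        /* announce a 10-entry message to the next rank ... */
  toranks[1] = (rank + size - 1) % size; /* ... and a 20-entry message to the previous rank  */
  lengths[0] = 10;
  lengths[1] = 20;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPI_INT, 2, toranks, lengths, &nfrom, &fromranks, &fromlengths));
  /* fromranks[i] will later send this rank fromlengths[i] entries */
  PetscCall(PetscFree(fromranks));
  PetscCall(PetscFree(fromlengths));
.ve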
253: References:
254: . * - Hoefler, Siebert, and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
255: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
257: .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType`
258: @*/
259: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
260: {
261: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
263: PetscFunctionBegin;
264: PetscCall(PetscSysInitializePackage());
265: PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm));
266: PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0));
267: PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
268: switch (buildtype) {
269: case PETSC_BUILDTWOSIDED_IBARRIER:
270: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
271: PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
272: break;
273: #else
274: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
275: #endif
276: case PETSC_BUILDTWOSIDED_ALLREDUCE:
277: PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
278: break;
279: case PETSC_BUILDTWOSIDED_REDSCATTER:
280: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
281: PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
282: break;
283: #else
284: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
285: #endif
286: default:
287: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
288: }
289: PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
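/* Reference implementation of the callback ("F") variant: perform a complete
 * PetscCommBuildTwoSided() rendezvous first, then invoke the user's send/recv callbacks with
 * freshly obtained tags so they can post the (typically larger) payload messages. */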
293: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
294: {
295: PetscMPIInt i, *tag;
296: MPI_Aint lb, unitbytes;
297: MPI_Request *sendreq, *recvreq;
299: PetscFunctionBegin;
300: PetscCall(PetscMalloc1(ntags, &tag));
301: if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0]));
302: for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));
304: /* Perform complete initial rendezvous */
305: PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
307: PetscCall(PetscMalloc1(nto * ntags, &sendreq));
308: PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq));
310: PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
311: PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
312: for (i = 0; i < nto; i++) {
313: PetscMPIInt k;
314: for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL;
315: PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx));
316: }
317: for (i = 0; i < *nfrom; i++) {
318: void *header = (*(char **)fromdata) + count * unitbytes * i;
319: PetscMPIInt k;
320: for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL;
321: PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx));
322: }
323: PetscCall(PetscFree(tag));
324: PetscCall(PetscCommDestroy(&comm));
325: *toreqs = sendreq;
326: *fromreqs = recvreq;
327: PetscFunctionReturn(PETSC_SUCCESS);
328: }
330: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
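/* NBX variant of the callback ("F") interface: as in PetscCommBuildTwoSided_Ibarrier, but the
 * user's send callback is invoked right after the synchronous header sends are posted, and the
 * recv callback is invoked for each probed header, so payload traffic overlaps the rendezvous. */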
332: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
333: {
334: PetscMPIInt nrecvs, tag, *tags, done, i;
335: MPI_Aint lb, unitbytes;
336: char *tdata;
337: MPI_Request *sendreqs, *usendreqs, *req, barrier;
338: PetscSegBuffer segrank, segdata, segreq;
339: PetscBool barrier_started;
341: PetscFunctionBegin;
342: PetscCall(PetscCommDuplicate(comm, &comm, &tag));
343: PetscCall(PetscMalloc1(ntags, &tags));
344: for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i]));
345: PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
346: PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
347: tdata = (char *)todata;
348: PetscCall(PetscMalloc1(nto, &sendreqs));
349: PetscCall(PetscMalloc1(nto * ntags, &usendreqs));
350: /* Post synchronous sends */
351: for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
352: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
353: * synchronous messages above. */
354: for (i = 0; i < nto; i++) {
355: PetscMPIInt k;
356: for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL;
357: PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx));
358: }
360: PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
361: PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));
362: PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq));
364: nrecvs = 0;
365: barrier = MPI_REQUEST_NULL;
366: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
367: * but we need to work around it. */
368: barrier_started = PETSC_FALSE;
369: for (done = 0; !done;) {
370: PetscMPIInt flag;
371: MPI_Status status;
372: PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
373: if (flag) { /* incoming message */
374: PetscMPIInt *recvrank, k;
375: void *buf;
376: PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
377: PetscCall(PetscSegBufferGet(segdata, count, &buf));
378: *recvrank = status.MPI_SOURCE;
379: PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
380: PetscCall(PetscSegBufferGet(segreq, ntags, &req));
381: for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL;
382: PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx));
383: nrecvs++;
384: }
385: if (!barrier_started) {
386: PetscMPIInt sent, nsends;
387: PetscCall(PetscMPIIntCast(nto, &nsends));
388: PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
389: if (sent) {
390: PetscCallMPI(MPI_Ibarrier(comm, &barrier));
391: barrier_started = PETSC_TRUE;
392: }
393: } else {
394: PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
395: }
396: }
397: *nfrom = nrecvs;
398: PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
399: PetscCall(PetscSegBufferDestroy(&segrank));
400: PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
401: PetscCall(PetscSegBufferDestroy(&segdata));
402: *toreqs = usendreqs;
403: PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs));
404: PetscCall(PetscSegBufferDestroy(&segreq));
405: PetscCall(PetscFree(sendreqs));
406: PetscCall(PetscFree(tags));
407: PetscCall(PetscCommDestroy(&comm));
408: PetscFunctionReturn(PETSC_SUCCESS);
409: }
410: #endif
412: /*@C
413: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
415: Collective
417: Input Parameters:
418: + comm - communicator
419: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
420: . dtype - datatype to send/receive from each rank (must match on all ranks)
421: . nto - number of ranks to send data to
422: . toranks - ranks to send to (array of length nto)
423: . todata - data to send to each rank (packed)
424: . ntags - number of tags needed by send/recv callbacks
425: . send - callback invoked on sending process when ready to send primary payload
426: . recv - callback invoked on receiving process after delivery of rendezvous message
427: - ctx - context for callbacks
429: Output Parameters:
430: + nfrom - number of ranks from which messages will be received
431: . fromranks - ranks from which messages will be received (length `nfrom`; caller should `PetscFree()`)
432: - fromdata - packed data from each rank, `count` entries of type `dtype` per rank (length `nfrom`; caller responsible for `PetscFree()`)
434: Level: developer
436: Notes:
437: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
438: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
440: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
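441: Example Usage (a hedged sketch of the callback shapes only, assuming `ntags` is 1 and a hypothetical user context `MyCtx` that holds payload buffers and lengths; this is not a complete program):
.vb
  static PetscErrorCode MySend(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt i, PetscMPIInt rank, void *hdr, MPI_Request req[], void *ctx)
  {
    MyCtx *c = (MyCtx *)ctx;

    PetscFunctionBegin;
    /* one tag was requested (ntags = 1); post the payload for destination index i */
    PetscCallMPI(MPI_Isend(c->sendbufs[i], c->sendlens[i], MPIU_SCALAR, rank, tags[0], comm, &req[0]));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  static PetscErrorCode MyRecv(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt rank, void *hdr, MPI_Request req[], void *ctx)
  {
    MyCtx       *c   = (MyCtx *)ctx;
    PetscMPIInt  len = *(PetscMPIInt *)hdr; /* the length announced in the rendezvous message */
    PetscScalar *buf;

    PetscFunctionBegin;
    PetscCall(PetscMalloc1(len, &buf)); /* a real context would record buf so it can be used and freed */
    PetscCallMPI(MPI_Irecv(buf, len, MPIU_SCALAR, rank, tags[0], comm, &req[0]));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
.ve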
442: References:
443: . * - Hoefler, Siebert, and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
444: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
446: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
447: @*/
448: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
449: {
450: MPI_Request *toreqs, *fromreqs;
452: PetscFunctionBegin;
453: PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx));
454: PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE));
455: PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE));
456: PetscCall(PetscFree(toreqs));
457: PetscCall(PetscFree(fromreqs));
458: PetscFunctionReturn(PETSC_SUCCESS);
459: }
461: /*@C
462: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
464: Collective
466: Input Parameters:
467: + comm - communicator
468: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
469: . dtype - datatype to send/receive from each rank (must match on all ranks)
470: . nto - number of ranks to send data to
471: . toranks - ranks to send to (array of length nto)
472: . todata - data to send to each rank (packed)
473: . ntags - number of tags needed by send/recv callbacks
474: . send - callback invoked on sending process when ready to send primary payload
475: . recv - callback invoked on receiving process after delivery of rendezvous message
476: - ctx - context for callbacks
478: Output Parameters:
479: + nfrom - number of ranks from which messages will be received
480: . fromranks - ranks from which messages will be received (length `nfrom`; caller should `PetscFree()`)
481: . fromdata - packed data from each rank, `count` entries of type `dtype` per rank (length `nfrom`; caller responsible for `PetscFree()`)
482: . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`)
483: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`)
485: Level: developer
487: Notes:
488: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
489: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
491: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
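492: Example Usage (a hedged sketch; `MySend`, `MyRecv`, and `ctx` are the hypothetical callbacks and context sketched for `PetscCommBuildTwoSidedF()`, with `toranks` and `lengths` as in the `PetscCommBuildTwoSided()` example and `ntags` equal to 1):
.vb
  MPI_Request *toreqs, *fromreqs;
  PetscMPIInt  nfrom, *fromranks, *fromlengths;

  PetscCall(PetscCommBuildTwoSidedFReq(comm, 1, MPI_INT, 2, toranks, lengths, &nfrom, &fromranks, &fromlengths, 1, &toreqs, &fromreqs, MySend, MyRecv, ctx));
  /* other work can be overlapped here before waiting on the payload requests */
  PetscCallMPI(MPI_Waitall(2, toreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nfrom, fromreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(toreqs));
  PetscCall(PetscFree(fromreqs));
.ve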
493: References:
494: . * - Hoefler, Siebert, and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
495: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
497: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
498: @*/
499: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
500: {
501: PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx);
502: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
503: PetscMPIInt i, size;
505: PetscFunctionBegin;
506: PetscCall(PetscSysInitializePackage());
507: PetscCallMPI(MPI_Comm_size(comm, &size));
508: for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size);
509: PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm));
510: PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
511: PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
512: switch (buildtype) {
513: case PETSC_BUILDTWOSIDED_IBARRIER:
514: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
515: f = PetscCommBuildTwoSidedFReq_Ibarrier;
516: break;
517: #else
518: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
519: #endif
520: case PETSC_BUILDTWOSIDED_ALLREDUCE:
521: case PETSC_BUILDTWOSIDED_REDSCATTER:
522: f = PetscCommBuildTwoSidedFReq_Reference;
523: break;
524: default:
525: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
526: }
527: PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx));
528: PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
529: PetscFunctionReturn(PETSC_SUCCESS);
530: }