Actual source code: mpimesg.c


  2: #include <petscsys.h>
  3: #include <petsc/private/mpiutils.h>

  5: /*@C
  6:   PetscGatherNumberOfMessages -  Computes the number of messages an MPI rank expects to receive during a neighbor communication

  8:   Collective

 10:   Input Parameters:
 11: + comm     - Communicator
 12: . iflags   - an array of integers of length the size of `comm`. A '1' in `iflags`[i] represents a
 13:              message from the current rank to rank i. Optionally `NULL`
 14: - ilengths - a nonzero `ilengths`[i] represents a message to rank i of length `ilengths`[i].
 15:              Optionally `NULL`.

 17:   Output Parameter:
 18: . nrecvs    - number of messages the current MPI rank will receive

 20:   Level: developer

 22:   Notes:
 23:   With this info, the correct message lengths can be determined using
 24:   `PetscGatherMessageLengths()`

 26:   Either `iflags` or `ilengths` should be provided.  If `iflags` is not
 27:   provided (`NULL`) it can be computed from `ilengths`. If `iflags` is
 28:   provided, `ilengths` is not required.
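
  Example Usage:
  A minimal sketch of a typical call (assumes the caller has filled `ilengths` with the number of entries
  this rank will send to every other rank):
.vb
  PetscMPIInt nrecvs;

  PetscCall(PetscGatherNumberOfMessages(comm, NULL, ilengths, &nrecvs));
  // nrecvs now holds the number of ranks that will send a message to this rank
.ve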

 30: .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
 31: @*/
 32: PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
 33: {
 34:   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

 36:   PetscFunctionBegin;
 37:   PetscCallMPI(MPI_Comm_size(comm, &size));
 38:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

 40:   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

 42:   /* If iflags not provided, compute iflags from ilengths */
 43:   if (!iflags) {
 44:     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
 45:     iflags_local = iflags_localm;
 46:     for (i = 0; i < size; i++) {
 47:       if (ilengths[i]) iflags_local[i] = 1;
 48:       else iflags_local[i] = 0;
 49:     }
 50:   } else iflags_local = (PetscMPIInt *)iflags;

 52:   /* Post an allreduce to determine the number of messages the current MPI rank will receive */
 53:   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
 54:   *nrecvs = recv_buf[rank];

 56:   PetscCall(PetscFree2(recv_buf, iflags_localm));
 57:   PetscFunctionReturn(PETSC_SUCCESS);
 58: }

 60: /*@C
 61:   PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive,
 62:   including (from-id,length) pairs for each message.

 64:   Collective

 66:   Input Parameters:
 67: + comm      - Communicator
 68: . nsends    - number of messages that are to be sent.
 69: . nrecvs    - number of messages being received
 70: - ilengths  - an array of integers of length the size of `comm`;
 71:               a nonzero `ilengths`[i] represents a message to rank i of length `ilengths`[i]

 73:   Output Parameters:
 74: + onodes    - list of ranks from which messages are expected
 75: - olengths  - corresponding message lengths

 77:   Level: developer

 79:   Notes:
 80:   With this info, the correct `MPI_Irecv()` can be posted with the correct
 81:   from-id and with a receive buffer of the required size.

 83:   The calling function is responsible for freeing the memory in `onodes` and `olengths` with `PetscFree()`

 85:   To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`
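
  Example Usage:
  A minimal sketch of the usual call sequence (assumes the caller has computed `ilengths` and `nsends`;
  the posting of the actual receives is only indicated by a comment):
.vb
  PetscMPIInt nrecvs, *onodes, *olengths;

  PetscCall(PetscGatherNumberOfMessages(comm, NULL, ilengths, &nrecvs));
  PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, ilengths, &onodes, &olengths));
  // message i will arrive from rank onodes[i] and contain olengths[i] entries,
  // so an MPI_Irecv() of that size can now be posted for each i
  PetscCall(PetscFree(onodes));
  PetscCall(PetscFree(olengths));
.ve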

 87: .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
 88: @*/
 89: PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
 90: {
 91:   PetscMPIInt  size, rank, tag, i, j;
 92:   MPI_Request *s_waits = NULL, *r_waits = NULL;
 93:   MPI_Status  *w_status = NULL;

 95:   PetscFunctionBegin;
 96:   PetscCallMPI(MPI_Comm_size(comm, &size));
 97:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 98:   PetscCall(PetscCommGetNewTag(comm, &tag));

100:   /* cannot use PetscMalloc3() here because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
101:   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
102:   s_waits = r_waits + nrecvs;

104:   /* Post the Irecv to get the message length-info */
105:   PetscCall(PetscMalloc1(nrecvs, olengths));
106:   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

108:   /* Post the Isends with the message length-info */
109:   for (i = 0, j = 0; i < size; ++i) {
110:     if (ilengths[i]) {
111:       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
112:       j++;
113:     }
114:   }

116:   /* Post waits on sends and receives */
117:   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

119:   /* Pack up the received data */
120:   PetscCall(PetscMalloc1(nrecvs, onodes));
121:   for (i = 0; i < nrecvs; ++i) {
122:     (*onodes)[i] = w_status[i].MPI_SOURCE;
123: #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
124:     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
125:        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
126:        does not put correct value in recv buffer. See also
127:        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
128:        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
129:      */
130:     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
131: #endif
132:   }
133:   PetscCall(PetscFree2(r_waits, w_status));
134:   PetscFunctionReturn(PETSC_SUCCESS);
135: }

137: /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
138: PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
139: {
140:   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

142:   PetscFunctionBegin;
143:   PetscCallMPI(MPI_Comm_size(comm, &size));
144:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

146:   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

148:   /* If iflags not provided, compute iflags from ilengths */
149:   if (!iflags) {
150:     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
151:     iflags_local = iflags_localm;
152:     for (i = 0; i < size; i++) {
153:       if (ilengths[i]) iflags_local[i] = 1;
154:       else iflags_local[i] = 0;
155:     }
156:   } else iflags_local = (PetscMPIInt *)iflags;

158:   /* Post an allreduce to determine the number of messages the current MPI rank will receive */
159:   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
160:   *nrecvs = recv_buf[rank];

162:   PetscCall(PetscFree2(recv_buf, iflags_localm));
163:   PetscFunctionReturn(PETSC_SUCCESS);
164: }

166: /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
167: PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
168: {
169:   PetscMPIInt  size, rank, tag, i, j;
170:   MPI_Request *s_waits = NULL, *r_waits = NULL;
171:   MPI_Status  *w_status = NULL;

173:   PetscFunctionBegin;
174:   PetscCallMPI(MPI_Comm_size(comm, &size));
175:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
176:   PetscCall(PetscCommGetNewTag(comm, &tag));

178:   /* cannot use PetscMalloc3() here because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
179:   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
180:   s_waits = r_waits + nrecvs;

182:   /* Post the Irecv to get the message length-info */
183:   PetscCall(PetscMalloc1(nrecvs, olengths));
184:   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

186:   /* Post the Isends with the message length-info */
187:   for (i = 0, j = 0; i < size; ++i) {
188:     if (ilengths[i]) {
189:       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
190:       j++;
191:     }
192:   }

194:   /* Post waits on sends and receives */
195:   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

197:   /* Pack up the received data */
198:   PetscCall(PetscMalloc1(nrecvs, onodes));
199:   for (i = 0; i < nrecvs; ++i) {
200:     (*onodes)[i] = w_status[i].MPI_SOURCE;
201:     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
202:   }
203:   PetscCall(PetscFree2(r_waits, w_status));
204:   PetscFunctionReturn(PETSC_SUCCESS);
205: }

207: /*@C
208:   PetscGatherMessageLengths2 - Computes information about messages that an MPI rank will receive,
209:   including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
210:   except it takes TWO `ilengths` arrays and outputs TWO `olengths` arrays.

212:   Collective

214:   Input Parameters:
215: + comm      - Communicator
216: . nsends    - number of messages that are to be sent.
217: . nrecvs    - number of messages being received
218: . ilengths1 - first array of integers of length the size of `comm`
219: - ilengths2 - second array of integers of length the size of `comm`

221:   Output Parameters:
222: + onodes    - list of ranks from which messages are expected
223: . olengths1 - first corresponding message lengths
224: - olengths2 - second corresponding message lengths

226:   Level: developer

228:   Notes:
229:   With this info, the correct `MPI_Irecv()` can be posted with the correct
230:   from-id and with a receive buffer of the required size.

232:   The calling function should `PetscFree()` the memory in `onodes`, `olengths1`, and `olengths2`

234:   To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`
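
  Example Usage:
  A minimal sketch (assumes the caller has computed `ilengths1`, `ilengths2`, and `nsends`):
.vb
  PetscMPIInt nrecvs, *onodes, *olengths1, *olengths2;

  PetscCall(PetscGatherNumberOfMessages(comm, NULL, ilengths1, &nrecvs));
  PetscCall(PetscGatherMessageLengths2(comm, nsends, nrecvs, ilengths1, ilengths2, &onodes, &olengths1, &olengths2));
  // message i will arrive from rank onodes[i] and carry olengths1[i] and olengths2[i] items respectively
  PetscCall(PetscFree(onodes));
  PetscCall(PetscFree(olengths1));
  PetscCall(PetscFree(olengths2));
.ve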

236: .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
237: @*/
238: PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
239: {
240:   PetscMPIInt  size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL;
241:   MPI_Request *s_waits = NULL, *r_waits = NULL;
242:   MPI_Status  *w_status = NULL;

244:   PetscFunctionBegin;
245:   PetscCallMPI(MPI_Comm_size(comm, &size));
246:   PetscCall(PetscCommGetNewTag(comm, &tag));

248:   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
249:   PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
250:   s_waits = r_waits + nrecvs;

252:   /* Post the Irecv to get the message length-info */
253:   PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
254:   PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
255:   for (i = 0; i < nrecvs; i++) {
256:     buf_j = buf_r + (2 * i);
257:     PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
258:   }

260:   /* Post the Isends with the message length-info */
261:   for (i = 0, j = 0; i < size; ++i) {
262:     if (ilengths1[i]) {
263:       buf_j    = buf_s + (2 * j);
264:       buf_j[0] = *(ilengths1 + i);
265:       buf_j[1] = *(ilengths2 + i);
266:       PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
267:       j++;
268:     }
269:   }
270:   PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);

272:   /* Post waits on sends and receives */
273:   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

275:   /* Pack up the received data */
276:   PetscCall(PetscMalloc1(nrecvs + 1, onodes));
277:   for (i = 0; i < nrecvs; ++i) {
278:     (*onodes)[i]    = w_status[i].MPI_SOURCE;
279:     buf_j           = buf_r + (2 * i);
280:     (*olengths1)[i] = buf_j[0];
281:     (*olengths2)[i] = buf_j[1];
282:   }

284:   PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
285:   PetscFunctionReturn(PETSC_SUCCESS);
286: }

288: /*
289:   Allocate a buffer sufficient to hold the messages whose sizes are specified in olengths[],
290:   and post Irecvs on these buffers using the rank information from onodes[]
291:  */
292: PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
293: {
294:   PetscInt   **rbuf_t, i, len = 0;
295:   MPI_Request *r_waits_t;

297:   PetscFunctionBegin;
298:   /* compute memory required for recv buffers */
299:   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

301:   /* allocate memory for recv buffers */
302:   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
303:   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
304:   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

306:   /* Post the receives */
307:   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
308:   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));

310:   *rbuf    = rbuf_t;
311:   *r_waits = r_waits_t;
312:   PetscFunctionReturn(PETSC_SUCCESS);
313: }
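
/* Example usage sketch for PetscPostIrecvInt() (illustrative only; assumes `tag`, `nrecvs`, `onodes`, and
   `olengths` were obtained with PetscCommGetNewTag(), PetscGatherNumberOfMessages(), and
   PetscGatherMessageLengths(), and that the matching MPI_Isend()s are posted elsewhere with the same tag):

     PetscInt    **rbuf;
     MPI_Request  *r_waits;
     MPI_Status   *r_status;

     PetscCall(PetscPostIrecvInt(comm, tag, nrecvs, onodes, olengths, &rbuf, &r_waits));
     ... post the matching sends ...
     PetscCall(PetscMalloc1(nrecvs, &r_status));
     if (nrecvs) PetscCallMPI(MPI_Waitall(nrecvs, r_waits, r_status));
     ... rbuf[i] now holds olengths[i] PetscInt values received from rank onodes[i] ...
     PetscCall(PetscFree(r_status));
     PetscCall(PetscFree(r_waits));
     PetscCall(PetscFree(rbuf[0]));
     PetscCall(PetscFree(rbuf));

   PetscPostIrecvScalar() below follows the same pattern with PetscScalar buffers.
*/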

315: PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
316: {
317:   PetscMPIInt   i;
318:   PetscScalar **rbuf_t;
319:   MPI_Request  *r_waits_t;
320:   PetscInt      len = 0;

322:   PetscFunctionBegin;
323:   /* compute memory required for recv buffers */
324:   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

326:   /* allocate memory for recv buffers */
327:   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
328:   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
329:   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

331:   /* Post the receives */
332:   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
333:   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));

335:   *rbuf    = rbuf_t;
336:   *r_waits = r_waits_t;
337:   PetscFunctionReturn(PETSC_SUCCESS);
338: }