Actual source code: comb.c


  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
 25: {
 26:   PetscFunctionBegin;
 27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 28:   PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
 29: #else
 30:   PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
 31:   *request = MPI_REQUEST_NULL;
 32: #endif
 33:   PetscFunctionReturn(PETSC_SUCCESS);
 34: }

 36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);

 38: /*
 39:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 40: */
 41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
 42: {
 43:   PetscFunctionBegin;
 44:   PetscCall(PetscNew(sr));
 45:   (*sr)->numopsbegin = 0;
 46:   (*sr)->numopsend   = 0;
 47:   (*sr)->state       = STATE_BEGIN;
 48: #define MAXOPS 32
 49:   (*sr)->maxops = MAXOPS;
 50:   PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
 51: #undef MAXOPS
 52:   (*sr)->comm    = comm;
 53:   (*sr)->request = MPI_REQUEST_NULL;
 54:   (*sr)->mix     = PETSC_FALSE;
 55:   (*sr)->async   = PETSC_FALSE;
 56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 57:   (*sr)->async = PETSC_TRUE; /* Enable by default */
 58: #endif
 59:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 60:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
 61:   PetscFunctionReturn(PETSC_SUCCESS);
 62: }

 64: /*
 65:        This function is the MPI reduction operation used when there is
 66:    a combination of sums and max in the reduction. The call below to
 67:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 68:    MPI operator PetscSplitReduction_Op.
 69: */
 70: MPI_Op PetscSplitReduction_Op = 0;

 72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 73: {
 74:   struct PetscScalarInt {
 75:     PetscScalar v;
 76:     PetscInt    i;
 77:   };
 78:   struct PetscScalarInt *xin  = (struct PetscScalarInt *)in;
 79:   struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
 80:   PetscInt               i, count = (PetscInt)*cnt;

 82:   PetscFunctionBegin;
 83:   if (*datatype != MPIU_SCALAR_INT) {
 84:     PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
 85:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 86:   }
 87:   for (i = 0; i < count; i++) {
 88:     if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 89:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 90:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 91:     else {
 92:       PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
 93:       PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 94:     }
 95:   }
 96:   PetscFunctionReturnVoid();
 97: }

 99: /*@
100:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

102:    Collective but not synchronizing

104:    Input Parameter:
105:    comm - communicator on which split reduction has been queued

107:    Level: advanced

109:    Note:
110:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
111:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117:   PetscSplitReduction *sr;

119:   PetscFunctionBegin;
120:   PetscCall(PetscSplitReductionGet(comm, &sr));
121:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
122:   if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
123:     PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
124:     PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125:     PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
126:     MPI_Comm     comm = sr->comm;
127:     PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

129:     PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130:     PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131:     if (size == 1) {
132:       PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133:     } else {
134:       /* determine if all reductions are sum, max, or min */
135:       for (i = 0; i < numops; i++) {
136:         if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139:         else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140:       }
141:       PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142:       if (sum_flg + max_flg + min_flg > 1) {
143:         sr->mix = PETSC_TRUE;
144:         for (i = 0; i < numops; i++) {
145:           sr->lvalues_mix[i].v = lvalues[i];
146:           sr->lvalues_mix[i].i = reducetype[i];
147:         }
148:         PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149:       } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150:         PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151:       } else if (min_flg) {
152:         PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153:       } else {
154:         PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155:       }
156:     }
157:     sr->state     = STATE_PENDING;
158:     sr->numopsend = 0;
159:     PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160:   } else {
161:     PetscCall(PetscSplitReductionApply(sr));
162:   }
163:   PetscFunctionReturn(PETSC_SUCCESS);
164: }

166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168:   PetscFunctionBegin;
169:   switch (sr->state) {
170:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171:     PetscCall(PetscSplitReductionApply(sr));
172:     break;
173:   case STATE_PENDING:
174:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175:     PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176:     if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177:     sr->state = STATE_END;
178:     if (sr->mix) {
179:       PetscInt i;
180:       for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
181:       sr->mix = PETSC_FALSE;
182:     }
183:     PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
184:     break;
185:   default:
186:     break; /* everything is already done */
187:   }
188:   PetscFunctionReturn(PETSC_SUCCESS);
189: }

191: /*
192:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
193: */
194: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
195: {
196:   PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
197:   PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198:   PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
199:   MPI_Comm     comm = sr->comm;
200:   PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

202:   PetscFunctionBegin;
203:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204:   PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205:   PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206:   if (size == 1) {
207:     PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208:   } else {
209:     /* determine if all reductions are sum, max, or min */
210:     for (i = 0; i < numops; i++) {
211:       if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215:     }
216:     if (sum_flg + max_flg + min_flg > 1) {
217:       PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218:       for (i = 0; i < numops; i++) {
219:         sr->lvalues_mix[i].v = lvalues[i];
220:         sr->lvalues_mix[i].i = reducetype[i];
221:       }
222:       PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223:       for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224:     } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225:       PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226:     } else if (min_flg) {
227:       PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228:     } else {
229:       PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230:     }
231:   }
232:   sr->state     = STATE_END;
233:   sr->numopsend = 0;
234:   PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }

238: /*
239:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243:   struct PetscScalarInt {
244:     PetscScalar v;
245:     PetscInt    i;
246:   };
247:   PetscInt               maxops = sr->maxops, *reducetype = sr->reducetype;
248:   PetscScalar           *lvalues = sr->lvalues, *gvalues = sr->gvalues;
249:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
250:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
251:   void                 **invecs      = sr->invecs;

253:   PetscFunctionBegin;
254:   sr->maxops = 2 * maxops;
255:   PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
256:   PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
257:   PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
258:   PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
259:   PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
260:   PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
261:   PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
262:   PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
263:   PetscFunctionReturn(PETSC_SUCCESS);
264: }

266: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {
268:   PetscFunctionBegin;
269:   PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
270:   PetscCall(PetscFree(sr));
271:   PetscFunctionReturn(PETSC_SUCCESS);
272: }

274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

276: /*
277:    Private routine to delete internal storage when a communicator is freed.
278:   This is called by MPI, not by users.

280:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281:   it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
284: {
285:   PetscFunctionBegin;
286:   PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
287:   PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
288:   PetscFunctionReturn(PETSC_SUCCESS);
289: }

291: /*
292:      PetscSplitReductionGet - Gets the split reduction object from a
293:         PETSc vector, creates if it does not exit.

295: */
296: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
297: {
298:   PetscMPIInt flag;

300:   PetscFunctionBegin;
301:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
302:     /*
303:        The calling sequence of the 2nd argument to this function changed
304:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
305:        new standard, if you are using an MPI implementation that uses
306:        the older version you will get a warning message about the next line;
307:        it is only a warning message and should do no harm.
308:     */
309:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
310:   }
311:   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
312:   if (!flag) { /* doesn't exist yet so create it and put it in */
313:     PetscCall(PetscSplitReductionCreate(comm, sr));
314:     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
315:     PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
316:   }
317:   PetscFunctionReturn(PETSC_SUCCESS);
318: }

320: /* ----------------------------------------------------------------------------------------------------*/

322: /*@
323:    VecDotBegin - Starts a split phase dot product computation.

325:    Input Parameters:
326: +   x - the first vector
327: .   y - the second vector
328: -   result - where the result will go (can be NULL)

330:    Level: advanced

332:    Notes:
333:    Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

335: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
336:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
337: @*/
338: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339: {
340:   PetscSplitReduction *sr;
341:   MPI_Comm             comm;

343:   PetscFunctionBegin;
346:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347:   PetscCall(PetscSplitReductionGet(comm, &sr));
348:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
349:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
350:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
351:   sr->invecs[sr->numopsbegin]     = (void *)x;
352:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
353:   PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
354:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: /*@
359:    VecDotEnd - Ends a split phase dot product computation.

361:    Input Parameters:
362: +  x - the first vector (can be `NULL`)
363: .  y - the second vector (can be `NULL`)
364: -  result - where the result will go

366:    Level: advanced

368:    Notes:
369:    Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

371: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
372:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
373: @*/
374: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
375: {
376:   PetscSplitReduction *sr;
377:   MPI_Comm             comm;

379:   PetscFunctionBegin;
380:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
381:   PetscCall(PetscSplitReductionGet(comm, &sr));
382:   PetscCall(PetscSplitReductionEnd(sr));

384:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
385:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
386:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
387:   *result = sr->gvalues[sr->numopsend++];

389:   /*
390:      We are finished getting all the results so reset to no outstanding requests
391:   */
392:   if (sr->numopsend == sr->numopsbegin) {
393:     sr->state       = STATE_BEGIN;
394:     sr->numopsend   = 0;
395:     sr->numopsbegin = 0;
396:     sr->mix         = PETSC_FALSE;
397:   }
398:   PetscFunctionReturn(PETSC_SUCCESS);
399: }

401: /*@
402:    VecTDotBegin - Starts a split phase transpose dot product computation.

404:    Input Parameters:
405: +  x - the first vector
406: .  y - the second vector
407: -  result - where the result will go (can be `NULL`)

409:    Level: advanced

411:    Notes:
412:    Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

414: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
415:           `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
416: @*/
417: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
418: {
419:   PetscSplitReduction *sr;
420:   MPI_Comm             comm;

422:   PetscFunctionBegin;
423:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
424:   PetscCall(PetscSplitReductionGet(comm, &sr));
425:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
426:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
427:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
428:   sr->invecs[sr->numopsbegin]     = (void *)x;
429:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
430:   PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
431:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }

435: /*@
436:    VecTDotEnd - Ends a split phase transpose dot product computation.

438:    Input Parameters:
439: +  x - the first vector (can be `NULL`)
440: .  y - the second vector (can be `NULL`)
441: -  result - where the result will go

443:    Level: advanced

445:    Notes:
446:    Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

448: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
449:           `VecDotBegin()`, `VecDotEnd()`
450: @*/
451: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
452: {
453:   PetscFunctionBegin;
454:   /*
455:       TDotEnd() is the same as DotEnd() so reuse the code
456:   */
457:   PetscCall(VecDotEnd(x, y, result));
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

461: /* -------------------------------------------------------------------------*/

463: /*@
464:    VecNormBegin - Starts a split phase norm computation.

466:    Input Parameters:
467: +  x - the first vector
468: .  ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
469: -  result - where the result will go (can be `NULL`)

471:    Level: advanced

473:    Notes:
474:    Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

476: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
477: @*/
478: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
479: {
480:   PetscSplitReduction *sr;
481:   PetscReal            lresult[2];
482:   MPI_Comm             comm;

484:   PetscFunctionBegin;
486:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
487:   PetscCall(PetscSplitReductionGet(comm, &sr));
488:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
489:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));

491:   sr->invecs[sr->numopsbegin] = (void *)x;
492:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
493:   PetscUseTypeMethod(x, norm_local, ntype, lresult);
494:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
495:   if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
496:   if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
497:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
498:   else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
499:   sr->lvalues[sr->numopsbegin++] = lresult[0];
500:   if (ntype == NORM_1_AND_2) {
501:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
502:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
503:   }
504:   PetscFunctionReturn(PETSC_SUCCESS);
505: }

507: /*@
508:    VecNormEnd - Ends a split phase norm computation.

510:    Input Parameters:
511: +  x - the first vector
512: .  ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
513: -  result - where the result will go

515:    Level: advanced

517:    Notes:
518:    Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

520:    The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value

522: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
523: @*/
524: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
525: {
526:   PetscSplitReduction *sr;
527:   MPI_Comm             comm;

529:   PetscFunctionBegin;
531:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
532:   PetscCall(PetscSplitReductionGet(comm, &sr));
533:   PetscCall(PetscSplitReductionEnd(sr));

535:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
536:   PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
537:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
538:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

540:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
541:   else if (ntype == NORM_1_AND_2) {
542:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
543:     result[1] = PetscSqrtReal(result[1]);
544:   }
545:   if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));

547:   if (sr->numopsend == sr->numopsbegin) {
548:     sr->state       = STATE_BEGIN;
549:     sr->numopsend   = 0;
550:     sr->numopsbegin = 0;
551:   }
552:   PetscFunctionReturn(PETSC_SUCCESS);
553: }

555: /*
556:    Possibly add

558:      PetscReductionSumBegin/End()
559:      PetscReductionMaxBegin/End()
560:      PetscReductionMinBegin/End()
561:    or have more like MPI with a single function with flag for Op? Like first better
562: */

564: /*@
565:    VecMDotBegin - Starts a split phase multiple dot product computation.

567:    Input Parameters:
568: +   x - the first vector
569: .   nv - number of vectors
570: .   y - array of vectors
571: -   result - where the result will go (can be `NULL`)

573:    Level: advanced

575:    Notes:
576:    Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

578: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
579:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
580: @*/
581: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
582: {
583:   PetscSplitReduction *sr;
584:   MPI_Comm             comm;
585:   PetscInt             i;

587:   PetscFunctionBegin;
588:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
589:   PetscCall(PetscSplitReductionGet(comm, &sr));
590:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
591:   for (i = 0; i < nv; i++) {
592:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
593:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
594:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
595:   }
596:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
597:   PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
598:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
599:   sr->numopsbegin += nv;
600:   PetscFunctionReturn(PETSC_SUCCESS);
601: }

603: /*@
604:    VecMDotEnd - Ends a split phase multiple dot product computation.

606:    Input Parameters:
607: +   x - the first vector (can be `NULL`)
608: .   nv - number of vectors
609: -   y - array of vectors (can be `NULL`)

611:    Output Parameter:
612: .   result - where the result will go

614:    Level: advanced

616:    Notes:
617:    Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

619: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
620:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
621: @*/
622: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
623: {
624:   PetscSplitReduction *sr;
625:   MPI_Comm             comm;
626:   PetscInt             i;

628:   PetscFunctionBegin;
629:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
630:   PetscCall(PetscSplitReductionGet(comm, &sr));
631:   PetscCall(PetscSplitReductionEnd(sr));

633:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
634:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
635:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
636:   for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];

638:   /*
639:      We are finished getting all the results so reset to no outstanding requests
640:   */
641:   if (sr->numopsend == sr->numopsbegin) {
642:     sr->state       = STATE_BEGIN;
643:     sr->numopsend   = 0;
644:     sr->numopsbegin = 0;
645:   }
646:   PetscFunctionReturn(PETSC_SUCCESS);
647: }

649: /*@
650:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

652:    Input Parameters:
653: +  x - the first vector
654: .  nv - number of vectors
655: .  y - array of  vectors
656: -  result - where the result will go (can be `NULL`)

658:    Level: advanced

660:    Notes:
661:    Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.

663: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
664:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
665: @*/
666: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
667: {
668:   PetscSplitReduction *sr;
669:   MPI_Comm             comm;
670:   PetscInt             i;

672:   PetscFunctionBegin;
673:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
674:   PetscCall(PetscSplitReductionGet(comm, &sr));
675:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
676:   for (i = 0; i < nv; i++) {
677:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
678:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
679:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
680:   }
681:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
682:   PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
683:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
684:   sr->numopsbegin += nv;
685:   PetscFunctionReturn(PETSC_SUCCESS);
686: }

688: /*@
689:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

691:    Input Parameters:
692: +  x - the first vector (can be `NULL`)
693: .  nv - number of vectors
694: -  y - array of  vectors (can be `NULL`)

696:    Output Parameter:
697: .  result - where the result will go

699:    Level: advanced

701:    Notes:
702:    Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

704: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
705:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
706: @*/
707: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
708: {
709:   PetscFunctionBegin;
710:   /*
711:       MTDotEnd() is the same as MDotEnd() so reuse the code
712:   */
713:   PetscCall(VecMDotEnd(x, nv, y, result));
714:   PetscFunctionReturn(PETSC_SUCCESS);
715: }