Actual source code: comb.c
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
25: {
26: PetscFunctionBegin;
27: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
28: PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
29: #else
30: PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
31: *request = MPI_REQUEST_NULL;
32: #endif
33: PetscFunctionReturn(PETSC_SUCCESS);
34: }
36: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
38: /*
39: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
40: */
41: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
42: {
43: PetscFunctionBegin;
44: PetscCall(PetscNew(sr));
45: (*sr)->numopsbegin = 0;
46: (*sr)->numopsend = 0;
47: (*sr)->state = STATE_BEGIN;
48: #define MAXOPS 32
49: (*sr)->maxops = MAXOPS;
50: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
51: #undef MAXOPS
52: (*sr)->comm = comm;
53: (*sr)->request = MPI_REQUEST_NULL;
54: (*sr)->mix = PETSC_FALSE;
55: (*sr)->async = PETSC_FALSE;
56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
57: (*sr)->async = PETSC_TRUE; /* Enable by default */
58: #endif
59: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
60: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: /*
65: This function is the MPI reduction operation used when there is
66: a combination of sums and max in the reduction. The call below to
67: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
68: MPI operator PetscSplitReduction_Op.
69: */
70: MPI_Op PetscSplitReduction_Op = 0;
72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
73: {
74: struct PetscScalarInt {
75: PetscScalar v;
76: PetscInt i;
77: };
78: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
79: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
80: PetscInt i, count = (PetscInt)*cnt;
82: PetscFunctionBegin;
83: if (*datatype != MPIU_SCALAR_INT) {
84: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
85: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
86: }
87: for (i = 0; i < count; i++) {
88: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
89: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
90: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
91: else {
92: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
93: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
94: }
95: }
96: PetscFunctionReturnVoid();
97: }
99: /*@
100: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
102: Collective but not synchronizing
104: Input Parameter:
105: comm - communicator on which split reduction has been queued
107: Level: advanced
109: Note:
110: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
111: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117: PetscSplitReduction *sr;
119: PetscFunctionBegin;
120: PetscCall(PetscSplitReductionGet(comm, &sr));
121: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
122: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
123: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
124: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
126: MPI_Comm comm = sr->comm;
127: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
129: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131: if (size == 1) {
132: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133: } else {
134: /* determine if all reductions are sum, max, or min */
135: for (i = 0; i < numops; i++) {
136: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140: }
141: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142: if (sum_flg + max_flg + min_flg > 1) {
143: sr->mix = PETSC_TRUE;
144: for (i = 0; i < numops; i++) {
145: sr->lvalues_mix[i].v = lvalues[i];
146: sr->lvalues_mix[i].i = reducetype[i];
147: }
148: PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151: } else if (min_flg) {
152: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153: } else {
154: PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155: }
156: }
157: sr->state = STATE_PENDING;
158: sr->numopsend = 0;
159: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160: } else {
161: PetscCall(PetscSplitReductionApply(sr));
162: }
163: PetscFunctionReturn(PETSC_SUCCESS);
164: }
166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168: PetscFunctionBegin;
169: switch (sr->state) {
170: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171: PetscCall(PetscSplitReductionApply(sr));
172: break;
173: case STATE_PENDING:
174: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177: sr->state = STATE_END;
178: if (sr->mix) {
179: PetscInt i;
180: for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
181: sr->mix = PETSC_FALSE;
182: }
183: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
184: break;
185: default:
186: break; /* everything is already done */
187: }
188: PetscFunctionReturn(PETSC_SUCCESS);
189: }
191: /*
192: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
193: */
194: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
195: {
196: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
197: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
199: MPI_Comm comm = sr->comm;
200: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
202: PetscFunctionBegin;
203: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206: if (size == 1) {
207: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208: } else {
209: /* determine if all reductions are sum, max, or min */
210: for (i = 0; i < numops; i++) {
211: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215: }
216: if (sum_flg + max_flg + min_flg > 1) {
217: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218: for (i = 0; i < numops; i++) {
219: sr->lvalues_mix[i].v = lvalues[i];
220: sr->lvalues_mix[i].i = reducetype[i];
221: }
222: PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223: for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226: } else if (min_flg) {
227: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228: } else {
229: PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230: }
231: }
232: sr->state = STATE_END;
233: sr->numopsend = 0;
234: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /*
239: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243: struct PetscScalarInt {
244: PetscScalar v;
245: PetscInt i;
246: };
247: PetscInt maxops = sr->maxops, *reducetype = sr->reducetype;
248: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
249: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
250: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
251: void **invecs = sr->invecs;
253: PetscFunctionBegin;
254: sr->maxops = 2 * maxops;
255: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
256: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
257: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
258: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
259: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
260: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
261: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
262: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {
268: PetscFunctionBegin;
269: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
270: PetscCall(PetscFree(sr));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276: /*
277: Private routine to delete internal storage when a communicator is freed.
278: This is called by MPI, not by users.
280: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281: it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
284: {
285: PetscFunctionBegin;
286: PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
287: PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
288: PetscFunctionReturn(PETSC_SUCCESS);
289: }
291: /*
292: PetscSplitReductionGet - Gets the split reduction object from a
293: PETSc vector, creates if it does not exit.
295: */
296: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
297: {
298: PetscMPIInt flag;
300: PetscFunctionBegin;
301: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
302: /*
303: The calling sequence of the 2nd argument to this function changed
304: between MPI Standard 1.0 and the revisions 1.1 Here we match the
305: new standard, if you are using an MPI implementation that uses
306: the older version you will get a warning message about the next line;
307: it is only a warning message and should do no harm.
308: */
309: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
310: }
311: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
312: if (!flag) { /* doesn't exist yet so create it and put it in */
313: PetscCall(PetscSplitReductionCreate(comm, sr));
314: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
315: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
316: }
317: PetscFunctionReturn(PETSC_SUCCESS);
318: }
320: /* ----------------------------------------------------------------------------------------------------*/
322: /*@
323: VecDotBegin - Starts a split phase dot product computation.
325: Input Parameters:
326: + x - the first vector
327: . y - the second vector
328: - result - where the result will go (can be NULL)
330: Level: advanced
332: Notes:
333: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
335: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
336: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
337: @*/
338: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339: {
340: PetscSplitReduction *sr;
341: MPI_Comm comm;
343: PetscFunctionBegin;
346: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347: PetscCall(PetscSplitReductionGet(comm, &sr));
348: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
349: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
350: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
351: sr->invecs[sr->numopsbegin] = (void *)x;
352: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
353: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
354: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: /*@
359: VecDotEnd - Ends a split phase dot product computation.
361: Input Parameters:
362: + x - the first vector (can be `NULL`)
363: . y - the second vector (can be `NULL`)
364: - result - where the result will go
366: Level: advanced
368: Notes:
369: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
371: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
372: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
373: @*/
374: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
375: {
376: PetscSplitReduction *sr;
377: MPI_Comm comm;
379: PetscFunctionBegin;
380: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
381: PetscCall(PetscSplitReductionGet(comm, &sr));
382: PetscCall(PetscSplitReductionEnd(sr));
384: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
385: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
386: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
387: *result = sr->gvalues[sr->numopsend++];
389: /*
390: We are finished getting all the results so reset to no outstanding requests
391: */
392: if (sr->numopsend == sr->numopsbegin) {
393: sr->state = STATE_BEGIN;
394: sr->numopsend = 0;
395: sr->numopsbegin = 0;
396: sr->mix = PETSC_FALSE;
397: }
398: PetscFunctionReturn(PETSC_SUCCESS);
399: }
401: /*@
402: VecTDotBegin - Starts a split phase transpose dot product computation.
404: Input Parameters:
405: + x - the first vector
406: . y - the second vector
407: - result - where the result will go (can be `NULL`)
409: Level: advanced
411: Notes:
412: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
414: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
415: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
416: @*/
417: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
418: {
419: PetscSplitReduction *sr;
420: MPI_Comm comm;
422: PetscFunctionBegin;
423: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
424: PetscCall(PetscSplitReductionGet(comm, &sr));
425: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
426: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
427: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
428: sr->invecs[sr->numopsbegin] = (void *)x;
429: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
430: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
431: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }
435: /*@
436: VecTDotEnd - Ends a split phase transpose dot product computation.
438: Input Parameters:
439: + x - the first vector (can be `NULL`)
440: . y - the second vector (can be `NULL`)
441: - result - where the result will go
443: Level: advanced
445: Notes:
446: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
448: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
449: `VecDotBegin()`, `VecDotEnd()`
450: @*/
451: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
452: {
453: PetscFunctionBegin;
454: /*
455: TDotEnd() is the same as DotEnd() so reuse the code
456: */
457: PetscCall(VecDotEnd(x, y, result));
458: PetscFunctionReturn(PETSC_SUCCESS);
459: }
461: /* -------------------------------------------------------------------------*/
463: /*@
464: VecNormBegin - Starts a split phase norm computation.
466: Input Parameters:
467: + x - the first vector
468: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
469: - result - where the result will go (can be `NULL`)
471: Level: advanced
473: Notes:
474: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
476: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
477: @*/
478: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
479: {
480: PetscSplitReduction *sr;
481: PetscReal lresult[2];
482: MPI_Comm comm;
484: PetscFunctionBegin;
486: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
487: PetscCall(PetscSplitReductionGet(comm, &sr));
488: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
489: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
491: sr->invecs[sr->numopsbegin] = (void *)x;
492: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
493: PetscUseTypeMethod(x, norm_local, ntype, lresult);
494: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
495: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
496: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
497: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
498: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
499: sr->lvalues[sr->numopsbegin++] = lresult[0];
500: if (ntype == NORM_1_AND_2) {
501: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
502: sr->lvalues[sr->numopsbegin++] = lresult[1];
503: }
504: PetscFunctionReturn(PETSC_SUCCESS);
505: }
507: /*@
508: VecNormEnd - Ends a split phase norm computation.
510: Input Parameters:
511: + x - the first vector
512: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
513: - result - where the result will go
515: Level: advanced
517: Notes:
518: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
520: The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value
522: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
523: @*/
524: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
525: {
526: PetscSplitReduction *sr;
527: MPI_Comm comm;
529: PetscFunctionBegin;
531: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
532: PetscCall(PetscSplitReductionGet(comm, &sr));
533: PetscCall(PetscSplitReductionEnd(sr));
535: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
536: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
537: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
538: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
540: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
541: else if (ntype == NORM_1_AND_2) {
542: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
543: result[1] = PetscSqrtReal(result[1]);
544: }
545: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
547: if (sr->numopsend == sr->numopsbegin) {
548: sr->state = STATE_BEGIN;
549: sr->numopsend = 0;
550: sr->numopsbegin = 0;
551: }
552: PetscFunctionReturn(PETSC_SUCCESS);
553: }
555: /*
556: Possibly add
558: PetscReductionSumBegin/End()
559: PetscReductionMaxBegin/End()
560: PetscReductionMinBegin/End()
561: or have more like MPI with a single function with flag for Op? Like first better
562: */
564: /*@
565: VecMDotBegin - Starts a split phase multiple dot product computation.
567: Input Parameters:
568: + x - the first vector
569: . nv - number of vectors
570: . y - array of vectors
571: - result - where the result will go (can be `NULL`)
573: Level: advanced
575: Notes:
576: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
578: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
579: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
580: @*/
581: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
582: {
583: PetscSplitReduction *sr;
584: MPI_Comm comm;
585: PetscInt i;
587: PetscFunctionBegin;
588: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
589: PetscCall(PetscSplitReductionGet(comm, &sr));
590: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
591: for (i = 0; i < nv; i++) {
592: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
593: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
594: sr->invecs[sr->numopsbegin + i] = (void *)x;
595: }
596: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
597: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
598: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
599: sr->numopsbegin += nv;
600: PetscFunctionReturn(PETSC_SUCCESS);
601: }
603: /*@
604: VecMDotEnd - Ends a split phase multiple dot product computation.
606: Input Parameters:
607: + x - the first vector (can be `NULL`)
608: . nv - number of vectors
609: - y - array of vectors (can be `NULL`)
611: Output Parameter:
612: . result - where the result will go
614: Level: advanced
616: Notes:
617: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
619: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
620: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
621: @*/
622: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
623: {
624: PetscSplitReduction *sr;
625: MPI_Comm comm;
626: PetscInt i;
628: PetscFunctionBegin;
629: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
630: PetscCall(PetscSplitReductionGet(comm, &sr));
631: PetscCall(PetscSplitReductionEnd(sr));
633: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
634: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
635: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
636: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
638: /*
639: We are finished getting all the results so reset to no outstanding requests
640: */
641: if (sr->numopsend == sr->numopsbegin) {
642: sr->state = STATE_BEGIN;
643: sr->numopsend = 0;
644: sr->numopsbegin = 0;
645: }
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
649: /*@
650: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
652: Input Parameters:
653: + x - the first vector
654: . nv - number of vectors
655: . y - array of vectors
656: - result - where the result will go (can be `NULL`)
658: Level: advanced
660: Notes:
661: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
663: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
664: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
665: @*/
666: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
667: {
668: PetscSplitReduction *sr;
669: MPI_Comm comm;
670: PetscInt i;
672: PetscFunctionBegin;
673: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
674: PetscCall(PetscSplitReductionGet(comm, &sr));
675: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
676: for (i = 0; i < nv; i++) {
677: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
678: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
679: sr->invecs[sr->numopsbegin + i] = (void *)x;
680: }
681: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
682: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
683: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
684: sr->numopsbegin += nv;
685: PetscFunctionReturn(PETSC_SUCCESS);
686: }
688: /*@
689: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
691: Input Parameters:
692: + x - the first vector (can be `NULL`)
693: . nv - number of vectors
694: - y - array of vectors (can be `NULL`)
696: Output Parameter:
697: . result - where the result will go
699: Level: advanced
701: Notes:
702: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
704: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
705: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
706: @*/
707: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
708: {
709: PetscFunctionBegin;
710: /*
711: MTDotEnd() is the same as MDotEnd() so reuse the code
712: */
713: PetscCall(VecMDotEnd(x, nv, y, result));
714: PetscFunctionReturn(PETSC_SUCCESS);
715: }