/* psort.c: parallel sorting of distributed integer arrays (PetscParallelSortInt and helpers) */
#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge for non-power-of-2 sizes described at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
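/* Each participating rank holds the same number of keys, n. The merge splits the rank range
 * [rankStart, rankEnd) at the largest power of two strictly smaller than its size; ranks i and
 * i + split exchange their blocks and keep the element-wise minima (lower half, for a forward
 * merge) or maxima (upper half), and then each half is merged recursively. A rank whose partner
 * falls outside the range skips the exchange but still takes part in the recursion. */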
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt((PetscInt)n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt)n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the bitonic sort for non-power-of-2 sizes described at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
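/* The lower half of the rank range is sorted opposite to `forward` and the upper half in the `forward`
 * direction, so that together the two halves form a bitonic sequence, which the final call to
 * PetscParallelSortInt_Bitonic_Merge() then sorts in the `forward` direction. */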
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt *buffer;

  PetscFunctionBegin;
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
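
/* Choose the final (size - 1) splitters for the samplesort: every rank proposes (size - 1) pivot
 * candidates taken from positions of its locally sorted keys chosen to match the desired output
 * layout (ranks with no input keys propose PETSC_MAX_INT placeholders, which sort to the end), the
 * candidates are sorted across all ranks with the bitonic sort above, evenly spaced candidates are
 * selected from the sorted list, and the selection is allgathered so that every rank returns the
 * same pivot array in outpivots (which the caller must free). */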
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt size, rank;
  PetscInt *pivots, *finalpivots, i;
  PetscInt non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list. each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index = PetscMin(index, (mapin->n - 1));
      index = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
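  /* ranks that supply fewer than max_keys_per pivots pad their contribution with PETSC_MAX_INT so that
   * every rank passes the same count to MPI_Allgather(); the padding is dropped when the gathered
   * array is compacted below */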
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(PETSC_SUCCESS);
}
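
/* Move the globally sorted keys, currently distributed according to each rank's prefix sum of n, into
 * the distribution described by map. Every destination rank posts a single MPI_ANY_SOURCE receive for a
 * "first" message, which delivers the keys at the start of its range; whatever is still missing is then
 * received, in rank order, as "second" messages from the ranks that follow the first sender. An empty
 * rank whose offset falls strictly inside a destination range sends an empty second message so that the
 * receiver's rank-by-rank loop does not stall. */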
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt size, rank;
  PetscInt myOffset, nextOffset;
  PetscInt i;
  PetscMPIInt total, filled;
  PetscMPIInt sender, nfirst, nsecond;
  PetscMPIInt firsttag, secondtag;
  MPI_Request firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total = map->range[rank + 1] - map->range[rank];
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart = PetscMax(myOffset, map->range[i]);
    oEnd = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
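
/* The samplesort driver: sort locally, select pivots with PetscParallelSampleSelect(), repartition the
 * keys with MPI_Alltoallv() so that rank i receives the keys falling between pivots i - 1 and i, sort
 * the received keys locally, and finally redistribute the globally sorted keys to match mapout. */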
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size, rank;
  PetscInt *pivots = NULL, *buffer;
  PetscInt i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
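  /* keys strictly less than pivots[i] (and not claimed by an earlier bucket) are sent to rank i;
   * keys equal to a pivot go to a later rank */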
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i] = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1] = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size] = mapin->n;
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin  - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the input array of integers (it does not need to be sorted on entry)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does the following:
.vb
  - sorts locally
  - chooses pivots by sorting (in parallel) (P-1) pivot suggestions from each process using bitonic sort and allgathering a subset of (P-1) of those
  - uses the pivots to repartition the keys by an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If keysin != keysout, then keysin will not be changed during `PetscParallelSortInt()`.
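
  Example Usage:
  A minimal sketch; the communicator `comm`, the local size `nlocal`, and the values placed in `keysin` are placeholders to be supplied by the caller:
.vb
  PetscLayout mapin, mapout;
  PetscInt   *keysin, *keysout;

  PetscCall(PetscLayoutCreate(comm, &mapin));
  PetscCall(PetscLayoutSetLocalSize(mapin, nlocal));
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutCreate(comm, &mapout));
  PetscCall(PetscLayoutSetLocalSize(mapout, nlocal));
  PetscCall(PetscLayoutSetUp(mapout));
  PetscCall(PetscMalloc2(nlocal, &keysin, nlocal, &keysout));
  /* ... fill keysin with nlocal integers on each rank ... */
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  /* keysout now holds this rank's slice of the globally sorted keys, as described by mapout */
  PetscCall(PetscFree2(keysin, keysout));
  PetscCall(PetscLayoutDestroy(&mapin));
  PetscCall(PetscLayoutDestroy(&mapout));
.ve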

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt *keysincopy = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(PETSC_SUCCESS);
}