Actual source code: psort.c


#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
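/* For example, for ranks [rankStart, rankEnd) = [0, 6), the loop below picks split = 4, so ranks
 * 0 <-> 4 and 1 <-> 5 compare-exchange their length-n blocks, ranks 2 and 3 (whose partner would
 * be >= rankEnd) keep their keys unchanged, and the recursion then merges [0, 4) and [4, 6). */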
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt((PetscInt)n, keys));
    } else {
      PetscCall(PetscSortReverseInt((PetscInt)n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    PetscCallMPI(MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE));
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
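/* The lower half of the ranks sorts in the direction opposite to the requested one, so together
 * the two halves form a bitonic sequence of blocks that the final merge orders in the requested
 * direction. */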
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  PetscFunctionBegin;
  diff = rankEnd - rankStart;
  if (diff <= 0) PetscFunctionReturn(PETSC_SUCCESS);
  if (diff == 1) {
    if (forward) {
      PetscCall(PetscSortInt(n, keys));
    } else {
      PetscCall(PetscSortReverseInt(n, keys));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward));
  } else {
    PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward));
  }
  /* bitonic merge */
  PetscCall(PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward));
  PetscFunctionReturn(PETSC_SUCCESS);
}
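
/* Driver for the recursive bitonic sort above.  The compare-exchange in the merge sends and
 * receives exactly n entries, so every rank in comm must pass the same n (as in the only caller
 * below, where each rank contributes size - 1 pivot candidates). */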

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscFunctionBegin;
  PetscCall(PetscCommGetNewTag(comm, &tag));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscMPIIntCast(n, &mpin));
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscCall(PetscMalloc1(size - 1, &pivots));
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  Every process knows how many empty
       * processes are in the layout, so these sentinel values, which sort to the end of the
       * pivot list, can be ignored later */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscCall(PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots));
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
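  /* Example: with P = 4 and Z = 3 there are 9 real pivots and the target indices into the
   * globally sorted pivot list are 2, 5, and 8; the loop below assigns one sample each to ranks
   * 0, 1, and 2 (keys_per = {1, 1, 1, 0}), each at local offset my_first = 2 within that rank's
   * block of P - 1 sorted pivots. */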
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCall(PetscCalloc1(size + 1, &keys_per));
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscCall(PetscMalloc1(size * max_keys_per, &finalpivots));
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  PetscCallMPI(MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm));
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscCall(PetscFree(keys_per));
  PetscCall(PetscFree(pivots));
  PetscFunctionReturn(PETSC_SUCCESS);
}
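
/* Redistribute a globally sorted array into the layout described by map.  Each rank currently
 * holds the entries [myOffset, nextOffset) of the global array (offsets computed with MPI_Scan
 * below).  Exactly one sending rank's block contains the first entry of a given target range:
 * that rank sends it with firsttag, and the ranks after it send the rest of the range (possibly
 * as empty placeholder messages) with secondtag, which the receiver drains in rank order. */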

static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(map->comm, &size));
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscCommGetNewTag(map->comm, &firsttag));
  PetscCall(PetscCommGetNewTag(map->comm, &secondtag));
  myOffset = 0;
  PetscCall(PetscMalloc2(size, &firstreqs, size, &secondreqs));
  PetscCallMPI(MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm));
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) PetscCallMPI(MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv));
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      PetscCallMPI(MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++])));
    } else if (overlap > 0) {
      /* send second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      PetscCallMPI(MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++])));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    PetscCallMPI(MPI_Wait(&firstreqrcv, &firststatus));
    sender = firststatus.MPI_SOURCE;
    PetscCallMPI(MPI_Get_count(&firststatus, MPIU_INT, &filled));
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    PetscCallMPI(MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat));
    PetscCallMPI(MPI_Get_count(&stat, MPIU_INT, &mfilled));
    filled += mfilled;
  }
  PetscCallMPI(MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree2(firstreqs, secondreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  PetscCallMPI(MPI_Comm_rank(mapin->comm, &rank));
  PetscCall(PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv));
  /* sort locally */
  PetscCall(PetscSortInt(mapin->n, keysin));
  /* get P - 1 pivots */
  PetscCall(PetscParallelSampleSelect(mapin, mapout, keysin, &pivots));
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
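  /* Example: with size = 4, pivots = {10, 20, 30}, and keysin = {3, 10, 12, 25, 40}, the loop
   * above produces keys_per_snd = {1, 2, 1, 1} and offsets_snd = {0, 1, 3, 4, 5}; keys equal to a
   * pivot go to the higher bucket. */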
  /* get the incoming sizes */
  PetscCallMPI(MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm));
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscCall(PetscMalloc1(nrecv, &buffer));
  PetscCallMPI(MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm));
  PetscCall(PetscFree(pivots));
  PetscCall(PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv));

  /* local sort */
  PetscCall(PetscSortInt(nrecv, buffer));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted));
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sort failed");
  }
#endif

  /* redistribute to the desired order */
  PetscCall(PetscParallelRedistribute(mapout, nrecv, buffer, keysout));
  PetscCall(PetscFree(buffer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of integers to sort, distributed according to `mapin`

  Output Parameter:
. keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does:
.vb
  - sorts locally
  - chooses pivots: each process suggests (P-1) pivots, these are sorted in parallel with a bitonic sort, and (P-1) of them are selected and allgathered
  - uses the pivots to repartition the keys by an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If keysin != keysout, then keysin will not be changed during `PetscParallelSortInt()`.
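
  Example Usage:
  A minimal sketch (the communicator, the local size nlocal, and the key values below are purely illustrative; the same layout is used for input and output):
.vb
  PetscLayout mapin, mapout;
  PetscInt   *keysin, *keysout, i, nlocal = 10;

  PetscCall(PetscLayoutCreateFromSizes(PETSC_COMM_WORLD, nlocal, PETSC_DECIDE, 1, &mapin));
  PetscCall(PetscLayoutCreateFromSizes(PETSC_COMM_WORLD, nlocal, PETSC_DECIDE, 1, &mapout));
  PetscCall(PetscMalloc2(nlocal, &keysin, nlocal, &keysout));
  for (i = 0; i < nlocal; i++) keysin[i] = nlocal - i;
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  PetscCall(PetscFree2(keysin, keysout));
  PetscCall(PetscLayoutDestroy(&mapin));
  PetscCall(PetscLayoutDestroy(&mapout));
.ve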

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_compare(mapin->comm, mapout->comm, &result));
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "layouts are not on the same communicator");
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutSetUp(mapout));
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  PetscCallMPI(MPI_Comm_size(mapin->comm, &size));
  if (size == 1) {
    if (keysout != keysin) PetscCall(PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt)));
    PetscCall(PetscSortInt(mapout->n, keysout));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (keysout != keysin) {
    PetscCall(PetscMalloc1(mapin->n, &keysincopy));
    PetscCall(PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt)));
    keysin = keysincopy;
  }
  PetscCall(PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout));
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscCall(PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted));
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "samplesort sort failed");
  }
#endif
  PetscCall(PetscFree(keysincopy));
  PetscFunctionReturn(PETSC_SUCCESS);
}