View Javadoc
1   /*
2    * Copyright (C) 2014 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.math;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static java.lang.Double.NEGATIVE_INFINITY;
19  import static java.lang.Double.NaN;
20  import static java.lang.Double.POSITIVE_INFINITY;
21  import static java.util.Arrays.sort;
22  import static java.util.Collections.unmodifiableMap;
23  
24  import com.google.common.annotations.Beta;
25  import com.google.common.annotations.GwtIncompatible;
26  import com.google.common.primitives.Doubles;
27  import com.google.common.primitives.Ints;
28  import java.math.RoundingMode;
29  import java.util.Collection;
30  import java.util.HashMap;
31  import java.util.Map;
32  
33  /**
34   * Provides a fluent API for calculating
35   * <a href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>.
36   *
37   * <h3>Examples</h3>
38   *
39   * <p>To compute the median:
40   * <pre>   {@code
41   *
42   *   double myMedian = median().compute(myDataset);}</pre>
43   *
44   * where {@link #median()} has been statically imported.
45   *
46   * <p>To compute the 99th percentile:
47   * <pre>   {@code
48   *
49   *   double myPercentile99 = percentiles().index(99).compute(myDataset);}</pre>
50   *
51   * where {@link #percentiles()} has been statically imported.
52   *
53   * <p>To compute median and the 90th and 99th percentiles:
54   * <pre>   {@code
55   *
56   *   Map<Integer, Double> myPercentiles =
57   *       percentiles().indexes(50, 90, 99).compute(myDataset);}</pre>
58   *
59   * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys
60   * 50, 90, and 99, to their corresponding quantile values.
61   *
62   * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute
63   * arbitrary q-quantiles, use {@link #scale scale(q)}.
64   *
65   * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with
66   * it being arbitrarily reordered, and you want to avoid that copy, you can use
67   * {@code computeInPlace} instead of {@code compute}.
68   *
69   * <h3>Definition and notes on interpolation</h3>
70   *
71   * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if
72   * x is an integer, the result is the value which would appear at index x in the sorted dataset
73   * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average
74   * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and
75   * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7
76   * definition in
77   * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is
78   * described by
79   * <a href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population">
80   * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the
81   * uniform distribution on [0,1]."
82   *
83   * <h3>Handling of non-finite values</h3>
84   *
85   * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are
86   * {@link Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get
87   * from sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or
88   * {@link java.util.Collections#sort(java.util.List) Collections.sort(List&lt;Double&gt;)} and
89   * selecting the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is
90   * greater than any other value and place them at the end of the dataset, even after
91   * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY}.)
92   *
93   * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and
94   * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the
95   * dataset, as you would expect.
96   *
97   * <p>If required to do a weighted average between an infinity and a finite value, or between an
98   * infinite value and itself, the infinite value is returned. If required to do a weighted average
99   * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY
100  * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the
101  * dataset contains no finite values).
102  *
103  * <h3>Performance</h3>
104  *
105  * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a
106  * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on
107  * randomly ordered data (the probability decreases faster than exponentially in N), but if you are
108  * passing in unsanitized user data then a malicious user could force it. A light shuffle of the
109  * data using an unpredictable seed should normally be enough to thwart this attack.
110  *
111  * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes
112  * indexes} is generally less than the total time taken to compute each of them separately, and
113  * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th
114  * percentiles together takes about 55% as long as computing them separately.
115  *
116  * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either
117  * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is
118  * independent of N (but depends on the quantiles being computed). When calling
119  * {@link ScaleAndIndex#computeInPlace computeInPlace} (in
120  * {@linkplain ScaleAndIndexes#computeInPlace either form}), only the overhead is required. The
121  * number of object allocations is independent of N in both cases.
122  *
123  * @author Pete Gillin
124  * @since 20.0
125  */
126 @Beta
127 @GwtIncompatible
128 public final class Quantiles {
129 
130   /**
131    * Specifies the computation of a median (i.e. the 1st 2-quantile).
132    */
133   public static ScaleAndIndex median() {
134     return scale(2).index(1);
135   }
136 
137   /**
138    * Specifies the computation of quartiles (i.e. 4-quantiles).
139    */
140   public static Scale quartiles() {
141     return scale(4);
142   }
143 
144   /**
145    * Specifies the computation of percentiles (i.e. 100-quantiles).
146    */
147   public static Scale percentiles() {
148     return scale(100);
149   }
150 
151   /**
152    * Specifies the computation of q-quantiles.
153    *
154    * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which
155    *     must be positive
156    */
157   public static Scale scale(int scale) {
158     return new Scale(scale);
159   }
160 
161   /**
162    * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has
163    * been specified.
164    */
165   public static final class Scale {
166 
167     private final int scale;
168 
169     private Scale(int scale) {
170       checkArgument(scale > 0, "Quantile scale must be positive");
171       this.scale = scale;
172     }
173 
174     /**
175      * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile.
176      *
177      * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles
178      */
179     public ScaleAndIndex index(int index) {
180       return new ScaleAndIndex(scale, index);
181     }
182 
183     /**
184      * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
185      * q-quantile.
186      *
187      * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
188      *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
189      *     set will be snapshotted when this method is called
190      */
191     public ScaleAndIndexes indexes(int... indexes) {
192       return new ScaleAndIndexes(scale, indexes.clone());
193     }
194 
195     /**
196      * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
197      * q-quantile.
198      *
199      * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
200      *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
201      *     set will be snapshotted when this method is called
202      */
203     public ScaleAndIndexes indexes(Collection<Integer> indexes) {
204       return new ScaleAndIndexes(scale, Ints.toArray(indexes));
205     }
206   }
207 
208   /**
209    * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the
210    * q and the k in the kth q-quantile) have been specified.
211    */
212   public static final class ScaleAndIndex {
213 
214     private final int scale;
215     private final int index;
216 
217     private ScaleAndIndex(int scale, int index) {
218       checkIndex(index, scale);
219       this.scale = scale;
220       this.index = index;
221     }
222 
223     /**
224      * Computes the quantile value of the given dataset.
225      *
226      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
227      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
228      *     this call (it is copied instead)
229      * @return the quantile value
230      */
231     public double compute(Collection<? extends Number> dataset) {
232       return computeInPlace(Doubles.toArray(dataset));
233     }
234 
235     /**
236      * Computes the quantile value of the given dataset.
237      *
238      * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
239      *     be mutated by this call (it is copied instead)
240      * @return the quantile value
241      */
242     public double compute(double... dataset) {
243       return computeInPlace(dataset.clone());
244     }
245 
246     /**
247      * Computes the quantile value of the given dataset.
248      *
249      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
250      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
251      *     this call (it is copied instead)
252      * @return the quantile value
253      */
254     public double compute(long... dataset) {
255       return computeInPlace(longsToDoubles(dataset));
256     }
257 
258     /**
259      * Computes the quantile value of the given dataset.
260      *
261      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
262      *     cast to doubles, and which will not be mutated by this call (it is copied instead)
263      * @return the quantile value
264      */
265     public double compute(int... dataset) {
266       return computeInPlace(intsToDoubles(dataset));
267     }
268 
269     /**
270      * Computes the quantile value of the given dataset, performing the computation in-place.
271      *
272      * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
273      *     be arbitrarily reordered by this method call
274      * @return the quantile value
275      */
276     public double computeInPlace(double... dataset) {
277       checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
278       if (containsNaN(dataset)) {
279         return NaN;
280       }
281 
282       // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e.
283       // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value
284       // whose index in the sorted dataset equals the quotient; if there is a remainder, we
285       // interpolate between that and the next value.
286 
287       // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed
288       // as a long, without risk of overflow:
289       long numerator = (long) index * (dataset.length - 1);
290       // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
291       // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get
292       // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow:
293       int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
294       int remainder = (int) (numerator - (long) quotient * scale);
295       selectInPlace(quotient, dataset, 0, dataset.length - 1);
296       if (remainder == 0) {
297         return dataset[quotient];
298       } else {
299         selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1);
300         return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale);
301       }
302     }
303   }
304 
305   /**
306    * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e.
307    * the q and a set of values for the k in the kth q-quantile) have been specified.
308    */
309   public static final class ScaleAndIndexes {
310 
311     private final int scale;
312     private final int[] indexes;
313 
314     private ScaleAndIndexes(int scale, int[] indexes) {
315       for (int index : indexes) {
316         checkIndex(index, scale);
317       }
318       this.scale = scale;
319       this.indexes = indexes;
320     }
321 
322     /**
323      * Computes the quantile values of the given dataset.
324      *
325      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
326      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
327      *     this call (it is copied instead)
328      * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
329      *     the values the corresponding quantile values
330      */
331     public Map<Integer, Double> compute(Collection<? extends Number> dataset) {
332       return computeInPlace(Doubles.toArray(dataset));
333     }
334 
335     /**
336      * Computes the quantile values of the given dataset.
337      *
338      * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
339      *     be mutated by this call (it is copied instead)
340      * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
341      *     the values the corresponding quantile values
342      */
343     public Map<Integer, Double> compute(double... dataset) {
344       return computeInPlace(dataset.clone());
345     }
346 
347     /**
348      * Computes the quantile values of the given dataset.
349      *
350      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
351      *     cast to doubles (with any associated lost of precision), and which will not be mutated by
352      *     this call (it is copied instead)
353      * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
354      *     the values the corresponding quantile values
355      */
356     public Map<Integer, Double> compute(long... dataset) {
357       return computeInPlace(longsToDoubles(dataset));
358     }
359 
360     /**
361      * Computes the quantile values of the given dataset.
362      *
363      * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
364      *     cast to doubles, and which will not be mutated by this call (it is copied instead)
365      * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
366      *     the values the corresponding quantile values
367      */
368     public Map<Integer, Double> compute(int... dataset) {
369       return computeInPlace(intsToDoubles(dataset));
370     }
371 
372     /**
373      * Computes the quantile values of the given dataset, performing the computation in-place.
374      *
375      * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
376      *     be arbitrarily reordered by this method call
377      * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
378      *     the values the corresponding quantile values
379      */
380     public Map<Integer, Double> computeInPlace(double... dataset) {
381       checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
382       if (containsNaN(dataset)) {
383         Map<Integer, Double> nanMap = new HashMap<>();
384         for (int index : indexes) {
385           nanMap.put(index, NaN);
386         }
387         return unmodifiableMap(nanMap);
388       }
389 
390       // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e.
391       // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no
392       // remainder, we can just select the value whose index in the sorted dataset equals the
393       // quotient; if there is a remainder, we interpolate between that and the next value.
394 
395       int[] quotients = new int[indexes.length];
396       int[] remainders = new int[indexes.length];
397       // The indexes to select. In the worst case, we'll need one each side of each quantile.
398       int[] requiredSelections = new int[indexes.length * 2];
399       int requiredSelectionsCount = 0;
400       for (int i = 0; i < indexes.length; i++) {
401         // Since index and (dataset.length - 1) are non-negative ints, their product can be
402         // expressed as a long, without risk of overflow:
403         long numerator = (long) indexes[i] * (dataset.length - 1);
404         // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
405         // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to
406         // get a rounded ratio and a remainder which can be expressed as ints, without risk of
407         // overflow:
408         int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
409         int remainder = (int) (numerator - (long) quotient * scale);
410         quotients[i] = quotient;
411         remainders[i] = remainder;
412         requiredSelections[requiredSelectionsCount] = quotient;
413         requiredSelectionsCount++;
414         if (remainder != 0) {
415           requiredSelections[requiredSelectionsCount] = quotient + 1;
416           requiredSelectionsCount++;
417         }
418       }
419       sort(requiredSelections, 0, requiredSelectionsCount);
420       selectAllInPlace(
421           requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1);
422       Map<Integer, Double> ret = new HashMap<>();
423       for (int i = 0; i < indexes.length; i++) {
424         int quotient = quotients[i];
425         int remainder = remainders[i];
426         if (remainder == 0) {
427           ret.put(indexes[i], dataset[quotient]);
428         } else {
429           ret.put(
430               indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale));
431         }
432       }
433       return unmodifiableMap(ret);
434     }
435   }
436 
437   /**
438    * Returns whether any of the values in {@code dataset} are {@code NaN}.
439    */
440   private static boolean containsNaN(double... dataset) {
441     for (double value : dataset) {
442       if (Double.isNaN(value)) {
443         return true;
444       }
445     }
446     return false;
447   }
448 
449   /**
450    * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and
451    * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not
452    * {@code NaN}).
453    */
454   private static double interpolate(double lower, double upper, double remainder, double scale) {
455     if (lower == NEGATIVE_INFINITY) {
456       if (upper == POSITIVE_INFINITY) {
457         // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY:
458         return NaN;
459       }
460       // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY:
461       return NEGATIVE_INFINITY;
462     }
463     if (upper == POSITIVE_INFINITY) {
464       // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY:
465       return POSITIVE_INFINITY;
466     }
467     return lower + (upper - lower) * remainder / scale;
468   }
469 
470   private static void checkIndex(int index, int scale) {
471     if (index < 0 || index > scale) {
472       throw new IllegalArgumentException(
473           "Quantile indexes must be between 0 and the scale, which is " + scale);
474     }
475   }
476 
477   private static double[] longsToDoubles(long[] longs) {
478     int len = longs.length;
479     double[] doubles = new double[len];
480     for (int i = 0; i < len; i++) {
481       doubles[i] = longs[i];
482     }
483     return doubles;
484   }
485 
486   private static double[] intsToDoubles(int[] ints) {
487     int len = ints.length;
488     double[] doubles = new double[len];
489     for (int i = 0; i < len; i++) {
490       doubles[i] = ints[i];
491     }
492     return doubles;
493   }
494 
495   /**
496    * Performs an in-place selection to find the element which would appear at a given index in a
497    * dataset if it were sorted. The following preconditions should hold:
498    * <ul>
499    * <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array};
500    * <li>{@code required} should be in the range [{@code from}, {@code to}];
501    * <li>all the values with indexes in the range [0, {@code from}) should be less than or equal to
502    * all the values with indexes in the range [{@code from}, {@code to}];
503    * <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be
504    * greater than or equal to all the values with indexes in the range [{@code from}, {@code to}].
505    * </ul>
506    * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such
507    * that all the values with indexes in the range [{@code from}, {@code required}) are less than or
508    * equal to the value with index {@code required}, and all the values with indexes in the range
509    * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at
510    * {@code required} is the value which would appear at that index in the sorted dataset.
511    */
512   private static void selectInPlace(int required, double[] array, int from, int to) {
513     // If we are looking for the least element in the range, we can just do a linear search for it.
514     // (We will hit this whenever we are doing quantile interpolation: our first selection finds
515     // the lower value, our second one finds the upper value by looking for the next least element.)
516     if (required == from) {
517       int min = from;
518       for (int index = from + 1; index <= to; index++) {
519         if (array[min] > array[index]) {
520           min = index;
521         }
522       }
523       if (min != from) {
524         swap(array, min, from);
525       }
526       return;
527     }
528 
529     // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the
530     // required element, as long as it has more than one element.
531     while (to > from) {
532       int partitionPoint = partition(array, from, to);
533       if (partitionPoint >= required) {
534         to = partitionPoint - 1;
535       }
536       if (partitionPoint <= required) {
537         from = partitionPoint + 1;
538       }
539     }
540   }
541 
542   /**
543    * Performs a partition operation on the slice of {@code array} with elements in the range
544    * [{@code from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint
545    * between them as a pivot. Returns the index which the slice is partitioned around, i.e. if it
546    * returns {@code ret} then we know that the values with indexes in [{@code from}, {@code ret})
547    * are less than or equal to the value at {@code ret} and the values with indexes in ({@code ret},
548    * {@code to}] are greater than or equal to that.
549    */
550   private static int partition(double[] array, int from, int to) {
551     // Select a pivot, and move it to the start of the slice i.e. to index from.
552     movePivotToStartOfSlice(array, from, to);
553     double pivot = array[from];
554 
555     // Move all elements with indexes in (from, to] which are greater than the pivot to the end of
556     // the array. Keep track of where those elements begin.
557     int partitionPoint = to;
558     for (int i = to; i > from; i--) {
559       if (array[i] > pivot) {
560         swap(array, partitionPoint, i);
561         partitionPoint--;
562       }
563     }
564 
565     // We now know that all elements with indexes in (from, partitionPoint] are less than or equal
566     // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than
567     // it. We swap the pivot into partitionPoint and we know the array is partitioned around that.
568     swap(array, from, partitionPoint);
569     return partitionPoint;
570   }
571 
572   /**
573    * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and
574    * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if
575    * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}.
576    * Expects that {@code from} is strictly less than {@code to}.
577    */
578   private static void movePivotToStartOfSlice(double[] array, int from, int to) {
579     int mid = (from + to) >>> 1;
580     // We want to make a swap such that either array[to] <= array[from] <= array[mid], or
581     // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to
582     // (although it's possible that mid == from, if to == from + 1). Note that the postcondition
583     // would be impossible to fulfil if mid == to unless we also have array[from] == array[to].
584     boolean toLessThanMid = (array[to] < array[mid]);
585     boolean midLessThanFrom = (array[mid] < array[from]);
586     boolean toLessThanFrom = (array[to] < array[from]);
587     if (toLessThanMid == midLessThanFrom) {
588       // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to].
589       swap(array, mid, from);
590     } else if (toLessThanMid != toLessThanFrom) {
591       // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from].
592       swap(array, from, to);
593     }
594     // The postcondition now holds. So the median, our chosen pivot, is at from.
595   }
596 
597   /**
598    * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes
599    * {@code allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}].
600    * These indexes must be sorted in the array and must all be in the range [{@code from},
601    * {@code to}].
602    */
603   private static void selectAllInPlace(
604       int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) {
605     // Choose the first selection to do...
606     int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to);
607     int required = allRequired[requiredChosen];
608 
609     // ...do the first selection...
610     selectInPlace(required, array, from, to);
611 
612     // ...then recursively perform the selections in the range below...
613     int requiredBelow = requiredChosen - 1;
614     while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) {
615       requiredBelow--; // skip duplicates of required in the range below
616     }
617     if (requiredBelow >= requiredFrom) {
618       selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1);
619     }
620 
621     // ...and then recursively perform the selections in the range above.
622     int requiredAbove = requiredChosen + 1;
623     while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) {
624       requiredAbove++; // skip duplicates of required in the range above
625     }
626     if (requiredAbove <= requiredTo) {
627       selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to);
628     }
629   }
630 
631   /**
632    * Chooses the next selection to do from the required selections. It is required that the array
633    * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from},
634    * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The
635    * value returned by this method is the {@code i} in that range such that {@code allRequired[i]}
636    * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the
637    * value closest to the center of the range first is the most efficient strategy because it
638    * minimizes the size of the subranges from which the remaining selections must be done.
639    */
640   private static int chooseNextSelection(
641       int[] allRequired, int requiredFrom, int requiredTo, int from, int to) {
642     if (requiredFrom == requiredTo) {
643       return requiredFrom; // only one thing to choose, so choose it
644     }
645 
646     // Find the center and round down. The true center is either centerFloor or halfway between
647     // centerFloor and centerFloor + 1.
648     int centerFloor = (from + to) >>> 1;
649 
650     // Do a binary search until we're down to the range of two which encloses centerFloor (unless
651     // all values are lower or higher than centerFloor, in which case we find the two highest or
652     // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not,
653     // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded)
654     // center will be at either low or high.
655     int low = requiredFrom;
656     int high = requiredTo;
657     while (high > low + 1) {
658       int mid = (low + high) >>> 1;
659       if (allRequired[mid] > centerFloor) {
660         high = mid;
661       } else if (allRequired[mid] < centerFloor) {
662         low = mid;
663       } else {
664         return mid; // allRequired[mid] = centerFloor, so we can't get closer than that
665       }
666     }
667 
668     // Now pick the closest of the two candidates. Note that there is no rounding here.
669     if (from + to - allRequired[low] - allRequired[high] > 0) {
670       return high;
671     } else {
672       return low;
673     }
674   }
675 
676   /**
677    * Swaps the values at {@code i} and {@code j} in {@code array}.
678    */
679   private static void swap(double[] array, int i, int j) {
680     double temp = array[i];
681     array[i] = array[j];
682     array[j] = temp;
683   }
684 }