View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.math;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  import static com.google.common.base.Preconditions.checkState;
20  import static com.google.common.math.DoubleUtils.ensureNonNegative;
21  import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
22  import static com.google.common.primitives.Doubles.isFinite;
23  import static java.lang.Double.NaN;
24  import static java.lang.Double.doubleToLongBits;
25  import static java.lang.Double.isNaN;
26  
27  import com.google.common.annotations.Beta;
28  import com.google.common.annotations.GwtIncompatible;
29  import com.google.common.base.MoreObjects;
30  import com.google.common.base.Objects;
31  import java.io.Serializable;
32  import java.nio.ByteBuffer;
33  import java.nio.ByteOrder;
34  import java.util.Iterator;
35  import javax.annotation.Nullable;
36  
37  /**
38   * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
39   * forms of variance -- that were computed from a single set of zero or more floating-point values.
40   *
41   * <p>There are two ways to obtain a {@code Stats} instance:
42   *
43   * <ul>
44   * <li>If all the values you want to summarize are already known, use the appropriate {@code
45   *     Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
46   *     {@code Number}, and primitive varargs are supported.
47   * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, feed
48   *     values to it as you get them, then call {@link StatsAccumulator#snapshot}.
49   * </ul>
50   *
51   * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
52   * calculate <i>only</i> the mean.
53   *
54   * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
55   * built-in JDK libraries instead of this class.
56   *
57   * @author Pete Gillin
58   * @author Kevin Bourrillion
59   * @since 20.0
60   */
61  @Beta
62  @GwtIncompatible
63  public final class Stats implements Serializable {
64  
65    private final long count;
66    private final double mean;
67    private final double sumOfSquaresOfDeltas;
68    private final double min;
69    private final double max;
70  
71    /**
72     * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
73     *
74     * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
75     * following constraints. This is the callers responsibility and is not enforced here.
76     * <ul>
77     * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
78     * get multiplied by 0 to calculate the sum), and the other parameters may have any values (they
79     * will not be used).
80     * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or
81     * {@link Double#NaN}.
82     * </ul>
83     */
84    Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
85      this.count = count;
86      this.mean = mean;
87      this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
88      this.min = min;
89      this.max = max;
90    }
91  
92    /**
93     * Returns statistics over a dataset containing the given values.
94     *
95     * @param values a series of values, which will be converted to {@code double} values (this may
96     *     cause loss of precision)
97     */
98    public static Stats of(Iterable<? extends Number> values) {
99      StatsAccumulator accumulator = new StatsAccumulator();
100     accumulator.addAll(values);
101     return accumulator.snapshot();
102   }
103 
104   /**
105    * Returns statistics over a dataset containing the given values.
106    *
107    * @param values a series of values, which will be converted to {@code double} values (this may
108    *     cause loss of precision)
109    */
110   public static Stats of(Iterator<? extends Number> values) {
111     StatsAccumulator accumulator = new StatsAccumulator();
112     accumulator.addAll(values);
113     return accumulator.snapshot();
114   }
115 
116   /**
117    * Returns statistics over a dataset containing the given values.
118    *
119    * @param values a series of values
120    */
121   public static Stats of(double... values) {
122     StatsAccumulator acummulator = new StatsAccumulator();
123     acummulator.addAll(values);
124     return acummulator.snapshot();
125   }
126 
127   /**
128    * Returns statistics over a dataset containing the given values.
129    *
130    * @param values a series of values
131    */
132   public static Stats of(int... values) {
133     StatsAccumulator acummulator = new StatsAccumulator();
134     acummulator.addAll(values);
135     return acummulator.snapshot();
136   }
137 
138   /**
139    * Returns statistics over a dataset containing the given values.
140    *
141    * @param values a series of values, which will be converted to {@code double} values (this may
142    *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
143    */
144   public static Stats of(long... values) {
145     StatsAccumulator acummulator = new StatsAccumulator();
146     acummulator.addAll(values);
147     return acummulator.snapshot();
148   }
149 
150   /**
151    * Returns the number of values.
152    */
153   public long count() {
154     return count;
155   }
156 
157   /**
158    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
159    * values. The count must be non-zero.
160    *
161    * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
162    * the arithmetic mean of the population.
163    *
164    * <h3>Non-finite values</h3>
165    *
166    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
167    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
168    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
169    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
170    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
171    * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
172    *
173    * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats}
174    * instance.
175    *
176    * @throws IllegalStateException if the dataset is empty
177    */
178   public double mean() {
179     checkState(count != 0);
180     return mean;
181   }
182 
183   /**
184    * Returns the sum of the values.
185    *
186    * <h3>Non-finite values</h3>
187    *
188    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
189    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
190    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
191    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
192    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
193    * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
194    */
195   public double sum() {
196     return mean * count;
197   }
198 
199   /**
200    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
201    * variance</a> of the values. The count must be non-zero.
202    *
203    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
204    * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
205    * due to numerical errors. However, it is guaranteed never to return a negative result.
206    *
207    * <h3>Non-finite values</h3>
208    *
209    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
210    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
211    *
212    * @throws IllegalStateException if the dataset is empty
213    */
214   public double populationVariance() {
215     checkState(count > 0);
216     if (isNaN(sumOfSquaresOfDeltas)) {
217       return NaN;
218     }
219     if (count == 1) {
220       return 0.0;
221     }
222     return ensureNonNegative(sumOfSquaresOfDeltas) / count();
223   }
224 
225   /**
226    * Returns the
227    * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
228    * population standard deviation</a> of the values. The count must be non-zero.
229    *
230    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
231    * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
232    * due to numerical errors. However, it is guaranteed never to return a negative result.
233    *
234    * <h3>Non-finite values</h3>
235    *
236    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
237    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
238    *
239    * @throws IllegalStateException if the dataset is empty
240    */
241   public double populationStandardDeviation() {
242     return Math.sqrt(populationVariance());
243   }
244 
245   /**
246    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
247    * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
248    * unbiased estimator of the population variance of the population. The count must be greater than
249    * one.
250    *
251    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
252    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
253    *
254    * <h3>Non-finite values</h3>
255    *
256    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
257    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
258    *
259    * @throws IllegalStateException if the dataset is empty or contains a single value
260    */
261   public double sampleVariance() {
262     checkState(count > 1);
263     if (isNaN(sumOfSquaresOfDeltas)) {
264       return NaN;
265     }
266     return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
267   }
268 
269   /**
270    * Returns the
271    * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
272    * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
273    * population, this is an estimator of the population standard deviation of the population which
274    * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
275    * the distribution). The count must be greater than one.
276    *
277    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
278    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
279    *
280    * <h3>Non-finite values</h3>
281    *
282    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
283    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
284    *
285    * @throws IllegalStateException if the dataset is empty or contains a single value
286    */
287   public double sampleStandardDeviation() {
288     return Math.sqrt(sampleVariance());
289   }
290 
291   /**
292    * Returns the lowest value in the dataset. The count must be non-zero.
293    *
294    * <h3>Non-finite values</h3>
295    *
296    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
297    * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is
298    * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite
299    * values only then the result is the lowest finite value. If it contains
300    * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
301    *
302    * @throws IllegalStateException if the dataset is empty
303    */
304   public double min() {
305     checkState(count != 0);
306     return min;
307   }
308 
309   /**
310    * Returns the highest value in the dataset. The count must be non-zero.
311    *
312    * <h3>Non-finite values</h3>
313    *
314    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
315    * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is
316    * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite
317    * values only then the result is the highest finite value. If it contains
318    * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
319    *
320    * @throws IllegalStateException if the dataset is empty
321    */
322   public double max() {
323     checkState(count != 0);
324     return max;
325   }
326 
327   /**
328    * {@inheritDoc}
329    *
330    * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
331    * point values. Two instances are guaranteed to be considered equal if one is copied from the
332    * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were
333    * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any
334    * values in between the two calls, or if one is obtained from the other after round-tripping
335    * through java serialization. However, floating point rounding errors mean that it may be false
336    * for some instances where the statistics are mathematically equal, including instances
337    * constructed from the same values in a different order... or (in the general case) even in the
338    * same order. (It is guaranteed to return true for instances constructed from the same values in
339    * the same order if {@code strictfp} is in effect, or if the system architecture guarantees
340    * {@code strictfp}-like semantics.)
341    */
342   @Override
343   public boolean equals(@Nullable Object obj) {
344     if (obj == null) {
345       return false;
346     }
347     if (getClass() != obj.getClass()) {
348       return false;
349     }
350     Stats other = (Stats) obj;
351     return (count == other.count)
352         && (doubleToLongBits(mean) == doubleToLongBits(other.mean))
353         && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas))
354         && (doubleToLongBits(min) == doubleToLongBits(other.min))
355         && (doubleToLongBits(max) == doubleToLongBits(other.max));
356   }
357 
358   /**
359    * {@inheritDoc}
360    *
361    * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
362    * including the floating point values. See the note on {@link #equals} for details.
363    */
364   @Override
365   public int hashCode() {
366     return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
367   }
368 
369   @Override
370   public String toString() {
371     if (count() > 0) {
372       return MoreObjects.toStringHelper(this)
373           .add("count", count)
374           .add("mean", mean)
375           .add("populationStandardDeviation", populationStandardDeviation())
376           .add("min", min)
377           .add("max", max)
378           .toString();
379     } else {
380       return MoreObjects.toStringHelper(this).add("count", count).toString();
381     }
382   }
383 
384   double sumOfSquaresOfDeltas() {
385     return sumOfSquaresOfDeltas;
386   }
387 
388   /**
389    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
390    * values. The count must be non-zero.
391    *
392    * <p>The definition of the mean is the same as {@link Stats#mean}.
393    *
394    * @param values a series of values, which will be converted to {@code double} values (this may
395    *     cause loss of precision)
396    * @throws IllegalArgumentException if the dataset is empty
397    */
398   public static double meanOf(Iterable<? extends Number> values) {
399     return meanOf(values.iterator());
400   }
401 
402   /**
403    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
404    * values. The count must be non-zero.
405    *
406    * <p>The definition of the mean is the same as {@link Stats#mean}.
407    *
408    * @param values a series of values, which will be converted to {@code double} values (this may
409    *     cause loss of precision)
410    * @throws IllegalArgumentException if the dataset is empty
411    */
412   public static double meanOf(Iterator<? extends Number> values) {
413     checkArgument(values.hasNext());
414     long count = 1;
415     double mean = values.next().doubleValue();
416     while (values.hasNext()) {
417       double value = values.next().doubleValue();
418       count++;
419       if (isFinite(value) && isFinite(mean)) {
420         // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
421         mean += (value - mean) / count;
422       } else {
423         mean = calculateNewMeanNonFinite(mean, value);
424       }
425     }
426     return mean;
427   }
428 
429   /**
430    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
431    * values. The count must be non-zero.
432    *
433    * <p>The definition of the mean is the same as {@link Stats#mean}.
434    *
435    * @param values a series of values
436    * @throws IllegalArgumentException if the dataset is empty
437    */
438   public static double meanOf(double... values) {
439     checkArgument(values.length > 0);
440     double mean = values[0];
441     for (int index = 1; index < values.length; index++) {
442       double value = values[index];
443       if (isFinite(value) && isFinite(mean)) {
444         // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
445         mean += (value - mean) / (index + 1);
446       } else {
447         mean = calculateNewMeanNonFinite(mean, value);
448       }
449     }
450     return mean;
451   }
452 
453   /**
454    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
455    * values. The count must be non-zero.
456    *
457    * <p>The definition of the mean is the same as {@link Stats#mean}.
458    *
459    * @param values a series of values
460    * @throws IllegalArgumentException if the dataset is empty
461    */
462   public static double meanOf(int... values) {
463     checkArgument(values.length > 0);
464     double mean = values[0];
465     for (int index = 1; index < values.length; index++) {
466       double value = values[index];
467       if (isFinite(value) && isFinite(mean)) {
468         // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
469         mean += (value - mean) / (index + 1);
470       } else {
471         mean = calculateNewMeanNonFinite(mean, value);
472       }
473     }
474     return mean;
475   }
476 
477   /**
478    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
479    * values. The count must be non-zero.
480    *
481    * <p>The definition of the mean is the same as {@link Stats#mean}.
482    *
483    * @param values a series of values, which will be converted to {@code double} values (this may
484    *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
485    * @throws IllegalArgumentException if the dataset is empty
486    */
487   public static double meanOf(long... values) {
488     checkArgument(values.length > 0);
489     double mean = values[0];
490     for (int index = 1; index < values.length; index++) {
491       double value = values[index];
492       if (isFinite(value) && isFinite(mean)) {
493         // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
494         mean += (value - mean) / (index + 1);
495       } else {
496         mean = calculateNewMeanNonFinite(mean, value);
497       }
498     }
499     return mean;
500   }
501 
502   // Serialization helpers
503 
504   /**
505    * The size of byte array representation in bytes.
506    */
507   static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
508 
509   /**
510    * Gets a byte array representation of this instance.
511    *
512    * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
513    * versions.
514    */
515   public byte[] toByteArray() {
516     ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
517     writeTo(buff);
518     return buff.array();
519   }
520 
521   /**
522    * Writes to the given {@link ByteBuffer} a byte representation of this instance.
523    *
524    * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
525    * versions.
526    *
527    * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
528    *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
529    *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
530    */
531   void writeTo(ByteBuffer buffer) {
532     checkNotNull(buffer);
533     checkArgument(
534         buffer.remaining() >= BYTES,
535         "Expected at least Stats.BYTES = %s remaining , got %s",
536         BYTES,
537         buffer.remaining());
538     buffer
539         .putLong(count)
540         .putDouble(mean)
541         .putDouble(sumOfSquaresOfDeltas)
542         .putDouble(min)
543         .putDouble(max);
544   }
545 
546   /**
547    * Creates a Stats instance from the given byte representation which was obtained by
548    * {@link #toByteArray}.
549    *
550    * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
551    * versions.
552    */
553   public static Stats fromByteArray(byte[] byteArray) {
554     checkNotNull(byteArray);
555     checkArgument(
556         byteArray.length == BYTES,
557         "Expected Stats.BYTES = %s remaining , got %s",
558         BYTES,
559         byteArray.length);
560     return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
561   }
562 
563   /**
564    * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
565    *
566    * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
567    * versions.
568    *
569    * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
570    *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
571    *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
572    */
573   static Stats readFrom(ByteBuffer buffer) {
574     checkNotNull(buffer);
575     checkArgument(
576         buffer.remaining() >= BYTES,
577         "Expected at least Stats.BYTES = %s remaining , got %s",
578         BYTES,
579         buffer.remaining());
580     return new Stats(
581         buffer.getLong(),
582         buffer.getDouble(),
583         buffer.getDouble(),
584         buffer.getDouble(),
585         buffer.getDouble());
586   }
587 
588   private static final long serialVersionUID = 0;
589 }