View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.math;
16  
17  import static com.google.common.base.Preconditions.checkState;
18  import static com.google.common.math.DoubleUtils.ensureNonNegative;
19  import static com.google.common.primitives.Doubles.isFinite;
20  import static java.lang.Double.NaN;
21  import static java.lang.Double.isNaN;
22  
23  import com.google.common.annotations.Beta;
24  import com.google.common.annotations.GwtIncompatible;
25  import java.util.Iterator;
26  
27  /**
28   * A mutable object which accumulates double values and tracks some basic statistics over all the
29   * values added so far. The values may be added singly or in groups. This class is not thread safe.
30   *
31   * @author Pete Gillin
32   * @author Kevin Bourrillion
33   * @since 20.0
34   */
35  @Beta
36  @GwtIncompatible
37  public final class StatsAccumulator {
38  
39    // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
40    // methods of this class.
41    private long count = 0;
42    private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
43    private double sumOfSquaresOfDeltas = 0.0;
44    private double min = NaN; // any value will do
45    private double max = NaN; // any value will do
46  
47    /**
48     * Adds the given value to the dataset.
49     */
50    public void add(double value) {
51      if (count == 0) {
52        count = 1;
53        mean = value;
54        min = value;
55        max = value;
56        if (!isFinite(value)) {
57          sumOfSquaresOfDeltas = NaN;
58        }
59      } else {
60        count++;
61        if (isFinite(value) && isFinite(mean)) {
62          // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
63          double delta = value - mean;
64          mean += delta / count;
65          sumOfSquaresOfDeltas += delta * (value - mean);
66        } else {
67          mean = calculateNewMeanNonFinite(mean, value);
68          sumOfSquaresOfDeltas = NaN;
69        }
70        min = Math.min(min, value);
71        max = Math.max(max, value);
72      }
73    }
74  
75    /**
76     * Adds the given values to the dataset.
77     *
78     * @param values a series of values, which will be converted to {@code double} values (this may
79     *     cause loss of precision)
80     */
81    public void addAll(Iterable<? extends Number> values) {
82      for (Number value : values) {
83        add(value.doubleValue());
84      }
85    }
86  
87    /**
88     * Adds the given values to the dataset.
89     *
90     * @param values a series of values, which will be converted to {@code double} values (this may
91     *     cause loss of precision)
92     */
93    public void addAll(Iterator<? extends Number> values) {
94      while (values.hasNext()) {
95        add(values.next().doubleValue());
96      }
97    }
98  
99    /**
100    * Adds the given values to the dataset.
101    *
102    * @param values a series of values
103    */
104   public void addAll(double... values) {
105     for (double value : values) {
106       add(value);
107     }
108   }
109 
110   /**
111    * Adds the given values to the dataset.
112    *
113    * @param values a series of values
114    */
115   public void addAll(int... values) {
116     for (int value : values) {
117       add(value);
118     }
119   }
120 
121   /**
122    * Adds the given values to the dataset.
123    *
124    * @param values a series of values, which will be converted to {@code double} values (this may
125    *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
126    */
127   public void addAll(long... values) {
128     for (long value : values) {
129       add(value);
130     }
131   }
132 
133   /**
134    * Adds the given statistics to the dataset, as if the individual values used to compute the
135    * statistics had been added directly.
136    */
137   public void addAll(Stats values) {
138     if (values.count() == 0) {
139       return;
140     }
141 
142     if (count == 0) {
143       count = values.count();
144       mean = values.mean();
145       sumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas();
146       min = values.min();
147       max = values.max();
148     } else {
149       count += values.count();
150       if (isFinite(mean) && isFinite(values.mean())) {
151         // This is a generalized version of the calculation in add(double) above.
152         double delta = values.mean() - mean;
153         mean += delta * values.count() / count;
154         sumOfSquaresOfDeltas +=
155             values.sumOfSquaresOfDeltas() + delta * (values.mean() - mean) * values.count();
156       } else {
157         mean = calculateNewMeanNonFinite(mean, values.mean());
158         sumOfSquaresOfDeltas = NaN;
159       }
160       min = Math.min(min, values.min());
161       max = Math.max(max, values.max());
162     }
163   }
164 
165   /**
166    * Returns an immutable snapshot of the current statistics.
167    */
168   public Stats snapshot() {
169     return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
170   }
171 
172   /**
173    * Returns the number of values.
174    */
175   public long count() {
176     return count;
177   }
178 
179   /**
180    * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
181    * values. The count must be non-zero.
182    *
183    * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
184    * the arithmetic mean of the population.
185    *
186    * <h3>Non-finite values</h3>
187    *
188    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
189    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
190    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
191    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
192    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
193    * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
194    *
195    * @throws IllegalStateException if the dataset is empty
196    */
197   public double mean() {
198     checkState(count != 0);
199     return mean;
200   }
201 
202   /**
203    * Returns the sum of the values.
204    *
205    * <h3>Non-finite values</h3>
206    *
207    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
208    * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
209    * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
210    * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
211    * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or
212    * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
213    */
214   public final double sum() {
215     return mean * count;
216   }
217 
218   /**
219    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
220    * variance</a> of the values. The count must be non-zero.
221    *
222    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
223    * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
224    * due to numerical errors. However, it is guaranteed never to return a negative result.
225    *
226    * <h3>Non-finite values</h3>
227    *
228    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
229    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
230    *
231    * @throws IllegalStateException if the dataset is empty
232    */
233   public final double populationVariance() {
234     checkState(count != 0);
235     if (isNaN(sumOfSquaresOfDeltas)) {
236       return NaN;
237     }
238     if (count == 1) {
239       return 0.0;
240     }
241     return ensureNonNegative(sumOfSquaresOfDeltas) / count;
242   }
243 
244   /**
245    * Returns the
246    * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
247    * population standard deviation</a> of the values. The count must be non-zero.
248    *
249    * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value.
250    * It is not guaranteed to return zero when the dataset consists of the same value multiple times,
251    * due to numerical errors. However, it is guaranteed never to return a negative result.
252    *
253    * <h3>Non-finite values</h3>
254    *
255    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
256    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
257    *
258    * @throws IllegalStateException if the dataset is empty
259    */
260   public final double populationStandardDeviation() {
261     return Math.sqrt(populationVariance());
262   }
263 
264   /**
265    * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
266    * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
267    * unbiased estimator of the population variance of the population. The count must be greater than
268    * one.
269    *
270    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
271    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
272    *
273    * <h3>Non-finite values</h3>
274    *
275    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
276    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
277    *
278    * @throws IllegalStateException if the dataset is empty or contains a single value
279    */
280   public final double sampleVariance() {
281     checkState(count > 1);
282     if (isNaN(sumOfSquaresOfDeltas)) {
283       return NaN;
284     }
285     return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
286   }
287 
288   /**
289    * Returns the
290    * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
291    * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
292    * population, this is an estimator of the population standard deviation of the population which
293    * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
294    * the distribution). The count must be greater than one.
295    *
296    * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
297    * times, due to numerical errors. However, it is guaranteed never to return a negative result.
298    *
299    * <h3>Non-finite values</h3>
300    *
301    * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY},
302    * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
303    *
304    * @throws IllegalStateException if the dataset is empty or contains a single value
305    */
306   public final double sampleStandardDeviation() {
307     return Math.sqrt(sampleVariance());
308   }
309 
310   /**
311    * Returns the lowest value in the dataset. The count must be non-zero.
312    *
313    * <h3>Non-finite values</h3>
314    *
315    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
316    * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is
317    * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite
318    * values only then the result is the lowest finite value. If it contains
319    * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
320    *
321    * @throws IllegalStateException if the dataset is empty
322    */
323   public double min() {
324     checkState(count != 0);
325     return min;
326   }
327 
328   /**
329    * Returns the highest value in the dataset. The count must be non-zero.
330    *
331    * <h3>Non-finite values</h3>
332    *
333    * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
334    * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is
335    * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite
336    * values only then the result is the highest finite value. If it contains
337    * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
338    *
339    * @throws IllegalStateException if the dataset is empty
340    */
341   public double max() {
342     checkState(count != 0);
343     return max;
344   }
345 
346   double sumOfSquaresOfDeltas() {
347     return sumOfSquaresOfDeltas;
348   }
349 
350   /**
351    * Calculates the new value for the accumulated mean when a value is added, in the case where at
352    * least one of the previous mean and the value is non-finite.
353    */
354   static double calculateNewMeanNonFinite(double previousMean, double value) {
355     /*
356      * Desired behaviour is to match the results of applying the naive mean formula. In particular,
357      * the update formula can subtract infinities in cases where the naive formula would add them.
358      *
359      * Consequently:
360      * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
361      *    value (whether it is NaN or infinity).
362      * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
363      *    (whether it is NaN or infinity).
364      * 3. If both the previous mean and the new value are non-finite and...
365      * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
366      * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
367      * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
368      */
369     if (isFinite(previousMean)) {
370       // This is case 1.
371       return value;
372     } else if (isFinite(value) || previousMean == value) {
373       // This is case 2. or 3b.
374       return previousMean;
375     } else {
376       // This is case 3a. or 3c.
377       return NaN;
378     }
379   }
380 }