View Javadoc
1   /*
2    * Copyright (C) 2011 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.hash;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  
20  import com.google.common.annotations.Beta;
21  import com.google.common.annotations.VisibleForTesting;
22  import com.google.common.base.Objects;
23  import com.google.common.base.Predicate;
24  import com.google.common.hash.BloomFilterStrategies.LockFreeBitArray;
25  import com.google.common.math.DoubleMath;
26  import com.google.common.primitives.SignedBytes;
27  import com.google.common.primitives.UnsignedBytes;
28  import com.google.errorprone.annotations.CanIgnoreReturnValue;
29  import java.io.DataInputStream;
30  import java.io.DataOutputStream;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.io.OutputStream;
34  import java.io.Serializable;
35  import java.math.RoundingMode;
36  import java.util.stream.Collector;
37  import javax.annotation.Nullable;
38  
39  /**
40   * A Bloom filter for instances of {@code T}. A Bloom filter offers an approximate containment test
41   * with one-sided error: if it claims that an element is contained in it, this might be in error,
42   * but if it claims that an element is <i>not</i> contained in it, then this is definitely true.
43   *
44   * <p>If you are unfamiliar with Bloom filters, this nice <a
45   * href="http://llimllib.github.com/bloomfilter-tutorial/">tutorial</a> may help you understand how
46   * they work.
47   *
48   * <p>The false positive probability ({@code FPP}) of a Bloom filter is defined as the probability
49   * that {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that
50   * has not actually been put in the {@code BloomFilter}.
51   *
52   * <p>Bloom filters are serializable. They also support a more compact serial representation via the
53   * {@link #writeTo} and {@link #readFrom} methods. Both serialized forms will continue to be
54   * supported by future versions of this library. However, serial forms generated by newer versions
55   * of the code may not be readable by older versions of the code (e.g., a serialized Bloom filter
56   * generated today may <i>not</i> be readable by a binary that was compiled 6 months ago).
57   *
58   * <p>As of Guava 23.0, this class is thread-safe and lock-free. It internally uses atomics and
59   * compare-and-swap to ensure correctness when multiple threads are used to access it.
60   *
61   * @param <T> the type of instances that the {@code BloomFilter} accepts
62   * @author Dimitris Andreou
63   * @author Kevin Bourrillion
64   * @since 11.0 (thread-safe since 23.0)
65   */
66  @Beta
67  public final class BloomFilter<T> implements Predicate<T>, Serializable {
68    /**
69     * A strategy to translate T instances, to {@code numHashFunctions} bit indexes.
70     *
71     * <p>Implementations should be collections of pure functions (i.e. stateless).
72     */
73    interface Strategy extends java.io.Serializable {
74  
75      /**
76       * Sets {@code numHashFunctions} bits of the given bit array, by hashing a user element.
77       *
78       * <p>Returns whether any bits changed as a result of this operation.
79       */
80      <T> boolean put(
81          T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits);
82  
83      /**
84       * Queries {@code numHashFunctions} bits of the given bit array, by hashing a user element;
85       * returns {@code true} if and only if all selected bits are set.
86       */
87      <T> boolean mightContain(
88          T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits);
89  
90      /**
91       * Identifier used to encode this strategy, when marshalled as part of a BloomFilter. Only
92       * values in the [-128, 127] range are valid for the compact serial form. Non-negative values
93       * are reserved for enums defined in BloomFilterStrategies; negative values are reserved for any
94       * custom, stateful strategy we may define (e.g. any kind of strategy that would depend on user
95       * input).
96       */
97      int ordinal();
98    }
99  
100   /** The bit set of the BloomFilter (not necessarily power of 2!) */
101   private final LockFreeBitArray bits;
102 
103   /** Number of hashes per element */
104   private final int numHashFunctions;
105 
106   /** The funnel to translate Ts to bytes */
107   private final Funnel<? super T> funnel;
108 
109   /**
110    * The strategy we employ to map an element T to {@code numHashFunctions} bit indexes.
111    */
112   private final Strategy strategy;
113 
114   /** Creates a BloomFilter. */
115   private BloomFilter(
116       LockFreeBitArray bits, int numHashFunctions, Funnel<? super T> funnel, Strategy strategy) {
117     checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions);
118     checkArgument(
119         numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions);
120     this.bits = checkNotNull(bits);
121     this.numHashFunctions = numHashFunctions;
122     this.funnel = checkNotNull(funnel);
123     this.strategy = checkNotNull(strategy);
124   }
125 
126   /**
127    * Creates a new {@code BloomFilter} that's a copy of this instance. The new instance is equal to
128    * this instance but shares no mutable state.
129    *
130    * @since 12.0
131    */
132   public BloomFilter<T> copy() {
133     return new BloomFilter<T>(bits.copy(), numHashFunctions, funnel, strategy);
134   }
135 
136   /**
137    * Returns {@code true} if the element <i>might</i> have been put in this Bloom filter,
138    * {@code false} if this is <i>definitely</i> not the case.
139    */
140   public boolean mightContain(T object) {
141     return strategy.mightContain(object, funnel, numHashFunctions, bits);
142   }
143 
144   /**
145    * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #mightContain}
146    *     instead.
147    */
148   @Deprecated
149   @Override
150   public boolean apply(T input) {
151     return mightContain(input);
152   }
153 
154   /**
155    * Puts an element into this {@code BloomFilter}. Ensures that subsequent invocations of {@link
156    * #mightContain(Object)} with the same element will always return {@code true}.
157    *
158    * @return true if the Bloom filter's bits changed as a result of this operation. If the bits
159    *     changed, this is <i>definitely</i> the first time {@code object} has been added to the
160    *     filter. If the bits haven't changed, this <i>might</i> be the first time {@code object} has
161    *     been added to the filter. Note that {@code put(t)} always returns the <i>opposite</i>
162    *     result to what {@code mightContain(t)} would have returned at the time it is called.
163    * @since 12.0 (present in 11.0 with {@code void} return type})
164    */
165   @CanIgnoreReturnValue
166   public boolean put(T object) {
167     return strategy.put(object, funnel, numHashFunctions, bits);
168   }
169 
170   /**
171    * Returns the probability that {@linkplain #mightContain(Object)} will erroneously return
172    * {@code true} for an object that has not actually been put in the {@code BloomFilter}.
173    *
174    * <p>Ideally, this number should be close to the {@code fpp} parameter passed in
175    * {@linkplain #create(Funnel, int, double)}, or smaller. If it is significantly higher, it is
176    * usually the case that too many elements (more than expected) have been put in the
177    * {@code BloomFilter}, degenerating it.
178    *
179    * @since 14.0 (since 11.0 as expectedFalsePositiveProbability())
180    */
181   public double expectedFpp() {
182     // You down with FPP? (Yeah you know me!) Who's down with FPP? (Every last homie!)
183     return Math.pow((double) bits.bitCount() / bitSize(), numHashFunctions);
184   }
185 
186   /**
187    * Returns an estimate for the total number of distinct elements that have been added to this
188    * Bloom filter. This approximation is reasonably accurate if it does not exceed the value of
189    * {@code expectedInsertions} that was used when constructing the filter.
190    *
191    * @since 22.0
192    */
193   public long approximateElementCount() {
194     long bitSize = bits.bitSize();
195     long bitCount = bits.bitCount();
196 
197     /**
198      * Each insertion is expected to reduce the # of clear bits by a factor of
199      * `numHashFunctions/bitSize`. So, after n insertions, expected bitCount is `bitSize * (1 - (1 -
200      * numHashFunctions/bitSize)^n)`. Solving that for n, and approximating `ln x` as `x - 1` when x
201      * is close to 1 (why?), gives the following formula.
202      */
203     double fractionOfBitsSet = (double) bitCount / bitSize;
204     return DoubleMath.roundToLong(
205         -Math.log1p(-fractionOfBitsSet) * bitSize / numHashFunctions, RoundingMode.HALF_UP);
206   }
207 
208   /**
209    * Returns the number of bits in the underlying bit array.
210    */
211   @VisibleForTesting
212   long bitSize() {
213     return bits.bitSize();
214   }
215 
216   /**
217    * Determines whether a given Bloom filter is compatible with this Bloom filter. For two Bloom
218    * filters to be compatible, they must:
219    *
220    * <ul>
221    *   <li>not be the same instance
222    *   <li>have the same number of hash functions
223    *   <li>have the same bit size
224    *   <li>have the same strategy
225    *   <li>have equal funnels
226    * </ul>
227    *
228    * @param that The Bloom filter to check for compatibility.
229    * @since 15.0
230    */
231   public boolean isCompatible(BloomFilter<T> that) {
232     checkNotNull(that);
233     return (this != that)
234         && (this.numHashFunctions == that.numHashFunctions)
235         && (this.bitSize() == that.bitSize())
236         && (this.strategy.equals(that.strategy))
237         && (this.funnel.equals(that.funnel));
238   }
239 
240   /**
241    * Combines this Bloom filter with another Bloom filter by performing a bitwise OR of the
242    * underlying data. The mutations happen to <b>this</b> instance. Callers must ensure the Bloom
243    * filters are appropriately sized to avoid saturating them.
244    *
245    * @param that The Bloom filter to combine this Bloom filter with. It is not mutated.
246    * @throws IllegalArgumentException if {@code isCompatible(that) == false}
247    * @since 15.0
248    */
249   public void putAll(BloomFilter<T> that) {
250     checkNotNull(that);
251     checkArgument(this != that, "Cannot combine a BloomFilter with itself.");
252     checkArgument(
253         this.numHashFunctions == that.numHashFunctions,
254         "BloomFilters must have the same number of hash functions (%s != %s)",
255         this.numHashFunctions,
256         that.numHashFunctions);
257     checkArgument(
258         this.bitSize() == that.bitSize(),
259         "BloomFilters must have the same size underlying bit arrays (%s != %s)",
260         this.bitSize(),
261         that.bitSize());
262     checkArgument(
263         this.strategy.equals(that.strategy),
264         "BloomFilters must have equal strategies (%s != %s)",
265         this.strategy,
266         that.strategy);
267     checkArgument(
268         this.funnel.equals(that.funnel),
269         "BloomFilters must have equal funnels (%s != %s)",
270         this.funnel,
271         that.funnel);
272     this.bits.putAll(that.bits);
273   }
274 
275   @Override
276   public boolean equals(@Nullable Object object) {
277     if (object == this) {
278       return true;
279     }
280     if (object instanceof BloomFilter) {
281       BloomFilter<?> that = (BloomFilter<?>) object;
282       return this.numHashFunctions == that.numHashFunctions
283           && this.funnel.equals(that.funnel)
284           && this.bits.equals(that.bits)
285           && this.strategy.equals(that.strategy);
286     }
287     return false;
288   }
289 
290   @Override
291   public int hashCode() {
292     return Objects.hashCode(numHashFunctions, funnel, strategy, bits);
293   }
294 
295   /**
296    * Returns a {@code Collector} expecting the specified number of insertions, and yielding a {@link
297    * BloomFilter} with false positive probability 3%.
298    *
299    * <p>Note that if the {@code Collector} receives significantly more elements than specified, the
300    * resulting {@code BloomFilter} will suffer a sharp deterioration of its false positive
301    * probability.
302    *
303    * <p>The constructed {@code BloomFilter} will be serializable if the provided {@code
304    * Funnel<T>} is.
305    *
306    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
307    * ensuring proper serialization and deserialization, which is important since {@link #equals}
308    * also relies on object identity of funnels.
309    *
310    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
311    * @param expectedInsertions the number of expected insertions to the constructed {@code
312    *     BloomFilter}; must be positive
313    * @return a {@code Collector} generating a {@code BloomFilter} of the received elements
314    * @since 23.0
315    */
316   public static <T> Collector<T, ?, BloomFilter<T>> toBloomFilter(
317       Funnel<? super T> funnel, long expectedInsertions) {
318     return toBloomFilter(funnel, expectedInsertions, 0.03);
319   }
320 
321   /**
322    * Returns a {@code Collector} expecting the specified number of insertions, and yielding a {@link
323    * BloomFilter} with the specified expected false positive probability.
324    *
325    * <p>Note that if the {@code Collector} receives significantly more elements than specified, the
326    * resulting {@code BloomFilter} will suffer a sharp deterioration of its false positive
327    * probability.
328    *
329    * <p>The constructed {@code BloomFilter} will be serializable if the provided {@code
330    * Funnel<T>} is.
331    *
332    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
333    * ensuring proper serialization and deserialization, which is important since {@link #equals}
334    * also relies on object identity of funnels.
335    *
336    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
337    * @param expectedInsertions the number of expected insertions to the constructed {@code
338    *     BloomFilter}; must be positive
339    * @param fpp the desired false positive probability (must be positive and less than 1.0)
340    * @return a {@code Collector} generating a {@code BloomFilter} of the received elements
341    * @since 23.0
342    */
343   public static <T> Collector<T, ?, BloomFilter<T>> toBloomFilter(
344       Funnel<? super T> funnel, long expectedInsertions, double fpp) {
345     checkNotNull(funnel);
346     checkArgument(
347         expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions);
348     checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp);
349     checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp);
350     return Collector.of(
351         () -> BloomFilter.create(funnel, expectedInsertions, fpp),
352         BloomFilter::put,
353         (bf1, bf2) -> {
354           bf1.putAll(bf2);
355           return bf1;
356         },
357         Collector.Characteristics.UNORDERED,
358         Collector.Characteristics.CONCURRENT);
359   }
360 
361   /**
362    * Creates a {@link BloomFilter} with the expected number of insertions and
363    * expected false positive probability.
364    *
365    * <p>Note that overflowing a {@code BloomFilter} with significantly more elements than specified,
366    * will result in its saturation, and a sharp deterioration of its false positive probability.
367    *
368    * <p>The constructed {@code BloomFilter} will be serializable if the provided
369    * {@code Funnel<T>} is.
370    *
371    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
372    * ensuring proper serialization and deserialization, which is important since {@link #equals}
373    * also relies on object identity of funnels.
374    *
375    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
376    * @param expectedInsertions the number of expected insertions to the constructed
377    *     {@code BloomFilter}; must be positive
378    * @param fpp the desired false positive probability (must be positive and less than 1.0)
379    * @return a {@code BloomFilter}
380    */
381   public static <T> BloomFilter<T> create(
382       Funnel<? super T> funnel, int expectedInsertions, double fpp) {
383     return create(funnel, (long) expectedInsertions, fpp);
384   }
385 
386   /**
387    * Creates a {@link BloomFilter} with the expected number of insertions and
388    * expected false positive probability.
389    *
390    * <p>Note that overflowing a {@code BloomFilter} with significantly more elements than specified,
391    * will result in its saturation, and a sharp deterioration of its false positive probability.
392    *
393    * <p>The constructed {@code BloomFilter} will be serializable if the provided
394    * {@code Funnel<T>} is.
395    *
396    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
397    * ensuring proper serialization and deserialization, which is important since {@link #equals}
398    * also relies on object identity of funnels.
399    *
400    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
401    * @param expectedInsertions the number of expected insertions to the constructed
402    *     {@code BloomFilter}; must be positive
403    * @param fpp the desired false positive probability (must be positive and less than 1.0)
404    * @return a {@code BloomFilter}
405    * @since 19.0
406    */
407   public static <T> BloomFilter<T> create(
408       Funnel<? super T> funnel, long expectedInsertions, double fpp) {
409     return create(funnel, expectedInsertions, fpp, BloomFilterStrategies.MURMUR128_MITZ_64);
410   }
411 
412   @VisibleForTesting
413   static <T> BloomFilter<T> create(
414       Funnel<? super T> funnel, long expectedInsertions, double fpp, Strategy strategy) {
415     checkNotNull(funnel);
416     checkArgument(
417         expectedInsertions >= 0, "Expected insertions (%s) must be >= 0", expectedInsertions);
418     checkArgument(fpp > 0.0, "False positive probability (%s) must be > 0.0", fpp);
419     checkArgument(fpp < 1.0, "False positive probability (%s) must be < 1.0", fpp);
420     checkNotNull(strategy);
421 
422     if (expectedInsertions == 0) {
423       expectedInsertions = 1;
424     }
425     /*
426      * TODO(user): Put a warning in the javadoc about tiny fpp values, since the resulting size
427      * is proportional to -log(p), but there is not much of a point after all, e.g.
428      * optimalM(1000, 0.0000000000000001) = 76680 which is less than 10kb. Who cares!
429      */
430     long numBits = optimalNumOfBits(expectedInsertions, fpp);
431     int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);
432     try {
433       return new BloomFilter<T>(new LockFreeBitArray(numBits), numHashFunctions, funnel, strategy);
434     } catch (IllegalArgumentException e) {
435       throw new IllegalArgumentException("Could not create BloomFilter of " + numBits + " bits", e);
436     }
437   }
438 
439   /**
440    * Creates a {@link BloomFilter} with the expected number of insertions and a
441    * default expected false positive probability of 3%.
442    *
443    * <p>Note that overflowing a {@code BloomFilter} with significantly more elements than specified,
444    * will result in its saturation, and a sharp deterioration of its false positive probability.
445    *
446    * <p>The constructed {@code BloomFilter} will be serializable if the provided
447    * {@code Funnel<T>} is.
448    *
449    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
450    * ensuring proper serialization and deserialization, which is important since {@link #equals}
451    * also relies on object identity of funnels.
452    *
453    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
454    * @param expectedInsertions the number of expected insertions to the constructed
455    *     {@code BloomFilter}; must be positive
456    * @return a {@code BloomFilter}
457    */
458   public static <T> BloomFilter<T> create(Funnel<? super T> funnel, int expectedInsertions) {
459     return create(funnel, (long) expectedInsertions);
460   }
461 
462   /**
463    * Creates a {@link BloomFilter} with the expected number of insertions and a
464    * default expected false positive probability of 3%.
465    *
466    * <p>Note that overflowing a {@code BloomFilter} with significantly more elements than specified,
467    * will result in its saturation, and a sharp deterioration of its false positive probability.
468    *
469    * <p>The constructed {@code BloomFilter} will be serializable if the provided
470    * {@code Funnel<T>} is.
471    *
472    * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
473    * ensuring proper serialization and deserialization, which is important since {@link #equals}
474    * also relies on object identity of funnels.
475    *
476    * @param funnel the funnel of T's that the constructed {@code BloomFilter} will use
477    * @param expectedInsertions the number of expected insertions to the constructed
478    *     {@code BloomFilter}; must be positive
479    * @return a {@code BloomFilter}
480    * @since 19.0
481    */
482   public static <T> BloomFilter<T> create(Funnel<? super T> funnel, long expectedInsertions) {
483     return create(funnel, expectedInsertions, 0.03); // FYI, for 3%, we always get 5 hash functions
484   }
485 
486   // Cheat sheet:
487   //
488   // m: total bits
489   // n: expected insertions
490   // b: m/n, bits per insertion
491   // p: expected false positive probability
492   //
493   // 1) Optimal k = b * ln2
494   // 2) p = (1 - e ^ (-kn/m))^k
495   // 3) For optimal k: p = 2 ^ (-k) ~= 0.6185^b
496   // 4) For optimal k: m = -nlnp / ((ln2) ^ 2)
497 
498   /**
499    * Computes the optimal k (number of hashes per element inserted in Bloom filter), given the
500    * expected insertions and total number of bits in the Bloom filter.
501    *
502    * See http://en.wikipedia.org/wiki/File:Bloom_filter_fp_probability.svg for the formula.
503    *
504    * @param n expected insertions (must be positive)
505    * @param m total number of bits in Bloom filter (must be positive)
506    */
507   @VisibleForTesting
508   static int optimalNumOfHashFunctions(long n, long m) {
509     // (m / n) * log(2), but avoid truncation due to division!
510     return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
511   }
512 
513   /**
514    * Computes m (total bits of Bloom filter) which is expected to achieve, for the specified
515    * expected insertions, the required false positive probability.
516    *
517    * See http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives for the formula.
518    *
519    * @param n expected insertions (must be positive)
520    * @param p false positive rate (must be 0 < p < 1)
521    */
522   @VisibleForTesting
523   static long optimalNumOfBits(long n, double p) {
524     if (p == 0) {
525       p = Double.MIN_VALUE;
526     }
527     return (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
528   }
529 
530   private Object writeReplace() {
531     return new SerialForm<T>(this);
532   }
533 
534   private static class SerialForm<T> implements Serializable {
535     final long[] data;
536     final int numHashFunctions;
537     final Funnel<? super T> funnel;
538     final Strategy strategy;
539 
540     SerialForm(BloomFilter<T> bf) {
541       this.data = LockFreeBitArray.toPlainArray(bf.bits.data);
542       this.numHashFunctions = bf.numHashFunctions;
543       this.funnel = bf.funnel;
544       this.strategy = bf.strategy;
545     }
546 
547     Object readResolve() {
548       return new BloomFilter<T>(new LockFreeBitArray(data), numHashFunctions, funnel, strategy);
549     }
550 
551     private static final long serialVersionUID = 1;
552   }
553 
554   /**
555    * Writes this {@code BloomFilter} to an output stream, with a custom format (not Java
556    * serialization). This has been measured to save at least 400 bytes compared to regular
557    * serialization.
558    *
559    * <p>Use {@linkplain #readFrom(InputStream, Funnel)} to reconstruct the written BloomFilter.
560    */
561   public void writeTo(OutputStream out) throws IOException {
562     // Serial form:
563     // 1 signed byte for the strategy
564     // 1 unsigned byte for the number of hash functions
565     // 1 big endian int, the number of longs in our bitset
566     // N big endian longs of our bitset
567     DataOutputStream dout = new DataOutputStream(out);
568     dout.writeByte(SignedBytes.checkedCast(strategy.ordinal()));
569     dout.writeByte(UnsignedBytes.checkedCast(numHashFunctions)); // note: checked at the c'tor
570     dout.writeInt(bits.data.length());
571     for (int i = 0; i < bits.data.length(); i++) {
572       dout.writeLong(bits.data.get(i));
573     }
574   }
575 
576   /**
577    * Reads a byte stream, which was written by {@linkplain #writeTo(OutputStream)}, into a
578    * {@code BloomFilter}.
579    *
580    * The {@code Funnel} to be used is not encoded in the stream, so it must be provided here.
581    * <b>Warning:</b> the funnel provided <b>must</b> behave identically to the one used to populate
582    * the original Bloom filter!
583    *
584    * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not
585    *     appear to be a BloomFilter serialized using the {@linkplain #writeTo(OutputStream)} method.
586    */
587   public static <T> BloomFilter<T> readFrom(InputStream in, Funnel<? super T> funnel)
588       throws IOException {
589     checkNotNull(in, "InputStream");
590     checkNotNull(funnel, "Funnel");
591     int strategyOrdinal = -1;
592     int numHashFunctions = -1;
593     int dataLength = -1;
594     try {
595       DataInputStream din = new DataInputStream(in);
596       // currently this assumes there is no negative ordinal; will have to be updated if we
597       // add non-stateless strategies (for which we've reserved negative ordinals; see
598       // Strategy.ordinal()).
599       strategyOrdinal = din.readByte();
600       numHashFunctions = UnsignedBytes.toInt(din.readByte());
601       dataLength = din.readInt();
602 
603       Strategy strategy = BloomFilterStrategies.values()[strategyOrdinal];
604       long[] data = new long[dataLength];
605       for (int i = 0; i < data.length; i++) {
606         data[i] = din.readLong();
607       }
608       return new BloomFilter<T>(new LockFreeBitArray(data), numHashFunctions, funnel, strategy);
609     } catch (RuntimeException e) {
610       String message =
611           "Unable to deserialize BloomFilter from InputStream."
612               + " strategyOrdinal: "
613               + strategyOrdinal
614               + " numHashFunctions: "
615               + numHashFunctions
616               + " dataLength: "
617               + dataLength;
618       throw new IOException(message, e);
619     }
620   }
621 }