View Javadoc
1   /*
2    * Copyright (C) 2011 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.hash;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  
19  import com.google.common.math.LongMath;
20  import com.google.common.primitives.Ints;
21  import com.google.common.primitives.Longs;
22  import java.math.RoundingMode;
23  import java.util.Arrays;
24  import java.util.concurrent.atomic.AtomicLongArray;
25  import javax.annotation.Nullable;
26  
27  /**
28   * Collections of strategies of generating the k * log(M) bits required for an element to be mapped
29   * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form
30   * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only
31   * introduction of new versions).
32   *
33   * Important: the order of the constants cannot change, and they cannot be deleted - we depend on
34   * their ordinal for BloomFilter serialization.
35   *
36   * @author Dimitris Andreou
37   * @author Kurt Alfred Kluever
38   */
39  enum BloomFilterStrategies implements BloomFilter.Strategy {
40    /**
41     * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
42     * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
43     * performance of a Bloom filter (yet only needs two 32bit hash functions).
44     */
45    MURMUR128_MITZ_32() {
46      @Override
47      public <T> boolean put(
48          T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
49        long bitSize = bits.bitSize();
50        long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
51        int hash1 = (int) hash64;
52        int hash2 = (int) (hash64 >>> 32);
53  
54        boolean bitsChanged = false;
55        for (int i = 1; i <= numHashFunctions; i++) {
56          int combinedHash = hash1 + (i * hash2);
57          // Flip all the bits if it's negative (guaranteed positive number)
58          if (combinedHash < 0) {
59            combinedHash = ~combinedHash;
60          }
61          bitsChanged |= bits.set(combinedHash % bitSize);
62        }
63        return bitsChanged;
64      }
65  
66      @Override
67      public <T> boolean mightContain(
68          T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
69        long bitSize = bits.bitSize();
70        long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
71        int hash1 = (int) hash64;
72        int hash2 = (int) (hash64 >>> 32);
73  
74        for (int i = 1; i <= numHashFunctions; i++) {
75          int combinedHash = hash1 + (i * hash2);
76          // Flip all the bits if it's negative (guaranteed positive number)
77          if (combinedHash < 0) {
78            combinedHash = ~combinedHash;
79          }
80          if (!bits.get(combinedHash % bitSize)) {
81            return false;
82          }
83        }
84        return true;
85      }
86    },
87    /**
88     * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different
89     * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the
90     * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by
91     * AND'ing with Long.MAX_VALUE instead of flipping the bits.
92     */
93    MURMUR128_MITZ_64() {
94      @Override
95      public <T> boolean put(
96          T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
97        long bitSize = bits.bitSize();
98        byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
99        long hash1 = lowerEight(bytes);
100       long hash2 = upperEight(bytes);
101 
102       boolean bitsChanged = false;
103       long combinedHash = hash1;
104       for (int i = 0; i < numHashFunctions; i++) {
105         // Make the combined hash positive and indexable
106         bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
107         combinedHash += hash2;
108       }
109       return bitsChanged;
110     }
111 
112     @Override
113     public <T> boolean mightContain(
114         T object, Funnel<? super T> funnel, int numHashFunctions, LockFreeBitArray bits) {
115       long bitSize = bits.bitSize();
116       byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
117       long hash1 = lowerEight(bytes);
118       long hash2 = upperEight(bytes);
119 
120       long combinedHash = hash1;
121       for (int i = 0; i < numHashFunctions; i++) {
122         // Make the combined hash positive and indexable
123         if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
124           return false;
125         }
126         combinedHash += hash2;
127       }
128       return true;
129     }
130 
131     private /* static */ long lowerEight(byte[] bytes) {
132       return Longs.fromBytes(
133           bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
134     }
135 
136     private /* static */ long upperEight(byte[] bytes) {
137       return Longs.fromBytes(
138           bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]);
139     }
140   };
141 
142   /**
143    * Models a lock-free array of bits.
144    *
145    * <p>We use this instead of java.util.BitSet because we need access to the array of longs and we
146    * need compare-and-swap.
147    */
148   static final class LockFreeBitArray {
149     private static final int LONG_ADDRESSABLE_BITS = 6;
150     final AtomicLongArray data;
151     private final LongAddable bitCount;
152 
153     LockFreeBitArray(long bits) {
154       this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
155     }
156 
157     // Used by serialization
158     LockFreeBitArray(long[] data) {
159       checkArgument(data.length > 0, "data length is zero!");
160       this.data = new AtomicLongArray(data);
161       this.bitCount = LongAddables.create();
162       long bitCount = 0;
163       for (long value : data) {
164         bitCount += Long.bitCount(value);
165       }
166       this.bitCount.add(bitCount);
167     }
168 
169     /** Returns true if the bit changed value. */
170     boolean set(long bitIndex) {
171       if (get(bitIndex)) {
172         return false;
173       }
174 
175       int longIndex = (int) (bitIndex >>> LONG_ADDRESSABLE_BITS);
176       long mask = 1L << bitIndex; // only cares about low 6 bits of bitIndex
177 
178       long oldValue;
179       long newValue;
180       do {
181         oldValue = data.get(longIndex);
182         newValue = oldValue | mask;
183         if (oldValue == newValue) {
184           return false;
185         }
186       } while (!data.compareAndSet(longIndex, oldValue, newValue));
187 
188       // We turned the bit on, so increment bitCount.
189       bitCount.increment();
190       return true;
191     }
192 
193     boolean get(long bitIndex) {
194       return (data.get((int) (bitIndex >>> 6)) & (1L << bitIndex)) != 0;
195     }
196 
197     /**
198      * Careful here: if threads are mutating the atomicLongArray while this method is executing, the
199      * final long[] will be a "rolling snapshot" of the state of the bit array. This is usually good
200      * enough, but should be kept in mind.
201      */
202     public static long[] toPlainArray(AtomicLongArray atomicLongArray) {
203       long[] array = new long[atomicLongArray.length()];
204       for (int i = 0; i < array.length; ++i) {
205         array[i] = atomicLongArray.get(i);
206       }
207       return array;
208     }
209 
210     /** Number of bits */
211     long bitSize() {
212       return (long) data.length() * Long.SIZE;
213     }
214 
215     /**
216      * Number of set bits (1s).
217      *
218      * <p>Note that because of concurrent set calls and uses of atomics, this bitCount is a (very)
219      * close *estimate* of the actual number of bits set. It's not possible to do better than an
220      * estimate without locking. Note that the number, if not exactly accurate, is *always*
221      * underestimating, never overestimating.
222      */
223     long bitCount() {
224       return bitCount.sum();
225     }
226 
227     LockFreeBitArray copy() {
228       return new LockFreeBitArray(toPlainArray(data));
229     }
230 
231     /**
232      * Combines the two BitArrays using bitwise OR.
233      *
234      * <p>NOTE: Because of the use of atomics, if the other LockFreeBitArray is being mutated while
235      * this operation is executing, not all of those new 1's may be set in the final state of this
236      * LockFreeBitArray. The ONLY guarantee provided is that all the bits that were set in the other
237      * LockFreeBitArray at the start of this method will be set in this LockFreeBitArray at the end
238      * of this method.
239      */
240     void putAll(LockFreeBitArray other) {
241       checkArgument(
242           data.length() == other.data.length(),
243           "BitArrays must be of equal length (%s != %s)",
244           data.length(),
245           other.data.length());
246       for (int i = 0; i < data.length(); i++) {
247         long otherLong = other.data.get(i);
248 
249         long ourLongOld;
250         long ourLongNew;
251         boolean changedAnyBits = true;
252         do {
253           ourLongOld = data.get(i);
254           ourLongNew = ourLongOld | otherLong;
255           if (ourLongOld == ourLongNew) {
256             changedAnyBits = false;
257             break;
258           }
259         } while (!data.compareAndSet(i, ourLongOld, ourLongNew));
260 
261         if (changedAnyBits) {
262           int bitsAdded = Long.bitCount(ourLongNew) - Long.bitCount(ourLongOld);
263           bitCount.add(bitsAdded);
264         }
265       }
266     }
267 
268     @Override
269     public boolean equals(@Nullable Object o) {
270       if (o instanceof LockFreeBitArray) {
271         LockFreeBitArray lockFreeBitArray = (LockFreeBitArray) o;
272         // TODO(lowasser): avoid allocation here
273         return Arrays.equals(toPlainArray(data), toPlainArray(lockFreeBitArray.data));
274       }
275       return false;
276     }
277 
278     @Override
279     public int hashCode() {
280       // TODO(lowasser): avoid allocation here
281       return Arrays.hashCode(toPlainArray(data));
282     }
283   }
284 }