View Javadoc
1   /*
2    * Copyright (C) 2015 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.hash;
16  
17  import static com.google.common.base.Preconditions.checkPositionIndexes;
18  import static com.google.common.hash.LittleEndianByteArray.load32;
19  import static com.google.common.hash.LittleEndianByteArray.load64;
20  import static java.lang.Long.rotateRight;
21  
22  import com.google.common.annotations.VisibleForTesting;
23  
24  /**
25   * Implementation of FarmHash Fingerprint64, an open-source fingerprinting algorithm for strings.
26   *
27   * <p>Its speed is comparable to CityHash64, and its quality of hashing is at least as good.
28   *
29   * <p>Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent
30   * to unsigned arithmetic in all cases except:
31   *
32   * <ul>
33   * <li>comparisons (signed values can be negative)
34   * <li>division (avoided here)
35   * <li>shifting (right shift must be unsigned)
36   * </ul>
37   *
38   * @author Kyle Maddison
39   * @author Geoff Pike
40   */
41  final class FarmHashFingerprint64 extends AbstractNonStreamingHashFunction {
42    static final HashFunction FARMHASH_FINGERPRINT_64 = new FarmHashFingerprint64();
43  
44    // Some primes between 2^63 and 2^64 for various uses.
45    private static final long K0 = 0xc3a5c85c97cb3127L;
46    private static final long K1 = 0xb492b66fbe98f273L;
47    private static final long K2 = 0x9ae16a3b2f90404fL;
48  
49    @Override
50    public HashCode hashBytes(byte[] input, int off, int len) {
51      checkPositionIndexes(off, off + len, input.length);
52      return HashCode.fromLong(fingerprint(input, off, len));
53    }
54  
55    @Override
56    public int bits() {
57      return 64;
58    }
59  
60    @Override
61    public String toString() {
62      return "Hashing.farmHashFingerprint64()";
63    }
64  
65    // End of public functions.
66  
67    @VisibleForTesting
68    static long fingerprint(byte[] bytes, int offset, int length) {
69      if (length <= 32) {
70        if (length <= 16) {
71          return hashLength0to16(bytes, offset, length);
72        } else {
73          return hashLength17to32(bytes, offset, length);
74        }
75      } else if (length <= 64) {
76        return hashLength33To64(bytes, offset, length);
77      } else {
78        return hashLength65Plus(bytes, offset, length);
79      }
80    }
81  
82    private static long shiftMix(long val) {
83      return val ^ (val >>> 47);
84    }
85  
86    private static long hashLength16(long u, long v, long mul) {
87      long a = (u ^ v) * mul;
88      a ^= (a >>> 47);
89      long b = (v ^ a) * mul;
90      b ^= (b >>> 47);
91      b *= mul;
92      return b;
93    }
94  
95    /**
96     * Computes intermediate hash of 32 bytes of byte array from the given offset. Results are
97     * returned in the output array because when we last measured, this was 12% faster than allocating
98     * new arrays every time.
99     */
100   private static void weakHashLength32WithSeeds(
101       byte[] bytes, int offset, long seedA, long seedB, long[] output) {
102     long part1 = load64(bytes, offset);
103     long part2 = load64(bytes, offset + 8);
104     long part3 = load64(bytes, offset + 16);
105     long part4 = load64(bytes, offset + 24);
106 
107     seedA += part1;
108     seedB = rotateRight(seedB + seedA + part4, 21);
109     long c = seedA;
110     seedA += part2;
111     seedA += part3;
112     seedB += rotateRight(seedA, 44);
113     output[0] = seedA + part4;
114     output[1] = seedB + c;
115   }
116 
117   private static long hashLength0to16(byte[] bytes, int offset, int length) {
118     if (length >= 8) {
119       long mul = K2 + length * 2;
120       long a = load64(bytes, offset) + K2;
121       long b = load64(bytes, offset + length - 8);
122       long c = rotateRight(b, 37) * mul + a;
123       long d = (rotateRight(a, 25) + b) * mul;
124       return hashLength16(c, d, mul);
125     }
126     if (length >= 4) {
127       long mul = K2 + length * 2;
128       long a = load32(bytes, offset) & 0xFFFFFFFFL;
129       return hashLength16(length + (a << 3), load32(bytes, offset + length - 4) & 0xFFFFFFFFL, mul);
130     }
131     if (length > 0) {
132       byte a = bytes[offset];
133       byte b = bytes[offset + (length >> 1)];
134       byte c = bytes[offset + (length - 1)];
135       int y = (a & 0xFF) + ((b & 0xFF) << 8);
136       int z = length + ((c & 0xFF) << 2);
137       return shiftMix(y * K2 ^ z * K0) * K2;
138     }
139     return K2;
140   }
141 
142   private static long hashLength17to32(byte[] bytes, int offset, int length) {
143     long mul = K2 + length * 2;
144     long a = load64(bytes, offset) * K1;
145     long b = load64(bytes, offset + 8);
146     long c = load64(bytes, offset + length - 8) * mul;
147     long d = load64(bytes, offset + length - 16) * K2;
148     return hashLength16(
149         rotateRight(a + b, 43) + rotateRight(c, 30) + d, a + rotateRight(b + K2, 18) + c, mul);
150   }
151 
152   private static long hashLength33To64(byte[] bytes, int offset, int length) {
153     long mul = K2 + length * 2;
154     long a = load64(bytes, offset) * K2;
155     long b = load64(bytes, offset + 8);
156     long c = load64(bytes, offset + length - 8) * mul;
157     long d = load64(bytes, offset + length - 16) * K2;
158     long y = rotateRight(a + b, 43) + rotateRight(c, 30) + d;
159     long z = hashLength16(y, a + rotateRight(b + K2, 18) + c, mul);
160     long e = load64(bytes, offset + 16) * mul;
161     long f = load64(bytes, offset + 24);
162     long g = (y + load64(bytes, offset + length - 32)) * mul;
163     long h = (z + load64(bytes, offset + length - 24)) * mul;
164     return hashLength16(
165         rotateRight(e + f, 43) + rotateRight(g, 30) + h, e + rotateRight(f + a, 18) + g, mul);
166   }
167 
168   /*
169    * Compute an 8-byte hash of a byte array of length greater than 64 bytes.
170    */
171   private static long hashLength65Plus(byte[] bytes, int offset, int length) {
172     final int seed = 81;
173     // For strings over 64 bytes we loop. Internal state consists of 56 bytes: v, w, x, y, and z.
174     long x = seed;
175     @SuppressWarnings("ConstantOverflow")
176     long y = seed * K1 + 113;
177     long z = shiftMix(y * K2 + 113) * K2;
178     long[] v = new long[2];
179     long[] w = new long[2];
180     x = x * K2 + load64(bytes, offset);
181 
182     // Set end so that after the loop we have 1 to 64 bytes left to process.
183     int end = offset + ((length - 1) / 64) * 64;
184     int last64offset = end + ((length - 1) & 63) - 63;
185     do {
186       x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * K1;
187       y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1;
188       x ^= w[1];
189       y += v[0] + load64(bytes, offset + 40);
190       z = rotateRight(z + w[0], 33) * K1;
191       weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v);
192       weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
193       long tmp = x;
194       x = z;
195       z = tmp;
196       offset += 64;
197     } while (offset != end);
198     long mul = K1 + ((z & 0xFF) << 1);
199     // Operate on the last 64 bytes of input.
200     offset = last64offset;
201     w[0] += ((length - 1) & 63);
202     v[0] += w[0];
203     w[0] += v[0];
204     x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * mul;
205     y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * mul;
206     x ^= w[1] * 9;
207     y += v[0] * 9 + load64(bytes, offset + 40);
208     z = rotateRight(z + w[0], 33) * mul;
209     weakHashLength32WithSeeds(bytes, offset, v[1] * mul, x + w[0], v);
210     weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
211     return hashLength16(
212         hashLength16(v[0], w[0], mul) + shiftMix(y) * K0 + x,
213         hashLength16(v[1], w[1], mul) + z,
214         mul);
215   }
216 }