/*
 * Copyright (C) 2011 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.hash;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * A convenience base class for implementors of {@code Hasher}; handles accumulating data until an
 * entire "chunk" (of implementation-dependent length) is ready to be hashed.
 *
 * @author Kevin Bourrillion
 * @author Dimitris Andreou
 */
// TODO(kevinb): this class still needs some design-and-document-for-inheritance love
@CanIgnoreReturnValue
abstract class AbstractStreamingHasher extends AbstractHasher {
  /** Buffer via which we pass data to the hash algorithm (the implementor) */
  private final ByteBuffer buffer;

  /** Number of bytes to be filled before process() invocation(s). */
  private final int bufferSize;

  /** Number of bytes processed per process() invocation. */
  private final int chunkSize;

  /**
   * Constructor for use by subclasses. This hasher instance will process chunks of the specified
   * size.
   *
   * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
   *     must be at least 4
   */
  protected AbstractStreamingHasher(int chunkSize) {
    this(chunkSize, chunkSize);
  }

  /**
   * Constructor for use by subclasses. This hasher instance will process chunks of the specified
   * size, using an internal buffer of {@code bufferSize} size, which must be a multiple of
   * {@code chunkSize}.
   *
   * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
   *     must be at least 4
   * @param bufferSize the size of the internal buffer. Must be a multiple of chunkSize
   */
  protected AbstractStreamingHasher(int chunkSize, int bufferSize) {
    // TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this is ever public
    checkArgument(bufferSize % chunkSize == 0);

    // TODO(user): benchmark performance difference with longer buffer
    // always space for a single primitive
    this.buffer = ByteBuffer.allocate(bufferSize + 7).order(ByteOrder.LITTLE_ENDIAN);
    this.bufferSize = bufferSize;
    this.chunkSize = chunkSize;
  }
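
  /*
   * For example, a subclass that calls super(8) gets chunkSize == bufferSize == 8 and a 15-byte
   * little-endian backing buffer. Because munchIfFull() flushes whenever fewer than 8 bytes
   * remain, the buffer's position is at most bufferSize - 1 when any put* call begins, so the 7
   * extra bytes leave room for one more 8-byte primitive before the next munch().
   */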

  /** Processes the available bytes of the buffer (at most {@code chunkSize} bytes). */
  protected abstract void process(ByteBuffer bb);

  /**
   * This is invoked for the last bytes of the input, which are not enough to fill a whole chunk.
   * The passed {@code ByteBuffer} is guaranteed to be non-empty.
   *
   * <p>This implementation simply pads with zeros and delegates to {@link #process(ByteBuffer)}.
   */
  protected void processRemaining(ByteBuffer bb) {
    bb.position(bb.limit()); // move to the end
    bb.limit(chunkSize + 7); // get ready to pad with longs
    while (bb.position() < chunkSize) {
      bb.putLong(0);
    }
    bb.limit(chunkSize);
    bb.flip();
    process(bb);
  }
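
  /*
   * For instance, with chunkSize == 8 and three leftover bytes, hash() hands this method a buffer
   * with position 0 and limit 3. The position is moved to 3, the limit is raised to 15, a single
   * putLong(0) pushes the position past 8, and the limit is then cut back to 8 and flipped, so
   * process() sees exactly one chunk: the three real bytes followed by five zeros.
   */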

  @Override
  public final Hasher putBytes(byte[] bytes, int off, int len) {
    return putBytesInternal(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN));
  }

  @Override
  public final Hasher putBytes(ByteBuffer readBuffer) {
    ByteOrder order = readBuffer.order();
    try {
      readBuffer.order(ByteOrder.LITTLE_ENDIAN);
      return putBytesInternal(readBuffer);
    } finally {
      readBuffer.order(order);
    }
  }

  private Hasher putBytesInternal(ByteBuffer readBuffer) {
    // If we have room for all of it, this is easy
    if (readBuffer.remaining() <= buffer.remaining()) {
      buffer.put(readBuffer);
      munchIfFull();
      return this;
    }

    // First add just enough to fill buffer size, and munch that
    int bytesToCopy = bufferSize - buffer.position();
    for (int i = 0; i < bytesToCopy; i++) {
      buffer.put(readBuffer.get());
    }
    munch(); // buffer becomes empty here, since chunkSize divides bufferSize

    // Now process directly from the rest of the input buffer
    while (readBuffer.remaining() >= chunkSize) {
      process(readBuffer);
    }

    // Finally stick the remainder back in our usual buffer
    buffer.put(readBuffer);
    return this;
  }
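
  /*
   * As a concrete illustration with chunkSize == bufferSize == 8: if the buffer already holds 5
   * bytes and 20 more arrive, 3 bytes are copied in to complete a chunk, munch() processes it,
   * two further 8-byte chunks are processed straight from the caller's buffer, and the single
   * remaining byte is stored back in our buffer for later.
   */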

  /*
   * Note: hashString(CharSequence, Charset) is intentionally not overridden.
   *
   * While intuitively, using CharsetEncoder to encode the CharSequence directly to the buffer (or
   * even to an intermediate buffer) should be considerably more efficient than potentially
   * copying the CharSequence to a String and then calling getBytes(Charset) on that String, in
   * reality there are optimizations that make the getBytes(Charset) approach considerably faster,
   * at least for commonly used charsets like UTF-8.
   */

  @Override
  public final Hasher putByte(byte b) {
    buffer.put(b);
    munchIfFull();
    return this;
  }

  @Override
  public final Hasher putShort(short s) {
    buffer.putShort(s);
    munchIfFull();
    return this;
  }

  @Override
  public final Hasher putChar(char c) {
    buffer.putChar(c);
    munchIfFull();
    return this;
  }

  @Override
  public final Hasher putInt(int i) {
    buffer.putInt(i);
    munchIfFull();
    return this;
  }

  @Override
  public final Hasher putLong(long l) {
    buffer.putLong(l);
    munchIfFull();
    return this;
  }

  @Override
  public final HashCode hash() {
    munch();
    buffer.flip();
    if (buffer.remaining() > 0) {
      processRemaining(buffer);
      buffer.position(buffer.limit());
    }
    return makeHash();
  }

  /**
   * Computes a hash code based on the data that have been provided to this hasher. This is called
   * after all chunks are handled with {@link #process} and any leftover bytes that did not make a
   * complete chunk are handled with {@link #processRemaining}.
   */
  protected abstract HashCode makeHash();

  // Process pent-up data in chunks
  private void munchIfFull() {
    if (buffer.remaining() < 8) {
      // buffer is full; not enough room for a primitive. We have at least one full chunk.
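      // (The capacity is bufferSize + 7, so remaining() < 8 means position() >= bufferSize, and
      // bufferSize is a multiple of chunkSize.)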
      munch();
    }
  }

  private void munch() {
    buffer.flip();
    while (buffer.remaining() >= chunkSize) {
      // we could limit the buffer to ensure process() does not read more than
      // chunkSize number of bytes, but we trust the implementations
      process(buffer);
    }
    buffer.compact(); // preserve any remaining data that do not make a full chunk
  }
}
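
/*
 * A minimal sketch of how a concrete subclass might plug into the streaming machinery above. This
 * XorStreamingHasher is hypothetical and for illustration only; it is not part of Guava and is
 * not a real hash function. It XORs each 8-byte little-endian chunk into an accumulator in
 * process() and turns the accumulator into a HashCode in makeHash(), relying on the inherited
 * processRemaining() to zero-pad the final partial chunk. Usage would look like
 * new XorStreamingHasher().putLong(1).putInt(2).hash().
 */
final class XorStreamingHasher extends AbstractStreamingHasher {
  private long state;

  XorStreamingHasher() {
    super(8); // process() will always be handed 8-byte chunks
  }

  @Override
  protected void process(ByteBuffer bb) {
    state ^= bb.getLong(); // consume exactly one chunk, advancing the buffer by 8 bytes
  }

  @Override
  protected HashCode makeHash() {
    return HashCode.fromLong(state);
  }
}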