View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.io;
16  
17  import static com.google.common.base.Preconditions.checkArgument;
18  import static com.google.common.base.Preconditions.checkNotNull;
19  import static com.google.common.io.ByteStreams.createBuffer;
20  import static com.google.common.io.ByteStreams.skipUpTo;
21  
22  import com.google.common.annotations.Beta;
23  import com.google.common.annotations.GwtIncompatible;
24  import com.google.common.base.Ascii;
25  import com.google.common.base.Optional;
26  import com.google.common.collect.ImmutableList;
27  import com.google.common.hash.Funnels;
28  import com.google.common.hash.HashCode;
29  import com.google.common.hash.HashFunction;
30  import com.google.common.hash.Hasher;
31  import com.google.errorprone.annotations.CanIgnoreReturnValue;
32  import java.io.BufferedInputStream;
33  import java.io.ByteArrayInputStream;
34  import java.io.IOException;
35  import java.io.InputStream;
36  import java.io.InputStreamReader;
37  import java.io.OutputStream;
38  import java.io.Reader;
39  import java.nio.charset.Charset;
40  import java.util.Arrays;
41  import java.util.Iterator;
42  
43  /**
44   * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource}
45   * is not an open, stateful stream for input that can be read and closed. Instead, it is an
46   * immutable <i>supplier</i> of {@code InputStream} instances.
47   *
48   * <p>{@code ByteSource} provides two kinds of methods:
49   * <ul>
50   * <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
51   *     instance each time they are called. The caller is responsible for ensuring that the returned
52   *     stream is closed.
53   * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
54   *     implemented by opening a stream using one of the methods in the first category, doing
55   *     something and finally closing the stream that was opened.
56   * </ul>
57   *
58   * @since 14.0
59   * @author Colin Decker
60   */
61  @GwtIncompatible
62  public abstract class ByteSource {
63  
64    /**
65     * Constructor for use by subclasses.
66     */
67    protected ByteSource() {}
68  
69    /**
70     * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
71     * as characters using the given {@link Charset}.
72     *
73     * <p>If {@link CharSource#asByteSource} is called on the returned source with the same charset,
74     * the default implementation of this method will ensure that the original {@code ByteSource} is
75     * returned, rather than round-trip encoding. Subclasses that override this method should behave
76     * the same way.
77     */
78    public CharSource asCharSource(Charset charset) {
79      return new AsCharSource(charset);
80    }
81  
82    /**
83     * Opens a new {@link InputStream} for reading from this source. This method returns a new,
84     * independent stream each time it is called.
85     *
86     * <p>The caller is responsible for ensuring that the returned stream is closed.
87     *
88     * @throws IOException if an I/O error occurs while opening the stream
89     */
90    public abstract InputStream openStream() throws IOException;
91  
92    /**
93     * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
94     * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
95     * delegate to {@link #openStream()} when the stream returned by that method does not benefit from
96     * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a
97     * new, independent stream each time it is called.
98     *
99     * <p>The caller is responsible for ensuring that the returned stream is closed.
100    *
101    * @throws IOException if an I/O error occurs while opening the stream
102    * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
103    */
104   public InputStream openBufferedStream() throws IOException {
105     InputStream in = openStream();
106     return (in instanceof BufferedInputStream)
107         ? (BufferedInputStream) in
108         : new BufferedInputStream(in);
109   }
110 
111   /**
112    * Returns a view of a slice of this byte source that is at most {@code length} bytes long
113    * starting at the given {@code offset}. If {@code offset} is greater than the size of this
114    * source, the returned source will be empty. If {@code offset + length} is greater than the size
115    * of this source, the returned source will contain the slice starting at {@code offset} and
116    * ending at the end of this source.
117    *
118    * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
119    */
120   public ByteSource slice(long offset, long length) {
121     return new SlicedByteSource(offset, length);
122   }
123 
124   /**
125    * Returns whether the source has zero bytes. The default implementation first checks
126    * {@link #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be
127    * non-zero. If the size is not known, it falls back to opening a stream and checking for EOF.
128    *
129    * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
130    * are actually available for reading. (For example, some special files may return a size of 0
131    * despite actually having content when read.) This means that a source may return {@code true}
132    * from {@code isEmpty()} despite having readable content.
133    *
134    * @throws IOException if an I/O error occurs
135    * @since 15.0
136    */
137   public boolean isEmpty() throws IOException {
138     Optional<Long> sizeIfKnown = sizeIfKnown();
139     if (sizeIfKnown.isPresent()) {
140       return sizeIfKnown.get() == 0L;
141     }
142     Closer closer = Closer.create();
143     try {
144       InputStream in = closer.register(openStream());
145       return in.read() == -1;
146     } catch (Throwable e) {
147       throw closer.rethrow(e);
148     } finally {
149       closer.close();
150     }
151   }
152 
153   /**
154    * Returns the size of this source in bytes, if the size can be easily determined without actually
155    * opening the data stream.
156    *
157    * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
158    * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
159    * will return a different number of bytes than would be returned by reading all of the bytes (for
160    * example, some special files may return a size of 0 despite actually having content when read).
161    *
162    * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
163    * number of bytes if the contents are changed.
164    *
165    * @since 19.0
166    */
167   @Beta
168   public Optional<Long> sizeIfKnown() {
169     return Optional.absent();
170   }
171 
172   /**
173    * Returns the size of this source in bytes, even if doing so requires opening and traversing an
174    * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
175    *
176    * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. If
177    * absent, it will fall back to a heavyweight operation that will open a stream, read (or
178    * {@link InputStream#skip(long) skip}, if possible) to the end of the stream and return the total
179    * number of bytes that were read.
180    *
181    * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
182    * implementation, it is <i>possible</i> that this method will return a different number of bytes
183    * than would be returned by reading all of the bytes (for example, some special files may return
184    * a size of 0 despite actually having content when read).
185    *
186    * <p>In either case, for mutable sources such as files, a subsequent read may return a different
187    * number of bytes if the contents are changed.
188    *
189    * @throws IOException if an I/O error occurs while reading the size of this source
190    */
191   public long size() throws IOException {
192     Optional<Long> sizeIfKnown = sizeIfKnown();
193     if (sizeIfKnown.isPresent()) {
194       return sizeIfKnown.get();
195     }
196 
197     Closer closer = Closer.create();
198     try {
199       InputStream in = closer.register(openStream());
200       return countBySkipping(in);
201     } catch (IOException e) {
202       // skip may not be supported... at any rate, try reading
203     } finally {
204       closer.close();
205     }
206 
207     closer = Closer.create();
208     try {
209       InputStream in = closer.register(openStream());
210       return ByteStreams.exhaust(in);
211     } catch (Throwable e) {
212       throw closer.rethrow(e);
213     } finally {
214       closer.close();
215     }
216   }
217 
218   /**
219    * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
220    * first call to skip threw, in which case skip may just not be supported.
221    */
222   private long countBySkipping(InputStream in) throws IOException {
223     long count = 0;
224     long skipped;
225     while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) {
226       count += skipped;
227     }
228     return count;
229   }
230 
231   /**
232    * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
233    * {@code output}.
234    *
235    * @return the number of bytes copied
236    * @throws IOException if an I/O error occurs while reading from this source or writing to
237    *     {@code output}
238    */
239   @CanIgnoreReturnValue
240   public long copyTo(OutputStream output) throws IOException {
241     checkNotNull(output);
242 
243     Closer closer = Closer.create();
244     try {
245       InputStream in = closer.register(openStream());
246       return ByteStreams.copy(in, output);
247     } catch (Throwable e) {
248       throw closer.rethrow(e);
249     } finally {
250       closer.close();
251     }
252   }
253 
254   /**
255    * Copies the contents of this byte source to the given {@code ByteSink}.
256    *
257    * @return the number of bytes copied
258    * @throws IOException if an I/O error occurs while reading from this source or writing to
259    *     {@code sink}
260    */
261   @CanIgnoreReturnValue
262   public long copyTo(ByteSink sink) throws IOException {
263     checkNotNull(sink);
264 
265     Closer closer = Closer.create();
266     try {
267       InputStream in = closer.register(openStream());
268       OutputStream out = closer.register(sink.openStream());
269       return ByteStreams.copy(in, out);
270     } catch (Throwable e) {
271       throw closer.rethrow(e);
272     } finally {
273       closer.close();
274     }
275   }
276 
277   /**
278    * Reads the full contents of this byte source as a byte array.
279    *
280    * @throws IOException if an I/O error occurs while reading from this source
281    */
282   public byte[] read() throws IOException {
283     Closer closer = Closer.create();
284     try {
285       InputStream in = closer.register(openStream());
286       return ByteStreams.toByteArray(in);
287     } catch (Throwable e) {
288       throw closer.rethrow(e);
289     } finally {
290       closer.close();
291     }
292   }
293 
294   /**
295    * Reads the contents of this byte source using the given {@code processor} to process bytes as
296    * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
297    * Returns the result produced by the processor.
298    *
299    * @throws IOException if an I/O error occurs while reading from this source or if
300    *     {@code processor} throws an {@code IOException}
301    * @since 16.0
302    */
303   @Beta
304   @CanIgnoreReturnValue // some processors won't return a useful result
305   public <T> T read(ByteProcessor<T> processor) throws IOException {
306     checkNotNull(processor);
307 
308     Closer closer = Closer.create();
309     try {
310       InputStream in = closer.register(openStream());
311       return ByteStreams.readBytes(in, processor);
312     } catch (Throwable e) {
313       throw closer.rethrow(e);
314     } finally {
315       closer.close();
316     }
317   }
318 
319   /**
320    * Hashes the contents of this byte source using the given hash function.
321    *
322    * @throws IOException if an I/O error occurs while reading from this source
323    */
324   public HashCode hash(HashFunction hashFunction) throws IOException {
325     Hasher hasher = hashFunction.newHasher();
326     copyTo(Funnels.asOutputStream(hasher));
327     return hasher.hash();
328   }
329 
330   /**
331    * Checks that the contents of this byte source are equal to the contents of the given byte
332    * source.
333    *
334    * @throws IOException if an I/O error occurs while reading from this source or {@code other}
335    */
336   public boolean contentEquals(ByteSource other) throws IOException {
337     checkNotNull(other);
338 
339     byte[] buf1 = createBuffer();
340     byte[] buf2 = createBuffer();
341 
342     Closer closer = Closer.create();
343     try {
344       InputStream in1 = closer.register(openStream());
345       InputStream in2 = closer.register(other.openStream());
346       while (true) {
347         int read1 = ByteStreams.read(in1, buf1, 0, buf1.length);
348         int read2 = ByteStreams.read(in2, buf2, 0, buf2.length);
349         if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
350           return false;
351         } else if (read1 != buf1.length) {
352           return true;
353         }
354       }
355     } catch (Throwable e) {
356       throw closer.rethrow(e);
357     } finally {
358       closer.close();
359     }
360   }
361 
362   /**
363    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
364    * the source will contain the concatenated data from the streams of the underlying sources.
365    *
366    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
367    * close the open underlying stream.
368    *
369    * @param sources the sources to concatenate
370    * @return a {@code ByteSource} containing the concatenated data
371    * @since 15.0
372    */
373   public static ByteSource concat(Iterable<? extends ByteSource> sources) {
374     return new ConcatenatedByteSource(sources);
375   }
376 
377   /**
378    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
379    * the source will contain the concatenated data from the streams of the underlying sources.
380    *
381    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
382    * close the open underlying stream.
383    *
384    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
385    * is called. This will fail if the iterator is infinite and may cause problems if the iterator
386    * eagerly fetches data for each source when iterated (rather than producing sources that only
387    * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
388    * possible.
389    *
390    * @param sources the sources to concatenate
391    * @return a {@code ByteSource} containing the concatenated data
392    * @throws NullPointerException if any of {@code sources} is {@code null}
393    * @since 15.0
394    */
395   public static ByteSource concat(Iterator<? extends ByteSource> sources) {
396     return concat(ImmutableList.copyOf(sources));
397   }
398 
399   /**
400    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
401    * the source will contain the concatenated data from the streams of the underlying sources.
402    *
403    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
404    * close the open underlying stream.
405    *
406    * @param sources the sources to concatenate
407    * @return a {@code ByteSource} containing the concatenated data
408    * @throws NullPointerException if any of {@code sources} is {@code null}
409    * @since 15.0
410    */
411   public static ByteSource concat(ByteSource... sources) {
412     return concat(ImmutableList.copyOf(sources));
413   }
414 
415   /**
416    * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
417    * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
418    *
419    * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
420    */
421   public static ByteSource wrap(byte[] b) {
422     return new ByteArrayByteSource(b);
423   }
424 
425   /**
426    * Returns an immutable {@link ByteSource} that contains no bytes.
427    *
428    * @since 15.0
429    */
430   public static ByteSource empty() {
431     return EmptyByteSource.INSTANCE;
432   }
433 
434   /**
435    * A char source that reads bytes from this source and decodes them as characters using a charset.
436    */
437   class AsCharSource extends CharSource {
438 
439     final Charset charset;
440 
441     AsCharSource(Charset charset) {
442       this.charset = checkNotNull(charset);
443     }
444 
445     @Override
446     public ByteSource asByteSource(Charset charset) {
447       if (charset.equals(this.charset)) {
448         return ByteSource.this;
449       }
450       return super.asByteSource(charset);
451     }
452 
453     @Override
454     public Reader openStream() throws IOException {
455       return new InputStreamReader(ByteSource.this.openStream(), charset);
456     }
457 
458     @Override
459     public String read() throws IOException {
460       // Reading all the data as a byte array is more efficient than the default read()
461       // implementation because:
462       // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the
463       //    internal char array (hard to avoid using StringBuilder)
464       // 2. we avoid extra copies into temporary buffers altogether
465       // The downside is that this will cause us to store the file bytes in memory twice for a short
466       // amount of time.
467       return new String(ByteSource.this.read(), charset);
468     }
469 
470     @Override
471     public String toString() {
472       return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
473     }
474   }
475 
476   /**
477    * A view of a subsection of the containing byte source.
478    */
479   private final class SlicedByteSource extends ByteSource {
480 
481     final long offset;
482     final long length;
483 
484     SlicedByteSource(long offset, long length) {
485       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
486       checkArgument(length >= 0, "length (%s) may not be negative", length);
487       this.offset = offset;
488       this.length = length;
489     }
490 
491     @Override
492     public InputStream openStream() throws IOException {
493       return sliceStream(ByteSource.this.openStream());
494     }
495 
496     @Override
497     public InputStream openBufferedStream() throws IOException {
498       return sliceStream(ByteSource.this.openBufferedStream());
499     }
500 
501     private InputStream sliceStream(InputStream in) throws IOException {
502       if (offset > 0) {
503         long skipped;
504         try {
505           skipped = ByteStreams.skipUpTo(in, offset);
506         } catch (Throwable e) {
507           Closer closer = Closer.create();
508           closer.register(in);
509           try {
510             throw closer.rethrow(e);
511           } finally {
512             closer.close();
513           }
514         }
515 
516         if (skipped < offset) {
517           // offset was beyond EOF
518           in.close();
519           return new ByteArrayInputStream(new byte[0]);
520         }
521       }
522       return ByteStreams.limit(in, length);
523     }
524 
525     @Override
526     public ByteSource slice(long offset, long length) {
527       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
528       checkArgument(length >= 0, "length (%s) may not be negative", length);
529       long maxLength = this.length - offset;
530       return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
531     }
532 
533     @Override
534     public boolean isEmpty() throws IOException {
535       return length == 0 || super.isEmpty();
536     }
537 
538     @Override
539     public Optional<Long> sizeIfKnown() {
540       Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown();
541       if (optionalUnslicedSize.isPresent()) {
542         long unslicedSize = optionalUnslicedSize.get();
543         long off = Math.min(offset, unslicedSize);
544         return Optional.of(Math.min(length, unslicedSize - off));
545       }
546       return Optional.absent();
547     }
548 
549     @Override
550     public String toString() {
551       return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
552     }
553   }
554 
555   private static class ByteArrayByteSource extends ByteSource {
556 
557     final byte[] bytes;
558     final int offset;
559     final int length;
560 
561     ByteArrayByteSource(byte[] bytes) {
562       this(bytes, 0, bytes.length);
563     }
564 
565     // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
566     ByteArrayByteSource(byte[] bytes, int offset, int length) {
567       this.bytes = bytes;
568       this.offset = offset;
569       this.length = length;
570     }
571 
572     @Override
573     public InputStream openStream() {
574       return new ByteArrayInputStream(bytes, offset, length);
575     }
576 
577     @Override
578     public InputStream openBufferedStream() throws IOException {
579       return openStream();
580     }
581 
582     @Override
583     public boolean isEmpty() {
584       return length == 0;
585     }
586 
587     @Override
588     public long size() {
589       return length;
590     }
591 
592     @Override
593     public Optional<Long> sizeIfKnown() {
594       return Optional.of((long) length);
595     }
596 
597     @Override
598     public byte[] read() {
599       return Arrays.copyOfRange(bytes, offset, offset + length);
600     }
601 
602     @Override
603     public long copyTo(OutputStream output) throws IOException {
604       output.write(bytes, offset, length);
605       return length;
606     }
607 
608     @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here
609     @Override
610     public <T> T read(ByteProcessor<T> processor) throws IOException {
611       processor.processBytes(bytes, offset, length);
612       return processor.getResult();
613     }
614 
615     @Override
616     public HashCode hash(HashFunction hashFunction) throws IOException {
617       return hashFunction.hashBytes(bytes, offset, length);
618     }
619 
620     @Override
621     public ByteSource slice(long offset, long length) {
622       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
623       checkArgument(length >= 0, "length (%s) may not be negative", length);
624 
625       offset = Math.min(offset, this.length);
626       length = Math.min(length, this.length - offset);
627       int newOffset = this.offset + (int) offset;
628       return new ByteArrayByteSource(bytes, newOffset, (int) length);
629     }
630 
631     @Override
632     public String toString() {
633       return "ByteSource.wrap("
634           + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")";
635     }
636   }
637 
638   private static final class EmptyByteSource extends ByteArrayByteSource {
639 
640     static final EmptyByteSource INSTANCE = new EmptyByteSource();
641 
642     EmptyByteSource() {
643       super(new byte[0]);
644     }
645 
646     @Override
647     public CharSource asCharSource(Charset charset) {
648       checkNotNull(charset);
649       return CharSource.empty();
650     }
651 
652     @Override
653     public byte[] read() {
654       return bytes; // length is 0, no need to clone
655     }
656 
657     @Override
658     public String toString() {
659       return "ByteSource.empty()";
660     }
661   }
662 
663   private static final class ConcatenatedByteSource extends ByteSource {
664 
665     final Iterable<? extends ByteSource> sources;
666 
667     ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
668       this.sources = checkNotNull(sources);
669     }
670 
671     @Override
672     public InputStream openStream() throws IOException {
673       return new MultiInputStream(sources.iterator());
674     }
675 
676     @Override
677     public boolean isEmpty() throws IOException {
678       for (ByteSource source : sources) {
679         if (!source.isEmpty()) {
680           return false;
681         }
682       }
683       return true;
684     }
685 
686     @Override
687     public Optional<Long> sizeIfKnown() {
688       long result = 0L;
689       for (ByteSource source : sources) {
690         Optional<Long> sizeIfKnown = source.sizeIfKnown();
691         if (!sizeIfKnown.isPresent()) {
692           return Optional.absent();
693         }
694         result += sizeIfKnown.get();
695       }
696       return Optional.of(result);
697     }
698 
699     @Override
700     public long size() throws IOException {
701       long result = 0L;
702       for (ByteSource source : sources) {
703         result += source.size();
704       }
705       return result;
706     }
707 
708     @Override
709     public String toString() {
710       return "ByteSource.concat(" + sources + ")";
711     }
712   }
713 }