View Javadoc
1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   */
14  
15  package com.google.common.io;
16  
17  import static com.google.common.base.Preconditions.checkNotNull;
18  
19  import com.google.common.annotations.Beta;
20  import com.google.common.annotations.GwtIncompatible;
21  import com.google.common.base.Ascii;
22  import com.google.common.base.Optional;
23  import com.google.common.base.Splitter;
24  import com.google.common.collect.AbstractIterator;
25  import com.google.common.collect.ImmutableList;
26  import com.google.common.collect.Lists;
27  import com.google.common.collect.Streams;
28  import com.google.errorprone.annotations.CanIgnoreReturnValue;
29  import com.google.errorprone.annotations.MustBeClosed;
30  import java.io.BufferedReader;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.io.Reader;
34  import java.io.StringReader;
35  import java.io.UncheckedIOException;
36  import java.io.Writer;
37  import java.nio.charset.Charset;
38  import java.util.Iterator;
39  import java.util.List;
40  import java.util.function.Consumer;
41  import java.util.stream.Stream;
42  import javax.annotation.Nullable;
43  
44  /**
45   * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
46   * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
47   * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
48   *
49   * <p>{@code CharSource} provides two kinds of methods:
50   * <ul>
51   * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
52   *     instance each time they are called. The caller is responsible for ensuring that the returned
53   *     reader is closed.
54   * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
55   *     implemented by opening a reader using one of the methods in the first category, doing
56   *     something and finally closing the reader that was opened.
57   * </ul>
58   *
59   * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
60   * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
61   * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
62   * there to be an empty line at the end if the contents are terminated with a line separator.
63   *
64   * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
65   * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
66   *
67   * @since 14.0
68   * @author Colin Decker
69   */
70  @GwtIncompatible
71  public abstract class CharSource {
72  
73     /**
74      * Sole constructor, for use by subclasses. It takes no arguments and performs no work.
75      */
76    protected CharSource() {}
77  
78    /**
79     * Returns a {@link ByteSource} view of this char source that encodes the chars read from this
80     * source as bytes using the given {@link Charset}.
81     *
82     * <p>With the default implementation, calling {@link ByteSource#asCharSource} on the returned
83     * source with an equal charset returns this original {@code CharSource} rather than encoding
84     * and decoding again. Subclasses that override this method should behave the same way.
85     *
86     * @param charset the charset used to encode chars as bytes; must not be null
87     * @since 20.0
88     */
89    @Beta
90    public ByteSource asByteSource(Charset charset) {
91      return new AsByteSource(charset);
92    }
93  
94    /**
95     * Opens a new, independent {@link Reader} for reading from this source each time it is called.
96     *
97     * <p>The caller is responsible for ensuring that the returned reader is closed.
98     *
99     * @return a new reader over the contents of this source
100    * @throws IOException if an I/O error occurs while opening the reader
101    */
102   public abstract Reader openStream() throws IOException;
103 
104   /**
105    * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
106    * independent reader each time it is called.
107    *
108    * <p>The caller is responsible for ensuring that the returned reader is closed.
109    *
110    * @throws IOException if an I/O error occurs while of opening the reader
111    */
112   public BufferedReader openBufferedStream() throws IOException {
113     Reader reader = openStream();
114     return (reader instanceof BufferedReader)
115         ? (BufferedReader) reader
116         : new BufferedReader(reader);
117   }
118 
119   /**
120    * Opens a new {@link Stream} for reading text one line at a time from this source. This method
121    * returns a new, independent stream each time it is called.
122    *
123    * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
124    * I/O error occurs while the stream is reading from the source or when the stream is closed, an
125    * {@link UncheckedIOException} is thrown.
126    *
127    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
128    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
129    * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
130    * as if it does.
131    *
132    * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
133    *
134    * <pre>{@code
135    * try (Stream<String> lines = source.lines()) {
136    *   lines.map(...)
137    *      .filter(...)
138    *      .forEach(...);
139    * }
140    * }</pre>
141    *
142    * @throws IOException if an I/O error occurs while opening the stream
143    * @since 22.0
144    */
145   @Beta
146   @MustBeClosed
147   public Stream<String> lines() throws IOException {
148     BufferedReader reader = openBufferedStream();
149     return reader.lines().onClose(() -> {
150       try {
151         reader.close();
152       } catch (IOException e) {
153         throw new UncheckedIOException(e);
154       }
155     });
156   }
157 
158   /**
159    * Returns the length of this source in chars, if the length can be easily determined without
160    * actually opening the data stream.
161    *
162    * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
163    * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
164    * <i>possible</i> that this method will return a different number of chars than would be returned
165    * by reading all of the chars. Additionally, for mutable sources such as {@code StringBuilder}s,
166    * a subsequent read may return a different number of chars if the contents are changed.
167    *
168    * @return the length of this source in chars if it is known without opening a stream, or
169    *     {@link Optional#absent} otherwise
170    * @since 19.0
171    */
172   @Beta
173   public Optional<Long> lengthIfKnown() {
174     return Optional.absent();
175   }
176 
177   /**
178    * Returns the length of this source in chars, even if doing so requires opening and traversing an
179    * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
180    *
181    * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
182    * absent, it will fall back to a heavyweight operation that will open a stream,
183    * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
184    * that were skipped.
185    *
186    * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
187    * implementation, it is <i>possible</i> that this method will return a different number of chars
188    * than would be returned by reading all of the chars.
189    *
190    * <p>In either case, for mutable sources such as files, a subsequent read may return a different
191    * number of chars if the contents are changed.
192    *
193    * @throws IOException if an I/O error occurs while reading the length of this source
194    * @since 19.0
195    */
196   @Beta
197   public long length() throws IOException {
198     Optional<Long> lengthIfKnown = lengthIfKnown();
199     if (lengthIfKnown.isPresent()) {
200       return lengthIfKnown.get();
201     }
202 
203     Closer closer = Closer.create();
204     try {
205       Reader reader = closer.register(openStream());
206       return countBySkipping(reader);
207     } catch (Throwable e) {
208       throw closer.rethrow(e);
209     } finally {
210       closer.close();
211     }
212   }
213 
214   private long countBySkipping(Reader reader) throws IOException {
215     long count = 0;
216     long read;
217     while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
218       count += read;
219     }
220     return count;
221   }
222 
223   /**
224    * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
225    * Does not close {@code appendable} if it is {@code Closeable}.
226    *
227    * @return the number of characters copied
228    * @throws IOException if an I/O error occurs while reading from this source or writing to
229    *     {@code appendable}
230    */
231   @CanIgnoreReturnValue
232   public long copyTo(Appendable appendable) throws IOException {
233     checkNotNull(appendable);
234 
235     Closer closer = Closer.create();
236     try {
237       Reader reader = closer.register(openStream());
238       return CharStreams.copy(reader, appendable);
239     } catch (Throwable e) {
240       throw closer.rethrow(e);
241     } finally {
242       closer.close();
243     }
244   }
245 
246   /**
247    * Copies the contents of this source to the given sink.
248    *
249    * @return the number of characters copied
250    * @throws IOException if an I/O error occurs while reading from this source or writing to
251    *     {@code sink}
252    */
253   @CanIgnoreReturnValue
254   public long copyTo(CharSink sink) throws IOException {
255     checkNotNull(sink);
256 
257     Closer closer = Closer.create();
258     try {
259       Reader reader = closer.register(openStream());
260       Writer writer = closer.register(sink.openStream());
261       return CharStreams.copy(reader, writer);
262     } catch (Throwable e) {
263       throw closer.rethrow(e);
264     } finally {
265       closer.close();
266     }
267   }
268 
269   /**
270    * Reads the contents of this source as a string.
271    *
272    * @throws IOException if an I/O error occurs while reading from this source
273    */
274   public String read() throws IOException {
275     Closer closer = Closer.create();
276     try {
277       Reader reader = closer.register(openStream());
278       return CharStreams.toString(reader);
279     } catch (Throwable e) {
280       throw closer.rethrow(e);
281     } finally {
282       closer.close();
283     }
284   }
285 
286   /**
287    * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
288    *
289    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
290    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
291    * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
292    * as if it does.
293    *
294    * @throws IOException if an I/O error occurs while reading from this source
295    */
296   @Nullable
297   public String readFirstLine() throws IOException {
298     Closer closer = Closer.create();
299     try {
300       BufferedReader reader = closer.register(openBufferedStream());
301       return reader.readLine();
302     } catch (Throwable e) {
303       throw closer.rethrow(e);
304     } finally {
305       closer.close();
306     }
307   }
308 
309   /**
310    * Reads all the lines of this source as a list of strings. The returned list will be empty if
311    * this source is empty.
312    *
313    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
314    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
315    * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
316    * as if it does.
317    *
318    * @throws IOException if an I/O error occurs while reading from this source
319    */
320   public ImmutableList<String> readLines() throws IOException {
321     Closer closer = Closer.create();
322     try {
323       BufferedReader reader = closer.register(openBufferedStream());
324       List<String> result = Lists.newArrayList();
325       String line;
326       while ((line = reader.readLine()) != null) {
327         result.add(line);
328       }
329       return ImmutableList.copyOf(result);
330     } catch (Throwable e) {
331       throw closer.rethrow(e);
332     } finally {
333       closer.close();
334     }
335   }
336 
337   /**
338    * Reads lines of text from this source, processing each line as it is read using the given
339    * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
340    * returns {@code false} and returns the result produced by the processor.
341    *
342    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
343    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
344    * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
345    * as if it does.
346    *
347    * @throws IOException if an I/O error occurs while reading from this source or if
348    *     {@code processor} throws an {@code IOException}
349    * @since 16.0
350    */
351   @Beta
352   @CanIgnoreReturnValue // some processors won't return a useful result
353   public <T> T readLines(LineProcessor<T> processor) throws IOException {
354     checkNotNull(processor);
355 
356     Closer closer = Closer.create();
357     try {
358       Reader reader = closer.register(openStream());
359       return CharStreams.readLines(reader, processor);
360     } catch (Throwable e) {
361       throw closer.rethrow(e);
362     } finally {
363       closer.close();
364     }
365   }
366 
367   /**
368    * Reads all lines of text from this source, running the given {@code action} for each line as
369    * it is read.
370    *
371    * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
372    * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
373    * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
374    * as if it does.
375    *
376    * @throws IOException if an I/O error occurs while reading from this source or if
377    *     {@code action} throws an {@code UncheckedIOException}
378    * @since 22.0
379    */
380   @Beta
381   public void forEachLine(Consumer<? super String> action) throws IOException {
382     try (Stream<String> lines = lines()) {
383       // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
384       lines.forEachOrdered(action);
385     } catch (UncheckedIOException e) {
386       throw e.getCause();
387     }
388   }
389 
390   /**
391    * Returns whether the source has zero chars. The default implementation first checks
392    * {@link #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
393    * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
394    *
395    * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
396    * chars are actually available for reading. This means that a source may return {@code true} from
397    * {@code isEmpty()} despite having readable content.
398    *
399    * @throws IOException if an I/O error occurs
400    * @since 15.0
401    */
402   public boolean isEmpty() throws IOException {
403     Optional<Long> lengthIfKnown = lengthIfKnown();
404     if (lengthIfKnown.isPresent()) {
405       return lengthIfKnown.get() == 0L;
406     }
407     Closer closer = Closer.create();
408     try {
409       Reader reader = closer.register(openStream());
410       return reader.read() == -1;
411     } catch (Throwable e) {
412       throw closer.rethrow(e);
413     } finally {
414       closer.close();
415     }
416   }
417 
418   /**
419    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
420    * the source will contain the concatenated data from the streams of the underlying sources.
421    *
422    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
423    * close the open underlying stream. The given iterable is retained rather than copied.
424    *
425    * @param sources the sources to concatenate; must not be null
426    * @return a {@code CharSource} containing the concatenated data
427    * @since 15.0
428    */
429   public static CharSource concat(Iterable<? extends CharSource> sources) {
430     return new ConcatenatedCharSource(sources);
431   }
432 
433   /**
434    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
435    * the source will contain the concatenated data from the streams of the underlying sources.
436    *
437    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
438    * close the open underlying stream.
439    *
440    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
441    * is called. This will fail if the iterator is infinite and may cause problems if the iterator
442    * eagerly fetches data for each source when iterated (rather than producing sources that only
443    * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
444    * possible.
445    *
446    * @param sources the sources to concatenate
447    * @return a {@code CharSource} containing the concatenated data
448    * @throws NullPointerException if any of {@code sources} is {@code null}
449    * @since 15.0
450    */
451   public static CharSource concat(Iterator<? extends CharSource> sources) {
452     return concat(ImmutableList.copyOf(sources));
453   }
454 
455   /**
456    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
457    * the source will contain the concatenated data from the streams of the underlying sources.
458    *
459    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
460    * close the open underlying stream.
461    *
462    * @param sources the sources to concatenate
463    * @return a {@code CharSource} containing the concatenated data
464    * @throws NullPointerException if any of {@code sources} is {@code null}
465    * @since 15.0
466    */
467   public static CharSource concat(CharSource... sources) {
468     return concat(ImmutableList.copyOf(sources));
469   }
470 
471   /**
472    * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
473    * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
474    * the {@code charSequence} is mutated while it is being read, so don't do that.
475    *
476    * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
477    */
478   public static CharSource wrap(CharSequence charSequence) {
479     return charSequence instanceof String
480         ? new StringCharSource((String) charSequence)
481         : new CharSequenceCharSource(charSequence);
482   }
483 
484   /**
485    * Returns an immutable {@link CharSource} that contains no characters.
486    *
487    * @since 15.0
488    */
489   public static CharSource empty() {
490     return EmptyCharSource.INSTANCE; // shared singleton: every call returns the same instance
491   }
492 
493   /**
494    * A byte source that reads chars from this source and encodes them as bytes using a charset.
495    */
496   private final class AsByteSource extends ByteSource {
497 
498     final Charset charset;
499 
500     AsByteSource(Charset charset) {
501       this.charset = checkNotNull(charset);
502     }
503 
504     @Override
505     public CharSource asCharSource(Charset charset) {
506       if (charset.equals(this.charset)) {
507         return CharSource.this;
508       }
509       return super.asCharSource(charset);
510     }
511 
512     @Override
513     public InputStream openStream() throws IOException {
514       return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
515     }
516 
517     @Override
518     public String toString() {
519       return CharSource.this.toString() + ".asByteSource(" + charset + ")";
520     }
521   }
522 
523   private static class CharSequenceCharSource extends CharSource {
524 
525     private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
526 
527     protected final CharSequence seq;
528 
529     protected CharSequenceCharSource(CharSequence seq) {
530       this.seq = checkNotNull(seq);
531     }
532 
533     @Override
534     public Reader openStream() {
535       return new CharSequenceReader(seq);
536     }
537 
538     @Override
539     public String read() {
540       return seq.toString();
541     }
542 
543     @Override
544     public boolean isEmpty() {
545       return seq.length() == 0;
546     }
547 
548     @Override
549     public long length() {
550       return seq.length();
551     }
552 
553     @Override
554     public Optional<Long> lengthIfKnown() {
555       return Optional.of((long) seq.length());
556     }
557 
558     /**
559      * Returns an iterator over the lines in the string. If the string ends in a newline, a final
560      * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
561      */
562     private Iterator<String> linesIterator() {
563       return new AbstractIterator<String>() {
564         Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
565 
566         @Override
567         protected String computeNext() {
568           if (lines.hasNext()) {
569             String next = lines.next();
570             // skip last line if it's empty
571             if (lines.hasNext() || !next.isEmpty()) {
572               return next;
573             }
574           }
575           return endOfData();
576         }
577       };
578     }
579 
580     @Override
581     public Stream<String> lines() {
582       return Streams.stream(linesIterator());
583     }
584 
585     @Override
586     public String readFirstLine() {
587       Iterator<String> lines = linesIterator();
588       return lines.hasNext() ? lines.next() : null;
589     }
590 
591     @Override
592     public ImmutableList<String> readLines() {
593       return ImmutableList.copyOf(linesIterator());
594     }
595 
596     @Override
597     public <T> T readLines(LineProcessor<T> processor) throws IOException {
598       Iterator<String> lines = linesIterator();
599       while (lines.hasNext()) {
600         if (!processor.processLine(lines.next())) {
601           break;
602         }
603       }
604       return processor.getResult();
605     }
606 
607     @Override
608     public String toString() {
609       return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
610     }
611   }
612 
613   /**
614    * Subclass specialized for string instances.
615    *
616    * <p>Since Strings are immutable and built into the jdk we can optimize some operations
617    *
618    * <ul>
619    *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
620    *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
621    *       one with {@link CharSequence#charAt(int)}.
622    *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
623    *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
624    *       can't change, and it is faster because many writers and appendables are optimized for
625    *       appending string instances.
626    * </ul>
627    */
628   private static class StringCharSource extends CharSequenceCharSource {
629     protected StringCharSource(String seq) {
630       super(seq);
631     }
632 
633     @Override
634     public Reader openStream() {
635       return new StringReader((String) seq);
636     }
637 
638     @Override
639     public long copyTo(Appendable appendable) throws IOException {
640       appendable.append(seq);
641       return seq.length();
642     }
643 
644     @Override
645     public long copyTo(CharSink sink) throws IOException {
646       checkNotNull(sink);
647       Closer closer = Closer.create();
648       try {
649         Writer writer = closer.register(sink.openStream());
650         writer.write((String) seq);
651         return seq.length();
652       } catch (Throwable e) {
653         throw closer.rethrow(e);
654       } finally {
655         closer.close();
656       }
657     }
658   }
659 
660   private static final class EmptyCharSource extends StringCharSource {
661     // Source backed by the empty string; INSTANCE is what CharSource.empty() returns.
662     private static final EmptyCharSource INSTANCE = new EmptyCharSource();
663     // Private so that INSTANCE is the only instance ever created.
664     private EmptyCharSource() {
665       super("");
666     }
667 
668     @Override
669     public String toString() {
670       return "CharSource.empty()";
671     }
672   }
673 
674   private static final class ConcatenatedCharSource extends CharSource {
675 
676     private final Iterable<? extends CharSource> sources;
677 
678     ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
679       this.sources = checkNotNull(sources);
680     }
681 
682     @Override
683     public Reader openStream() throws IOException {
684       return new MultiReader(sources.iterator());
685     }
686 
687     @Override
688     public boolean isEmpty() throws IOException {
689       for (CharSource source : sources) {
690         if (!source.isEmpty()) {
691           return false;
692         }
693       }
694       return true;
695     }
696 
697     @Override
698     public Optional<Long> lengthIfKnown() {
699       long result = 0L;
700       for (CharSource source : sources) {
701         Optional<Long> lengthIfKnown = source.lengthIfKnown();
702         if (!lengthIfKnown.isPresent()) {
703           return Optional.absent();
704         }
705         result += lengthIfKnown.get();
706       }
707       return Optional.of(result);
708     }
709 
710     @Override
711     public long length() throws IOException {
712       long result = 0L;
713       for (CharSource source : sources) {
714         result += source.length();
715       }
716       return result;
717     }
718 
719     @Override
720     public String toString() {
721       return "CharSource.concat(" + sources + ")";
722     }
723   }
724 }