001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.common.collect.Streams;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.MustBeClosed;
030import java.io.BufferedReader;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.Reader;
034import java.io.StringReader;
035import java.io.UncheckedIOException;
036import java.io.Writer;
037import java.nio.charset.Charset;
038import java.util.Iterator;
039import java.util.List;
040import java.util.function.Consumer;
041import java.util.stream.Stream;
042import org.checkerframework.checker.nullness.qual.Nullable;
043
044/**
045 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
046 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
047 * it is an immutable <i>supplier</i> of {@code Reader} instances.
048 *
049 * <p>{@code CharSource} provides two kinds of methods:
050 *
051 * <ul>
052 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
053 *       instance each time they are called. The caller is responsible for ensuring that the
054 *       returned reader is closed.
055 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
056 *       typically implemented by opening a reader using one of the methods in the first category,
057 *       doing something and finally closing the reader that was opened.
058 * </ul>
059 *
060 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
061 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
062 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
063 * be an empty line at the end if the contents are terminated with a line separator.
064 *
065 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
066 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
067 *
068 * @since 14.0
069 * @author Colin Decker
070 */
071@GwtIncompatible
072public abstract class CharSource implements InputSupplier<Reader> {
073
074  /** Constructor for use by subclasses. */
075  protected CharSource() {}
076
077  /**
078   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
079   * as bytes using the given {@link Charset}.
080   *
081   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
082   * the default implementation of this method will ensure that the original {@code CharSource} is
083   * returned, rather than round-trip encoding. Subclasses that override this method should behave
084   * the same way.
085   *
086   * @since 20.0
087   */
088  @Beta
089  public ByteSource asByteSource(Charset charset) {
090    return new AsByteSource(charset);
091  }
092
  /**
   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
   * reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * <p>This is the only abstract method of this class; the default implementations of the other
   * reading methods declared here (for example {@link #read()} and {@link #copyTo(Appendable)})
   * obtain their reader by calling it.
   *
   * @return a newly opened reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
102
103  /**
104   * This method is a temporary method provided for easing migration from suppliers to sources and
105   * sinks.
106   *
107   * @since 15.0
108   * @deprecated This method is only provided for temporary compatibility with the
109   *     {@link InputSupplier} interface and should not be called directly. Use {@link #openStream}
110   *     instead. This method is scheduled for removal in Guava 18.0.
111   */
112  @Override
113  @Deprecated
114  public final Reader getInput() throws IOException {
115    return openStream();
116  }
117
118  /**
119   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
120   * independent reader each time it is called.
121   *
122   * <p>The caller is responsible for ensuring that the returned reader is closed.
123   *
124   * @throws IOException if an I/O error occurs while of opening the reader
125   */
126  public BufferedReader openBufferedStream() throws IOException {
127    Reader reader = openStream();
128    return (reader instanceof BufferedReader)
129        ? (BufferedReader) reader
130        : new BufferedReader(reader);
131  }
132
133  /**
134   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
135   * returns a new, independent stream each time it is called.
136   *
137   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
138   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
139   * {@link UncheckedIOException} is thrown.
140   *
141   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
142   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
143   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
144   * it does.
145   *
146   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
147   *
148   * <pre>{@code
149   * try (Stream<String> lines = source.lines()) {
150   *   lines.map(...)
151   *      .filter(...)
152   *      .forEach(...);
153   * }
154   * }</pre>
155   *
156   * @throws IOException if an I/O error occurs while opening the stream
157   * @since 22.0
158   */
159  @Beta
160  @MustBeClosed
161  public Stream<String> lines() throws IOException {
162    BufferedReader reader = openBufferedStream();
163    return reader
164        .lines()
165        .onClose(
166            () -> {
167              try {
168                reader.close();
169              } catch (IOException e) {
170                throw new UncheckedIOException(e);
171              }
172            });
173  }
174
175  /**
176   * Returns the size of this source in chars, if the size can be easily determined without actually
177   * opening the data stream.
178   *
179   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
180   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
181   * that this method will return a different number of chars than would be returned by reading all
182   * of the chars.
183   *
184   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
185   * return a different number of chars if the contents are changed.
186   *
187   * @since 19.0
188   */
189  @Beta
190  public Optional<Long> lengthIfKnown() {
191    return Optional.absent();
192  }
193
194  /**
195   * Returns the length of this source in chars, even if doing so requires opening and traversing an
196   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
197   *
198   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
199   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
200   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
201   * were skipped.
202   *
203   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
204   * implementation, it is <i>possible</i> that this method will return a different number of chars
205   * than would be returned by reading all of the chars.
206   *
207   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
208   * number of chars if the contents are changed.
209   *
210   * @throws IOException if an I/O error occurs while reading the length of this source
211   * @since 19.0
212   */
213  @Beta
214  public long length() throws IOException {
215    Optional<Long> lengthIfKnown = lengthIfKnown();
216    if (lengthIfKnown.isPresent()) {
217      return lengthIfKnown.get();
218    }
219
220    Closer closer = Closer.create();
221    try {
222      Reader reader = closer.register(openStream());
223      return countBySkipping(reader);
224    } catch (Throwable e) {
225      throw closer.rethrow(e);
226    } finally {
227      closer.close();
228    }
229  }
230
231  private long countBySkipping(Reader reader) throws IOException {
232    long count = 0;
233    long read;
234    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
235      count += read;
236    }
237    return count;
238  }
239
240  /**
241   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
242   * Does not close {@code appendable} if it is {@code Closeable}.
243   *
244   * @return the number of characters copied
245   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
246   *     appendable}
247   */
248  @CanIgnoreReturnValue
249  public long copyTo(Appendable appendable) throws IOException {
250    checkNotNull(appendable);
251
252    Closer closer = Closer.create();
253    try {
254      Reader reader = closer.register(openStream());
255      return CharStreams.copy(reader, appendable);
256    } catch (Throwable e) {
257      throw closer.rethrow(e);
258    } finally {
259      closer.close();
260    }
261  }
262
263  /**
264   * Copies the contents of this source to the given sink.
265   *
266   * @return the number of characters copied
267   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
268   *     sink}
269   */
270  @CanIgnoreReturnValue
271  public long copyTo(CharSink sink) throws IOException {
272    checkNotNull(sink);
273
274    Closer closer = Closer.create();
275    try {
276      Reader reader = closer.register(openStream());
277      Writer writer = closer.register(sink.openStream());
278      return CharStreams.copy(reader, writer);
279    } catch (Throwable e) {
280      throw closer.rethrow(e);
281    } finally {
282      closer.close();
283    }
284  }
285
286  /**
287   * Reads the contents of this source as a string.
288   *
289   * @throws IOException if an I/O error occurs while reading from this source
290   */
291  public String read() throws IOException {
292    Closer closer = Closer.create();
293    try {
294      Reader reader = closer.register(openStream());
295      return CharStreams.toString(reader);
296    } catch (Throwable e) {
297      throw closer.rethrow(e);
298    } finally {
299      closer.close();
300    }
301  }
302
303  /**
304   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
305   *
306   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
307   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
308   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
309   * it does.
310   *
311   * @throws IOException if an I/O error occurs while reading from this source
312   */
313  public @Nullable String readFirstLine() throws IOException {
314    Closer closer = Closer.create();
315    try {
316      BufferedReader reader = closer.register(openBufferedStream());
317      return reader.readLine();
318    } catch (Throwable e) {
319      throw closer.rethrow(e);
320    } finally {
321      closer.close();
322    }
323  }
324
325  /**
326   * Reads all the lines of this source as a list of strings. The returned list will be empty if
327   * this source is empty.
328   *
329   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
330   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
331   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
332   * it does.
333   *
334   * @throws IOException if an I/O error occurs while reading from this source
335   */
336  public ImmutableList<String> readLines() throws IOException {
337    Closer closer = Closer.create();
338    try {
339      BufferedReader reader = closer.register(openBufferedStream());
340      List<String> result = Lists.newArrayList();
341      String line;
342      while ((line = reader.readLine()) != null) {
343        result.add(line);
344      }
345      return ImmutableList.copyOf(result);
346    } catch (Throwable e) {
347      throw closer.rethrow(e);
348    } finally {
349      closer.close();
350    }
351  }
352
353  /**
354   * Reads lines of text from this source, processing each line as it is read using the given {@link
355   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
356   * {@code false} and returns the result produced by the processor.
357   *
358   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
359   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
360   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
361   * it does.
362   *
363   * @throws IOException if an I/O error occurs while reading from this source or if {@code
364   *     processor} throws an {@code IOException}
365   * @since 16.0
366   */
367  @Beta
368  @CanIgnoreReturnValue // some processors won't return a useful result
369  public <T> T readLines(LineProcessor<T> processor) throws IOException {
370    checkNotNull(processor);
371
372    Closer closer = Closer.create();
373    try {
374      Reader reader = closer.register(openStream());
375      return CharStreams.readLines(reader, processor);
376    } catch (Throwable e) {
377      throw closer.rethrow(e);
378    } finally {
379      closer.close();
380    }
381  }
382
383  /**
384   * Reads all lines of text from this source, running the given {@code action} for each line as it
385   * is read.
386   *
387   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
388   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
389   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
390   * it does.
391   *
392   * @throws IOException if an I/O error occurs while reading from this source or if {@code action}
393   *     throws an {@code UncheckedIOException}
394   * @since 22.0
395   */
396  @Beta
397  public void forEachLine(Consumer<? super String> action) throws IOException {
398    try (Stream<String> lines = lines()) {
399      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
400      lines.forEachOrdered(action);
401    } catch (UncheckedIOException e) {
402      throw e.getCause();
403    }
404  }
405
406  /**
407   * Returns whether the source has zero chars. The default implementation first checks {@link
408   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
409   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
410   *
411   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
412   * chars are actually available for reading. This means that a source may return {@code true} from
413   * {@code isEmpty()} despite having readable content.
414   *
415   * @throws IOException if an I/O error occurs
416   * @since 15.0
417   */
418  public boolean isEmpty() throws IOException {
419    Optional<Long> lengthIfKnown = lengthIfKnown();
420    if (lengthIfKnown.isPresent()) {
421      return lengthIfKnown.get() == 0L;
422    }
423    Closer closer = Closer.create();
424    try {
425      Reader reader = closer.register(openStream());
426      return reader.read() == -1;
427    } catch (Throwable e) {
428      throw closer.rethrow(e);
429    } finally {
430      closer.close();
431    }
432  }
433
434  /**
435   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
436   * the source will contain the concatenated data from the streams of the underlying sources.
437   *
438   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
439   * close the open underlying stream.
440   *
441   * @param sources the sources to concatenate
442   * @return a {@code CharSource} containing the concatenated data
443   * @since 15.0
444   */
445  public static CharSource concat(Iterable<? extends CharSource> sources) {
446    return new ConcatenatedCharSource(sources);
447  }
448
449  /**
450   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
451   * the source will contain the concatenated data from the streams of the underlying sources.
452   *
453   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
454   * close the open underlying stream.
455   *
456   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
457   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
458   * eagerly fetches data for each source when iterated (rather than producing sources that only
459   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
460   * possible.
461   *
462   * @param sources the sources to concatenate
463   * @return a {@code CharSource} containing the concatenated data
464   * @throws NullPointerException if any of {@code sources} is {@code null}
465   * @since 15.0
466   */
467  public static CharSource concat(Iterator<? extends CharSource> sources) {
468    return concat(ImmutableList.copyOf(sources));
469  }
470
471  /**
472   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
473   * the source will contain the concatenated data from the streams of the underlying sources.
474   *
475   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
476   * close the open underlying stream.
477   *
478   * @param sources the sources to concatenate
479   * @return a {@code CharSource} containing the concatenated data
480   * @throws NullPointerException if any of {@code sources} is {@code null}
481   * @since 15.0
482   */
483  public static CharSource concat(CharSource... sources) {
484    return concat(ImmutableList.copyOf(sources));
485  }
486
487  /**
488   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
489   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
490   * the {@code charSequence} is mutated while it is being read, so don't do that.
491   *
492   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
493   */
494  public static CharSource wrap(CharSequence charSequence) {
495    return charSequence instanceof String
496        ? new StringCharSource((String) charSequence)
497        : new CharSequenceCharSource(charSequence);
498  }
499
500  /**
501   * Returns an immutable {@link CharSource} that contains no characters.
502   *
503   * @since 15.0
504   */
505  public static CharSource empty() {
506    return EmptyCharSource.INSTANCE;
507  }
508
509  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
510  private final class AsByteSource extends ByteSource {
511
512    final Charset charset;
513
514    AsByteSource(Charset charset) {
515      this.charset = checkNotNull(charset);
516    }
517
518    @Override
519    public CharSource asCharSource(Charset charset) {
520      if (charset.equals(this.charset)) {
521        return CharSource.this;
522      }
523      return super.asCharSource(charset);
524    }
525
526    @Override
527    public InputStream openStream() throws IOException {
528      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
529    }
530
531    @Override
532    public String toString() {
533      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
534    }
535  }
536
537  private static class CharSequenceCharSource extends CharSource {
538
539    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
540
541    protected final CharSequence seq;
542
543    protected CharSequenceCharSource(CharSequence seq) {
544      this.seq = checkNotNull(seq);
545    }
546
547    @Override
548    public Reader openStream() {
549      return new CharSequenceReader(seq);
550    }
551
552    @Override
553    public String read() {
554      return seq.toString();
555    }
556
557    @Override
558    public boolean isEmpty() {
559      return seq.length() == 0;
560    }
561
562    @Override
563    public long length() {
564      return seq.length();
565    }
566
567    @Override
568    public Optional<Long> lengthIfKnown() {
569      return Optional.of((long) seq.length());
570    }
571
572    /**
573     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
574     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
575     */
576    private Iterator<String> linesIterator() {
577      return new AbstractIterator<String>() {
578        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
579
580        @Override
581        protected String computeNext() {
582          if (lines.hasNext()) {
583            String next = lines.next();
584            // skip last line if it's empty
585            if (lines.hasNext() || !next.isEmpty()) {
586              return next;
587            }
588          }
589          return endOfData();
590        }
591      };
592    }
593
594    @Override
595    public Stream<String> lines() {
596      return Streams.stream(linesIterator());
597    }
598
599    @Override
600    public String readFirstLine() {
601      Iterator<String> lines = linesIterator();
602      return lines.hasNext() ? lines.next() : null;
603    }
604
605    @Override
606    public ImmutableList<String> readLines() {
607      return ImmutableList.copyOf(linesIterator());
608    }
609
610    @Override
611    public <T> T readLines(LineProcessor<T> processor) throws IOException {
612      Iterator<String> lines = linesIterator();
613      while (lines.hasNext()) {
614        if (!processor.processLine(lines.next())) {
615          break;
616        }
617      }
618      return processor.getResult();
619    }
620
621    @Override
622    public String toString() {
623      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
624    }
625  }
626
627  /**
628   * Subclass specialized for string instances.
629   *
630   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
631   *
632   * <ul>
633   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
634   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
635   *       one with {@link CharSequence#charAt(int)}.
636   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
637   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
638   *       can't change, and it is faster because many writers and appendables are optimized for
639   *       appending string instances.
640   * </ul>
641   */
642  private static class StringCharSource extends CharSequenceCharSource {
643    protected StringCharSource(String seq) {
644      super(seq);
645    }
646
647    @Override
648    public Reader openStream() {
649      return new StringReader((String) seq);
650    }
651
652    @Override
653    public long copyTo(Appendable appendable) throws IOException {
654      appendable.append(seq);
655      return seq.length();
656    }
657
658    @Override
659    public long copyTo(CharSink sink) throws IOException {
660      checkNotNull(sink);
661      Closer closer = Closer.create();
662      try {
663        Writer writer = closer.register(sink.openStream());
664        writer.write((String) seq);
665        return seq.length();
666      } catch (Throwable e) {
667        throw closer.rethrow(e);
668      } finally {
669        closer.close();
670      }
671    }
672  }
673
674  private static final class EmptyCharSource extends StringCharSource {
675
676    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
677
678    private EmptyCharSource() {
679      super("");
680    }
681
682    @Override
683    public String toString() {
684      return "CharSource.empty()";
685    }
686  }
687
688  private static final class ConcatenatedCharSource extends CharSource {
689
690    private final Iterable<? extends CharSource> sources;
691
692    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
693      this.sources = checkNotNull(sources);
694    }
695
696    @Override
697    public Reader openStream() throws IOException {
698      return new MultiReader(sources.iterator());
699    }
700
701    @Override
702    public boolean isEmpty() throws IOException {
703      for (CharSource source : sources) {
704        if (!source.isEmpty()) {
705          return false;
706        }
707      }
708      return true;
709    }
710
711    @Override
712    public Optional<Long> lengthIfKnown() {
713      long result = 0L;
714      for (CharSource source : sources) {
715        Optional<Long> lengthIfKnown = source.lengthIfKnown();
716        if (!lengthIfKnown.isPresent()) {
717          return Optional.absent();
718        }
719        result += lengthIfKnown.get();
720      }
721      return Optional.of(result);
722    }
723
724    @Override
725    public long length() throws IOException {
726      long result = 0L;
727      for (CharSource source : sources) {
728        result += source.length();
729      }
730      return result;
731    }
732
733    @Override
734    public String toString() {
735      return "CharSource.concat(" + sources + ")";
736    }
737  }
738}