View Javadoc
1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2017 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.api;
21  
22  import java.io.BufferedReader;
23  import java.io.File;
24  import java.io.FileInputStream;
25  import java.io.FileNotFoundException;
26  import java.io.IOException;
27  import java.io.InputStreamReader;
28  import java.io.Reader;
29  import java.io.StringReader;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetDecoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.UnsupportedCharsetException;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.List;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
41  
42  /**
43   * Represents the text contents of a file of arbitrary plain text type.
44   * <p>
45   * This class will be passed to instances of class FileSetCheck by
46   * Checker.
47   * </p>
48   *
49   * @author Martin von Gagern
50   */
51  public final class FileText {
52  
53      /**
54       * The number of characters to read in one go.
55       */
56      private static final int READ_BUFFER_SIZE = 1024;
57  
58      /**
59       * Regular expression pattern matching all line terminators.
60       */
61      private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
62  
63      // For now, we always keep both full text and lines array.
64      // In the long run, however, the one passed at initialization might be
65      // enough, while the other could be lazily created when requested.
66      // This would save memory but cost CPU cycles.
67  
68      /**
69       * The name of the file.
70       * {@code null} if no file name is available for whatever reason.
71       */
72      private final File file;
73  
74      /**
75       * The charset used to read the file.
76       * {@code null} if the file was reconstructed from a list of lines.
77       */
78      private final Charset charset;
79  
80      /**
81       * The full text contents of the file.
82       */
83      private final String fullText;
84  
85      /**
86       * The lines of the file, without terminators.
87       */
88      private final String[] lines;
89  
90      /**
91       * The first position of each line within the full text.
92       */
93      private int[] lineBreaks;
94  
95      /**
96       * Creates a new file text representation.
97       *
98       * <p>The file will be read using the specified encoding, replacing
99       * malformed input and unmappable characters with the default
100      * replacement character.
101      *
102      * @param file the name of the file
103      * @param charsetName the encoding to use when reading the file
104      * @throws NullPointerException if the text is null
105      * @throws IOException if the file could not be read
106      */
107     public FileText(File file, String charsetName) throws IOException {
108         this.file = file;
109 
110         // We use our own decoder, to be sure we have complete control
111         // about replacements.
112         final CharsetDecoder decoder;
113         try {
114             charset = Charset.forName(charsetName);
115             decoder = charset.newDecoder();
116             decoder.onMalformedInput(CodingErrorAction.REPLACE);
117             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
118         }
119         catch (final UnsupportedCharsetException ex) {
120             final String message = "Unsupported charset: " + charsetName;
121             throw new IllegalStateException(message, ex);
122         }
123 
124         fullText = readFile(file, decoder);
125 
126         // Use the BufferedReader to break down the lines as this
127         // is about 30% faster than using the
128         // LINE_TERMINATOR.split(fullText, -1) method
129         final BufferedReader reader = new BufferedReader(new StringReader(fullText));
130         try {
131             final ArrayList<String> textLines = new ArrayList<>();
132             while (true) {
133                 final String line = reader.readLine();
134                 if (line == null) {
135                     break;
136                 }
137                 textLines.add(line);
138             }
139             lines = textLines.toArray(new String[textLines.size()]);
140         }
141         finally {
142             CommonUtils.close(reader);
143         }
144     }
145 
146     /**
147      * Copy constructor.
148      * @param fileText to make copy of
149      */
150     public FileText(FileText fileText) {
151         file = fileText.file;
152         charset = fileText.charset;
153         fullText = fileText.fullText;
154         lines = fileText.lines.clone();
155         if (fileText.lineBreaks == null) {
156             lineBreaks = null;
157         }
158         else {
159             lineBreaks = fileText.lineBreaks.clone();
160         }
161     }
162 
163     /**
164      * Compatibility constructor.
165      *
166      * <p>This constructor reconstructs the text of the file by joining
167      * lines with linefeed characters. This process does not restore
168      * the original line terminators and should therefore be avoided.
169      *
170      * @param file the name of the file
171      * @param lines the lines of the text, without terminators
172      * @throws NullPointerException if the lines array is null
173      */
174     public FileText(File file, List<String> lines) {
175         final StringBuilder buf = new StringBuilder(1024);
176         for (final String line : lines) {
177             buf.append(line).append('\n');
178         }
179 
180         this.file = file;
181         charset = null;
182         fullText = buf.toString();
183         this.lines = lines.toArray(new String[lines.size()]);
184     }
185 
186     /**
187      * Reads file using specific decoder and returns all its content as a String.
188      * @param inputFile File to read
189      * @param decoder Charset decoder
190      * @return File's text
191      * @throws IOException Unable to open or read the file
192      */
193     private static String readFile(final File inputFile, final CharsetDecoder decoder)
194             throws IOException {
195         if (!inputFile.exists()) {
196             throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
197         }
198         final StringBuilder buf = new StringBuilder(1024);
199         final FileInputStream stream = new FileInputStream(inputFile);
200         final Reader reader = new InputStreamReader(stream, decoder);
201         try {
202             final char[] chars = new char[READ_BUFFER_SIZE];
203             while (true) {
204                 final int len = reader.read(chars);
205                 if (len == -1) {
206                     break;
207                 }
208                 buf.append(chars, 0, len);
209             }
210         }
211         finally {
212             CommonUtils.close(reader);
213         }
214         return buf.toString();
215     }
216 
217     /**
218      * Get the name of the file.
219      * @return an object containing the name of the file
220      */
221     public File getFile() {
222         return file;
223     }
224 
225     /**
226      * Get the character set which was used to read the file.
227      * Will be {@code null} for a file reconstructed from its lines.
228      * @return the charset used when the file was read
229      */
230     public Charset getCharset() {
231         return charset;
232     }
233 
234     /**
235      * Retrieve the full text of the file.
236      * @return the full text of the file
237      */
238     public CharSequence getFullText() {
239         return fullText;
240     }
241 
242     /**
243      * Returns an array of all lines.
244      * {@code text.toLinesArray()} is equivalent to
245      * {@code text.toArray(new String[text.size()])}.
246      * @return an array of all lines of the text
247      */
248     public String[] toLinesArray() {
249         return lines.clone();
250     }
251 
252     /**
253      * Find positions of line breaks in the full text.
254      * @return an array giving the first positions of each line.
255      */
256     private int[] findLineBreaks() {
257         if (lineBreaks == null) {
258             final int[] lineBreakPositions = new int[size() + 1];
259             lineBreakPositions[0] = 0;
260             int lineNo = 1;
261             final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
262             while (matcher.find()) {
263                 lineBreakPositions[lineNo] = matcher.end();
264                 lineNo++;
265             }
266             if (lineNo < lineBreakPositions.length) {
267                 lineBreakPositions[lineNo] = fullText.length();
268             }
269             lineBreaks = lineBreakPositions;
270         }
271         return lineBreaks;
272     }
273 
274     /**
275      * Determine line and column numbers in full text.
276      * @param pos the character position in the full text
277      * @return the line and column numbers of this character
278      */
279     public LineColumn lineColumn(int pos) {
280         final int[] lineBreakPositions = findLineBreaks();
281         int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
282         if (lineNo < 0) {
283             // we have: lineNo = -(insertion point) - 1
284             // we want: lineNo =  (insertion point) - 1
285             lineNo = -lineNo - 2;
286         }
287         final int startOfLine = lineBreakPositions[lineNo];
288         final int columnNo = pos - startOfLine;
289         // now we have lineNo and columnNo, both starting at zero.
290         return new LineColumn(lineNo + 1, columnNo);
291     }
292 
293     /**
294      * Retrieves a line of the text by its number.
295      * The returned line will not contain a trailing terminator.
296      * @param lineNo the number of the line to get, starting at zero
297      * @return the line with the given number
298      */
299     public String get(final int lineNo) {
300         return lines[lineNo];
301     }
302 
303     /**
304      * Counts the lines of the text.
305      * @return the number of lines in the text
306      */
307     public int size() {
308         return lines.length;
309     }
310 
311 }