001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2024 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.api; 021 022import java.io.BufferedReader; 023import java.io.File; 024import java.io.FileNotFoundException; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.InputStreamReader; 028import java.io.Reader; 029import java.io.StringReader; 030import java.nio.charset.Charset; 031import java.nio.charset.CharsetDecoder; 032import java.nio.charset.CodingErrorAction; 033import java.nio.charset.UnsupportedCharsetException; 034import java.nio.file.Files; 035import java.util.ArrayList; 036import java.util.Arrays; 037import java.util.List; 038import java.util.regex.Matcher; 039import java.util.regex.Pattern; 040 041import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 042 043/** 044 * Represents the text contents of a file of arbitrary plain text type. 045 * <p> 046 * This class will be passed to instances of class FileSetCheck by 047 * Checker. 048 * </p> 049 * 050 */ 051public final class FileText { 052 053 /** 054 * The number of characters to read in one go. 055 */ 056 private static final int READ_BUFFER_SIZE = 1024; 057 058 /** 059 * Regular expression pattern matching all line terminators. 060 */ 061 private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?"); 062 063 // For now, we always keep both full text and lines array. 064 // In the long run, however, the one passed at initialization might be 065 // enough, while the other could be lazily created when requested. 066 // This would save memory but cost CPU cycles. 067 068 /** 069 * The name of the file. 070 * {@code null} if no file name is available for whatever reason. 071 */ 072 private final File file; 073 074 /** 075 * The charset used to read the file. 076 * {@code null} if the file was reconstructed from a list of lines. 077 */ 078 private final Charset charset; 079 080 /** 081 * The lines of the file, without terminators. 082 */ 083 private final String[] lines; 084 085 /** 086 * The full text contents of the file. 087 * 088 * @noinspection FieldMayBeFinal 089 * @noinspectionreason FieldMayBeFinal - field is not final to ease reaching full test coverage. 090 */ 091 private String fullText; 092 093 /** 094 * The first position of each line within the full text. 095 */ 096 private int[] lineBreaks; 097 098 /** 099 * Copy constructor. 100 * 101 * @param fileText to make copy of 102 */ 103 public FileText(FileText fileText) { 104 file = fileText.file; 105 charset = fileText.charset; 106 fullText = fileText.fullText; 107 lines = fileText.lines.clone(); 108 if (fileText.lineBreaks != null) { 109 lineBreaks = fileText.lineBreaks.clone(); 110 } 111 } 112 113 /** 114 * Compatibility constructor. 115 * 116 * <p>This constructor reconstructs the text of the file by joining 117 * lines with linefeed characters. This process does not restore 118 * the original line terminators and should therefore be avoided. 119 * 120 * @param file the name of the file 121 * @param lines the lines of the text, without terminators 122 * @throws NullPointerException if the lines array is null 123 */ 124 public FileText(File file, List<String> lines) { 125 final StringBuilder buf = new StringBuilder(1024); 126 for (final String line : lines) { 127 buf.append(line).append('\n'); 128 } 129 130 this.file = file; 131 charset = null; 132 fullText = buf.toString(); 133 this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY); 134 } 135 136 /** 137 * Creates a new file text representation. 138 * 139 * <p>The file will be read using the specified encoding, replacing 140 * malformed input and unmappable characters with the default 141 * replacement character. 142 * 143 * @param file the name of the file 144 * @param charsetName the encoding to use when reading the file 145 * @throws NullPointerException if the text is null 146 * @throws IllegalStateException if the charset is not supported. 147 * @throws IOException if the file could not be read 148 */ 149 public FileText(File file, String charsetName) throws IOException { 150 this.file = file; 151 152 // We use our own decoder, to be sure we have complete control 153 // about replacements. 154 final CharsetDecoder decoder; 155 try { 156 charset = Charset.forName(charsetName); 157 decoder = charset.newDecoder(); 158 decoder.onMalformedInput(CodingErrorAction.REPLACE); 159 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 160 } 161 catch (final UnsupportedCharsetException ex) { 162 final String message = "Unsupported charset: " + charsetName; 163 throw new IllegalStateException(message, ex); 164 } 165 166 fullText = readFile(file, decoder); 167 168 // Use the BufferedReader to break down the lines as this 169 // is about 30% faster than using the 170 // LINE_TERMINATOR.split(fullText, -1) method 171 try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) { 172 final ArrayList<String> textLines = new ArrayList<>(); 173 while (true) { 174 final String line = reader.readLine(); 175 if (line == null) { 176 break; 177 } 178 textLines.add(line); 179 } 180 lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY); 181 } 182 } 183 184 /** 185 * Reads file using specific decoder and returns all its content as a String. 186 * 187 * @param inputFile File to read 188 * @param decoder Charset decoder 189 * @return File's text 190 * @throws IOException Unable to open or read the file 191 * @throws FileNotFoundException when inputFile does not exist 192 */ 193 private static String readFile(final File inputFile, final CharsetDecoder decoder) 194 throws IOException { 195 if (!inputFile.exists()) { 196 throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)"); 197 } 198 final StringBuilder buf = new StringBuilder(1024); 199 final InputStream stream = Files.newInputStream(inputFile.toPath()); 200 try (Reader reader = new InputStreamReader(stream, decoder)) { 201 final char[] chars = new char[READ_BUFFER_SIZE]; 202 while (true) { 203 final int len = reader.read(chars); 204 if (len == -1) { 205 break; 206 } 207 buf.append(chars, 0, len); 208 } 209 } 210 return buf.toString(); 211 } 212 213 /** 214 * Retrieves a line of the text by its number. 215 * The returned line will not contain a trailing terminator. 216 * 217 * @param lineNo the number of the line to get, starting at zero 218 * @return the line with the given number 219 */ 220 public String get(final int lineNo) { 221 return lines[lineNo]; 222 } 223 224 /** 225 * Get the name of the file. 226 * 227 * @return an object containing the name of the file 228 */ 229 public File getFile() { 230 return file; 231 } 232 233 /** 234 * Get the character set which was used to read the file. 235 * Will be {@code null} for a file reconstructed from its lines. 236 * 237 * @return the charset used when the file was read 238 */ 239 public Charset getCharset() { 240 return charset; 241 } 242 243 /** 244 * Retrieve the full text of the file. 245 * 246 * @return the full text of the file 247 */ 248 public CharSequence getFullText() { 249 return fullText; 250 } 251 252 /** 253 * Returns an array of all lines. 254 * {@code text.toLinesArray()} is equivalent to 255 * {@code text.toArray(new String[text.size()])}. 256 * 257 * @return an array of all lines of the text 258 */ 259 public String[] toLinesArray() { 260 return lines.clone(); 261 } 262 263 /** 264 * Determine line and column numbers in full text. 265 * 266 * @param pos the character position in the full text 267 * @return the line and column numbers of this character 268 */ 269 public LineColumn lineColumn(int pos) { 270 final int[] lineBreakPositions = findLineBreaks(); 271 int lineNo = Arrays.binarySearch(lineBreakPositions, pos); 272 if (lineNo < 0) { 273 // we have: lineNo = -(insertion point) - 1 274 // we want: lineNo = (insertion point) - 1 275 lineNo = -lineNo - 2; 276 } 277 final int startOfLine = lineBreakPositions[lineNo]; 278 final int columnNo = pos - startOfLine; 279 // now we have lineNo and columnNo, both starting at zero. 280 return new LineColumn(lineNo + 1, columnNo); 281 } 282 283 /** 284 * Find positions of line breaks in the full text. 285 * 286 * @return an array giving the first positions of each line. 287 */ 288 private int[] findLineBreaks() { 289 if (lineBreaks == null) { 290 final int[] lineBreakPositions = new int[size() + 1]; 291 lineBreakPositions[0] = 0; 292 int lineNo = 1; 293 final Matcher matcher = LINE_TERMINATOR.matcher(fullText); 294 while (matcher.find()) { 295 lineBreakPositions[lineNo] = matcher.end(); 296 lineNo++; 297 } 298 if (lineNo < lineBreakPositions.length) { 299 lineBreakPositions[lineNo] = fullText.length(); 300 } 301 lineBreaks = lineBreakPositions; 302 } 303 return lineBreaks; 304 } 305 306 /** 307 * Counts the lines of the text. 308 * 309 * @return the number of lines in the text 310 */ 311 public int size() { 312 return lines.length; 313 } 314 315}