001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2024 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.regexp; 021 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 026import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 027import com.puppycrawl.tools.checkstyle.api.DetailAST; 028import com.puppycrawl.tools.checkstyle.api.FileContents; 029import com.puppycrawl.tools.checkstyle.api.FileText; 030import com.puppycrawl.tools.checkstyle.api.LineColumn; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <p> 035 * Checks that a specified pattern exists, exists less than 036 * a set number of times, or does not exist in the file. 037 * </p> 038 * <p> 039 * This check combines all the functionality provided by 040 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a> 041 * except supplying the regular expression from a file. 042 * </p> 043 * <p> 044 * It differs from them in that it works in multiline mode. Its regular expression 045 * can span multiple lines and it checks this against the whole file at once. 046 * The others work in single-line mode. Their single or multiple regular expressions 047 * can only span one line. They check each of these against each line in the file in turn. 048 * </p> 049 * <p> 050 * <b>Note:</b> Because of the different mode of operation there may be some 051 * changes in the regular expressions used to achieve a particular end. 052 * </p> 053 * <p> 054 * In multiline mode... 055 * </p> 056 * <ul> 057 * <li> 058 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 059 * </li> 060 * <li> 061 * For beginning of the input use {@code \A}. 062 * </li> 063 * <li> 064 * {@code $} means the end of a line, as opposed to the end of the input. 065 * </li> 066 * <li> 067 * For end of input use {@code \Z}. 068 * </li> 069 * <li> 070 * Each line in the file is terminated with a line feed character. 071 * </li> 072 * </ul> 073 * <p> 074 * <b>Note:</b> Not all regular expression engines are created equal. 075 * Some provide extra functions that others do not and some elements 076 * of the syntax may vary. This check makes use of the 077 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html"> 078 * java.util.regex package</a>; please check its documentation for details 079 * of how to construct a regular expression to achieve a particular goal. 080 * </p> 081 * <p> 082 * <b>Note:</b> When entering a regular expression as a parameter in 083 * the XML config file you must also take into account the XML rules. e.g. 084 * if you want to match a < symbol you need to enter &lt;. 085 * The regular expression should be entered on one line. 086 * </p> 087 * <ul> 088 * <li> 089 * Property {@code duplicateLimit} - Control whether to check for duplicates 090 * of a required pattern, any negative value means no checking for duplicates, 091 * any positive value is used as the maximum number of allowed duplicates, 092 * if the limit is exceeded violations will be logged. 093 * Type is {@code int}. 094 * Default value is {@code 0}. 095 * </li> 096 * <li> 097 * Property {@code errorLimit} - Specify the maximum number of violations before 098 * the check will abort. 099 * Type is {@code int}. 100 * Default value is {@code 100}. 101 * </li> 102 * <li> 103 * Property {@code format} - Specify the pattern to match against. 104 * Type is {@code java.util.regex.Pattern}. 105 * Default value is {@code "^$"}. 106 * </li> 107 * <li> 108 * Property {@code ignoreComments} - Control whether to ignore matches found within comments. 109 * Type is {@code boolean}. 110 * Default value is {@code false}. 111 * </li> 112 * <li> 113 * Property {@code illegalPattern} - Control whether the pattern is required or illegal. 114 * Type is {@code boolean}. 115 * Default value is {@code false}. 116 * </li> 117 * <li> 118 * Property {@code message} - Specify message which is used to notify about 119 * violations, if empty then the default (hard-coded) message is used. 120 * Type is {@code java.lang.String}. 121 * Default value is {@code null}. 122 * </li> 123 * </ul> 124 * <p> 125 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 126 * </p> 127 * <p> 128 * Violation Message Keys: 129 * </p> 130 * <ul> 131 * <li> 132 * {@code duplicate.regexp} 133 * </li> 134 * <li> 135 * {@code illegal.regexp} 136 * </li> 137 * <li> 138 * {@code required.regexp} 139 * </li> 140 * </ul> 141 * 142 * @since 4.0 143 */ 144@FileStatefulCheck 145public class RegexpCheck extends AbstractCheck { 146 147 /** 148 * A key is pointing to the warning message text in "messages.properties" 149 * file. 150 */ 151 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 152 153 /** 154 * A key is pointing to the warning message text in "messages.properties" 155 * file. 156 */ 157 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 158 159 /** 160 * A key is pointing to the warning message text in "messages.properties" 161 * file. 162 */ 163 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 164 165 /** Default duplicate limit. */ 166 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 167 168 /** Default error report limit. */ 169 private static final int DEFAULT_ERROR_LIMIT = 100; 170 171 /** Error count exceeded message. */ 172 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 173 "The error limit has been exceeded, " 174 + "the check is aborting, there may be more unreported errors."; 175 176 /** 177 * Specify message which is used to notify about violations, 178 * if empty then the default (hard-coded) message is used. 179 */ 180 private String message; 181 182 /** Control whether to ignore matches found within comments. */ 183 private boolean ignoreComments; 184 185 /** Control whether the pattern is required or illegal. */ 186 private boolean illegalPattern; 187 188 /** Specify the maximum number of violations before the check will abort. */ 189 private int errorLimit = DEFAULT_ERROR_LIMIT; 190 191 /** 192 * Control whether to check for duplicates of a required pattern, 193 * any negative value means no checking for duplicates, 194 * any positive value is used as the maximum number of allowed duplicates, 195 * if the limit is exceeded violations will be logged. 196 */ 197 private int duplicateLimit; 198 199 /** Boolean to say if we should check for duplicates. */ 200 private boolean checkForDuplicates; 201 202 /** Tracks number of matches made. */ 203 private int matchCount; 204 205 /** Tracks number of errors. */ 206 private int errorCount; 207 208 /** Specify the pattern to match against. */ 209 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 210 211 /** The matcher. */ 212 private Matcher matcher; 213 214 /** 215 * Setter to specify message which is used to notify about violations, 216 * if empty then the default (hard-coded) message is used. 217 * 218 * @param message custom message which should be used in report. 219 * @since 4.0 220 */ 221 public void setMessage(String message) { 222 this.message = message; 223 } 224 225 /** 226 * Setter to control whether to ignore matches found within comments. 227 * 228 * @param ignoreComments True if comments should be ignored. 229 * @since 4.0 230 */ 231 public void setIgnoreComments(boolean ignoreComments) { 232 this.ignoreComments = ignoreComments; 233 } 234 235 /** 236 * Setter to control whether the pattern is required or illegal. 237 * 238 * @param illegalPattern True if pattern is not allowed. 239 * @since 4.0 240 */ 241 public void setIllegalPattern(boolean illegalPattern) { 242 this.illegalPattern = illegalPattern; 243 } 244 245 /** 246 * Setter to specify the maximum number of violations before the check will abort. 247 * 248 * @param errorLimit the number of errors to report. 249 * @since 4.0 250 */ 251 public void setErrorLimit(int errorLimit) { 252 this.errorLimit = errorLimit; 253 } 254 255 /** 256 * Setter to control whether to check for duplicates of a required pattern, 257 * any negative value means no checking for duplicates, 258 * any positive value is used as the maximum number of allowed duplicates, 259 * if the limit is exceeded violations will be logged. 260 * 261 * @param duplicateLimit negative values mean no duplicate checking, 262 * any positive value is used as the limit. 263 * @since 4.0 264 */ 265 public void setDuplicateLimit(int duplicateLimit) { 266 this.duplicateLimit = duplicateLimit; 267 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 268 } 269 270 /** 271 * Setter to specify the pattern to match against. 272 * 273 * @param pattern the new pattern 274 * @since 4.0 275 */ 276 public final void setFormat(Pattern pattern) { 277 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 278 } 279 280 @Override 281 public int[] getDefaultTokens() { 282 return getRequiredTokens(); 283 } 284 285 @Override 286 public int[] getAcceptableTokens() { 287 return getRequiredTokens(); 288 } 289 290 @Override 291 public int[] getRequiredTokens() { 292 return CommonUtil.EMPTY_INT_ARRAY; 293 } 294 295 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 296 @SuppressWarnings("deprecation") 297 @Override 298 public void beginTree(DetailAST rootAST) { 299 matcher = format.matcher(getFileContents().getText().getFullText()); 300 matchCount = 0; 301 errorCount = 0; 302 findMatch(); 303 } 304 305 /** Recursive method that finds the matches. */ 306 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 307 @SuppressWarnings("deprecation") 308 private void findMatch() { 309 final boolean foundMatch = matcher.find(); 310 if (foundMatch) { 311 final FileText text = getFileContents().getText(); 312 final LineColumn start = text.lineColumn(matcher.start()); 313 final int startLine = start.getLine(); 314 315 final boolean ignore = isIgnore(startLine, text, start); 316 317 if (!ignore) { 318 matchCount++; 319 if (illegalPattern || checkForDuplicates 320 && matchCount - 1 > duplicateLimit) { 321 errorCount++; 322 logMessage(startLine); 323 } 324 } 325 if (canContinueValidation(ignore)) { 326 findMatch(); 327 } 328 } 329 else if (!illegalPattern && matchCount == 0) { 330 final String msg = getMessage(); 331 log(1, MSG_REQUIRED_REGEXP, msg); 332 } 333 } 334 335 /** 336 * Check if we can stop validation. 337 * 338 * @param ignore flag 339 * @return true is we can continue 340 */ 341 private boolean canContinueValidation(boolean ignore) { 342 return errorCount <= errorLimit - 1 343 && (ignore || illegalPattern || checkForDuplicates); 344 } 345 346 /** 347 * Detect ignore situation. 348 * 349 * @param startLine position of line 350 * @param text file text 351 * @param start line column 352 * @return true is that need to be ignored 353 */ 354 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 355 @SuppressWarnings("deprecation") 356 private boolean isIgnore(int startLine, FileText text, LineColumn start) { 357 final LineColumn end; 358 if (matcher.end() == 0) { 359 end = text.lineColumn(0); 360 } 361 else { 362 end = text.lineColumn(matcher.end() - 1); 363 } 364 boolean ignore = false; 365 if (ignoreComments) { 366 final FileContents theFileContents = getFileContents(); 367 final int startColumn = start.getColumn(); 368 final int endLine = end.getLine(); 369 final int endColumn = end.getColumn(); 370 ignore = theFileContents.hasIntersectionWithComment(startLine, 371 startColumn, endLine, endColumn); 372 } 373 return ignore; 374 } 375 376 /** 377 * Displays the right message. 378 * 379 * @param lineNumber the line number the message relates to. 380 */ 381 private void logMessage(int lineNumber) { 382 final String msg = getMessage(); 383 384 if (illegalPattern) { 385 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 386 } 387 else { 388 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 389 } 390 } 391 392 /** 393 * Provide right message. 394 * 395 * @return message for violation. 396 */ 397 private String getMessage() { 398 String msg; 399 400 if (message == null || message.isEmpty()) { 401 msg = format.pattern(); 402 } 403 else { 404 msg = message; 405 } 406 407 if (errorCount >= errorLimit) { 408 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 409 } 410 411 return msg; 412 } 413 414}