///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.regexp;
21  
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
26  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
27  import com.puppycrawl.tools.checkstyle.api.DetailAST;
28  import com.puppycrawl.tools.checkstyle.api.FileContents;
29  import com.puppycrawl.tools.checkstyle.api.FileText;
30  import com.puppycrawl.tools.checkstyle.api.LineColumn;
31  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32  
33  /**
34   * <p>
35   * Checks that a specified pattern exists, exists less than
36   * a set number of times, or does not exist in the file.
37   * </p>
38   * <p>
39   * This check combines all the functionality provided by
40   * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
41   * except supplying the regular expression from a file.
42   * </p>
43   * <p>
44   * It differs from them in that it works in multiline mode. Its regular expression
45   * can span multiple lines and it checks this against the whole file at once.
46   * The others work in single-line mode. Their single or multiple regular expressions
47   * can only span one line. They check each of these against each line in the file in turn.
48   * </p>
49   * <p>
50   * <b>Note:</b> Because of the different mode of operation there may be some
51   * changes in the regular expressions used to achieve a particular end.
52   * </p>
53   * <p>
54   * In multiline mode...
55   * </p>
56   * <ul>
57   * <li>
58   * {@code ^} means the beginning of a line, as opposed to beginning of the input.
59   * </li>
60   * <li>
61   * For beginning of the input use {@code \A}.
62   * </li>
63   * <li>
64   * {@code $} means the end of a line, as opposed to the end of the input.
65   * </li>
66   * <li>
67   * For end of input use {@code \Z}.
68   * </li>
69   * <li>
70   * Each line in the file is terminated with a line feed character.
71   * </li>
72   * </ul>
73   * <p>
74   * <b>Note:</b> Not all regular expression engines are created equal.
75   * Some provide extra functions that others do not and some elements
76   * of the syntax may vary. This check makes use of the
77   * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
78   * java.util.regex package</a>; please check its documentation for details
79   * of how to construct a regular expression to achieve a particular goal.
80   * </p>
81   * <p>
82   * <b>Note:</b> When entering a regular expression as a parameter in
83   * the XML config file you must also take into account the XML rules. e.g.
84   * if you want to match a &lt; symbol you need to enter &amp;lt;.
85   * The regular expression should be entered on one line.
86   * </p>
87   * <ul>
88   * <li>
89   * Property {@code duplicateLimit} - Control whether to check for duplicates
90   * of a required pattern, any negative value means no checking for duplicates,
91   * any positive value is used as the maximum number of allowed duplicates,
92   * if the limit is exceeded violations will be logged.
93   * Type is {@code int}.
94   * Default value is {@code 0}.
95   * </li>
96   * <li>
97   * Property {@code errorLimit} - Specify the maximum number of violations before
98   * the check will abort.
99   * Type is {@code int}.
100  * Default value is {@code 100}.
101  * </li>
102  * <li>
103  * Property {@code format} - Specify the pattern to match against.
104  * Type is {@code java.util.regex.Pattern}.
105  * Default value is {@code "^$"}.
106  * </li>
107  * <li>
108  * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
109  * Type is {@code boolean}.
110  * Default value is {@code false}.
111  * </li>
112  * <li>
113  * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
114  * Type is {@code boolean}.
115  * Default value is {@code false}.
116  * </li>
117  * <li>
118  * Property {@code message} - Specify message which is used to notify about
119  * violations, if empty then the default (hard-coded) message is used.
120  * Type is {@code java.lang.String}.
121  * Default value is {@code null}.
122  * </li>
123  * </ul>
124  * <p>
125  * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
126  * </p>
127  * <p>
128  * Violation Message Keys:
129  * </p>
130  * <ul>
131  * <li>
132  * {@code duplicate.regexp}
133  * </li>
134  * <li>
135  * {@code illegal.regexp}
136  * </li>
137  * <li>
138  * {@code required.regexp}
139  * </li>
140  * </ul>
141  *
142  * @since 4.0
143  */
144 @FileStatefulCheck
145 public class RegexpCheck extends AbstractCheck {
146 
147     /**
148      * A key is pointing to the warning message text in "messages.properties"
149      * file.
150      */
151     public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
152 
153     /**
154      * A key is pointing to the warning message text in "messages.properties"
155      * file.
156      */
157     public static final String MSG_REQUIRED_REGEXP = "required.regexp";
158 
159     /**
160      * A key is pointing to the warning message text in "messages.properties"
161      * file.
162      */
163     public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
164 
165     /** Default duplicate limit. */
166     private static final int DEFAULT_DUPLICATE_LIMIT = -1;
167 
168     /** Default error report limit. */
169     private static final int DEFAULT_ERROR_LIMIT = 100;
170 
171     /** Error count exceeded message. */
172     private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
173         "The error limit has been exceeded, "
174         + "the check is aborting, there may be more unreported errors.";
175 
176     /**
177      * Specify message which is used to notify about violations,
178      * if empty then the default (hard-coded) message is used.
179      */
180     private String message;
181 
182     /** Control whether to ignore matches found within comments. */
183     private boolean ignoreComments;
184 
185     /** Control whether the pattern is required or illegal. */
186     private boolean illegalPattern;
187 
188     /** Specify the maximum number of violations before the check will abort. */
189     private int errorLimit = DEFAULT_ERROR_LIMIT;
190 
191     /**
192      * Control whether to check for duplicates of a required pattern,
193      * any negative value means no checking for duplicates,
194      * any positive value is used as the maximum number of allowed duplicates,
195      * if the limit is exceeded violations will be logged.
196      */
197     private int duplicateLimit;
198 
199     /** Boolean to say if we should check for duplicates. */
200     private boolean checkForDuplicates;
201 
202     /** Tracks number of matches made. */
203     private int matchCount;
204 
205     /** Tracks number of errors. */
206     private int errorCount;
207 
208     /** Specify the pattern to match against. */
209     private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
210 
211     /** The matcher. */
212     private Matcher matcher;
213 
214     /**
215      * Setter to specify message which is used to notify about violations,
216      * if empty then the default (hard-coded) message is used.
217      *
218      * @param message custom message which should be used in report.
219      * @since 4.0
220      */
221     public void setMessage(String message) {
222         this.message = message;
223     }
224 
225     /**
226      * Setter to control whether to ignore matches found within comments.
227      *
228      * @param ignoreComments True if comments should be ignored.
229      * @since 4.0
230      */
231     public void setIgnoreComments(boolean ignoreComments) {
232         this.ignoreComments = ignoreComments;
233     }
234 
235     /**
236      * Setter to control whether the pattern is required or illegal.
237      *
238      * @param illegalPattern True if pattern is not allowed.
239      * @since 4.0
240      */
241     public void setIllegalPattern(boolean illegalPattern) {
242         this.illegalPattern = illegalPattern;
243     }
244 
245     /**
246      * Setter to specify the maximum number of violations before the check will abort.
247      *
248      * @param errorLimit the number of errors to report.
249      * @since 4.0
250      */
251     public void setErrorLimit(int errorLimit) {
252         this.errorLimit = errorLimit;
253     }
254 
255     /**
256      * Setter to control whether to check for duplicates of a required pattern,
257      * any negative value means no checking for duplicates,
258      * any positive value is used as the maximum number of allowed duplicates,
259      * if the limit is exceeded violations will be logged.
260      *
261      * @param duplicateLimit negative values mean no duplicate checking,
262      *     any positive value is used as the limit.
263      * @since 4.0
264      */
265     public void setDuplicateLimit(int duplicateLimit) {
266         this.duplicateLimit = duplicateLimit;
267         checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
268     }
269 
270     /**
271      * Setter to specify the pattern to match against.
272      *
273      * @param pattern the new pattern
274      * @since 4.0
275      */
276     public final void setFormat(Pattern pattern) {
277         format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
278     }
279 
280     @Override
281     public int[] getDefaultTokens() {
282         return getRequiredTokens();
283     }
284 
285     @Override
286     public int[] getAcceptableTokens() {
287         return getRequiredTokens();
288     }
289 
290     @Override
291     public int[] getRequiredTokens() {
292         return CommonUtil.EMPTY_INT_ARRAY;
293     }
294 
295     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
296     @SuppressWarnings("deprecation")
297     @Override
298     public void beginTree(DetailAST rootAST) {
299         matcher = format.matcher(getFileContents().getText().getFullText());
300         matchCount = 0;
301         errorCount = 0;
302         findMatch();
303     }
304 
305     /** Recursive method that finds the matches. */
306     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
307     @SuppressWarnings("deprecation")
308     private void findMatch() {
309         final boolean foundMatch = matcher.find();
310         if (foundMatch) {
311             final FileText text = getFileContents().getText();
312             final LineColumn start = text.lineColumn(matcher.start());
313             final int startLine = start.getLine();
314 
315             final boolean ignore = isIgnore(startLine, text, start);
316 
317             if (!ignore) {
318                 matchCount++;
319                 if (illegalPattern || checkForDuplicates
320                         && matchCount - 1 > duplicateLimit) {
321                     errorCount++;
322                     logMessage(startLine);
323                 }
324             }
325             if (canContinueValidation(ignore)) {
326                 findMatch();
327             }
328         }
329         else if (!illegalPattern && matchCount == 0) {
330             final String msg = getMessage();
331             log(1, MSG_REQUIRED_REGEXP, msg);
332         }
333     }
334 
335     /**
336      * Check if we can stop validation.
337      *
338      * @param ignore flag
339      * @return true is we can continue
340      */
341     private boolean canContinueValidation(boolean ignore) {
342         return errorCount <= errorLimit - 1
343                 && (ignore || illegalPattern || checkForDuplicates);
344     }
345 
346     /**
347      * Detect ignore situation.
348      *
349      * @param startLine position of line
350      * @param text file text
351      * @param start line column
352      * @return true is that need to be ignored
353      */
354     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
355     @SuppressWarnings("deprecation")
356     private boolean isIgnore(int startLine, FileText text, LineColumn start) {
357         final LineColumn end;
358         if (matcher.end() == 0) {
359             end = text.lineColumn(0);
360         }
361         else {
362             end = text.lineColumn(matcher.end() - 1);
363         }
364         boolean ignore = false;
365         if (ignoreComments) {
366             final FileContents theFileContents = getFileContents();
367             final int startColumn = start.getColumn();
368             final int endLine = end.getLine();
369             final int endColumn = end.getColumn();
370             ignore = theFileContents.hasIntersectionWithComment(startLine,
371                 startColumn, endLine, endColumn);
372         }
373         return ignore;
374     }
375 
376     /**
377      * Displays the right message.
378      *
379      * @param lineNumber the line number the message relates to.
380      */
381     private void logMessage(int lineNumber) {
382         final String msg = getMessage();
383 
384         if (illegalPattern) {
385             log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
386         }
387         else {
388             log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
389         }
390     }
391 
392     /**
393      * Provide right message.
394      *
395      * @return message for violation.
396      */
397     private String getMessage() {
398         String msg;
399 
400         if (message == null || message.isEmpty()) {
401             msg = format.pattern();
402         }
403         else {
404             msg = message;
405         }
406 
407         if (errorCount >= errorLimit) {
408             msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
409         }
410 
411         return msg;
412     }
413 
414 }