001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
027import com.puppycrawl.tools.checkstyle.api.DetailAST;
028import com.puppycrawl.tools.checkstyle.api.FileContents;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030import com.puppycrawl.tools.checkstyle.api.LineColumn;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <p>
 * Checks that a specified pattern exists, exists fewer than
 * a set number of times, or does not exist in the file.
037 * </p>
038 * <p>
039 * This check combines all the functionality provided by
040 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
041 * except supplying the regular expression from a file.
042 * </p>
043 * <p>
044 * It differs from them in that it works in multiline mode. Its regular expression
045 * can span multiple lines and it checks this against the whole file at once.
046 * The others work in single-line mode. Their single or multiple regular expressions
047 * can only span one line. They check each of these against each line in the file in turn.
048 * </p>
049 * <p>
050 * <b>Note:</b> Because of the different mode of operation there may be some
051 * changes in the regular expressions used to achieve a particular end.
052 * </p>
053 * <p>
054 * In multiline mode...
055 * </p>
056 * <ul>
057 * <li>
058 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
059 * </li>
060 * <li>
061 * For beginning of the input use {@code \A}.
062 * </li>
063 * <li>
064 * {@code $} means the end of a line, as opposed to the end of the input.
065 * </li>
066 * <li>
067 * For end of input use {@code \Z}.
068 * </li>
069 * <li>
070 * Each line in the file is terminated with a line feed character.
071 * </li>
072 * </ul>
073 * <p>
074 * <b>Note:</b> Not all regular expression engines are created equal.
075 * Some provide extra functions that others do not and some elements
076 * of the syntax may vary. This check makes use of the
077 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
078 * java.util.regex package</a>; please check its documentation for details
079 * of how to construct a regular expression to achieve a particular goal.
080 * </p>
081 * <p>
082 * <b>Note:</b> When entering a regular expression as a parameter in
083 * the XML config file you must also take into account the XML rules. e.g.
084 * if you want to match a &lt; symbol you need to enter &amp;lt;.
085 * The regular expression should be entered on one line.
086 * </p>
087 * <ul>
088 * <li>
089 * Property {@code duplicateLimit} - Control whether to check for duplicates
090 * of a required pattern, any negative value means no checking for duplicates,
091 * any positive value is used as the maximum number of allowed duplicates,
092 * if the limit is exceeded violations will be logged.
093 * Type is {@code int}.
094 * Default value is {@code 0}.
095 * </li>
096 * <li>
097 * Property {@code errorLimit} - Specify the maximum number of violations before
098 * the check will abort.
099 * Type is {@code int}.
100 * Default value is {@code 100}.
101 * </li>
102 * <li>
103 * Property {@code format} - Specify the pattern to match against.
104 * Type is {@code java.util.regex.Pattern}.
105 * Default value is {@code "^$"}.
106 * </li>
107 * <li>
108 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
109 * Type is {@code boolean}.
110 * Default value is {@code false}.
111 * </li>
112 * <li>
113 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
114 * Type is {@code boolean}.
115 * Default value is {@code false}.
116 * </li>
117 * <li>
118 * Property {@code message} - Specify message which is used to notify about
119 * violations, if empty then the default (hard-coded) message is used.
120 * Type is {@code java.lang.String}.
121 * Default value is {@code null}.
122 * </li>
123 * </ul>
124 * <p>
125 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
126 * </p>
127 * <p>
128 * Violation Message Keys:
129 * </p>
130 * <ul>
131 * <li>
132 * {@code duplicate.regexp}
133 * </li>
134 * <li>
135 * {@code illegal.regexp}
136 * </li>
137 * <li>
138 * {@code required.regexp}
139 * </li>
140 * </ul>
141 *
142 * @since 4.0
143 */
144@FileStatefulCheck
145public class RegexpCheck extends AbstractCheck {
146
147    /**
148     * A key is pointing to the warning message text in "messages.properties"
149     * file.
150     */
151    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
152
153    /**
154     * A key is pointing to the warning message text in "messages.properties"
155     * file.
156     */
157    public static final String MSG_REQUIRED_REGEXP = "required.regexp";
158
159    /**
160     * A key is pointing to the warning message text in "messages.properties"
161     * file.
162     */
163    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
164
165    /** Default duplicate limit. */
166    private static final int DEFAULT_DUPLICATE_LIMIT = -1;
167
168    /** Default error report limit. */
169    private static final int DEFAULT_ERROR_LIMIT = 100;
170
171    /** Error count exceeded message. */
172    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
173        "The error limit has been exceeded, "
174        + "the check is aborting, there may be more unreported errors.";
175
176    /**
177     * Specify message which is used to notify about violations,
178     * if empty then the default (hard-coded) message is used.
179     */
180    private String message;
181
182    /** Control whether to ignore matches found within comments. */
183    private boolean ignoreComments;
184
185    /** Control whether the pattern is required or illegal. */
186    private boolean illegalPattern;
187
188    /** Specify the maximum number of violations before the check will abort. */
189    private int errorLimit = DEFAULT_ERROR_LIMIT;
190
191    /**
192     * Control whether to check for duplicates of a required pattern,
193     * any negative value means no checking for duplicates,
194     * any positive value is used as the maximum number of allowed duplicates,
195     * if the limit is exceeded violations will be logged.
196     */
197    private int duplicateLimit;
198
199    /** Boolean to say if we should check for duplicates. */
200    private boolean checkForDuplicates;
201
202    /** Tracks number of matches made. */
203    private int matchCount;
204
205    /** Tracks number of errors. */
206    private int errorCount;
207
208    /** Specify the pattern to match against. */
209    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
210
211    /** The matcher. */
212    private Matcher matcher;
213
214    /**
215     * Setter to specify message which is used to notify about violations,
216     * if empty then the default (hard-coded) message is used.
217     *
218     * @param message custom message which should be used in report.
219     * @since 4.0
220     */
221    public void setMessage(String message) {
222        this.message = message;
223    }
224
225    /**
226     * Setter to control whether to ignore matches found within comments.
227     *
228     * @param ignoreComments True if comments should be ignored.
229     * @since 4.0
230     */
231    public void setIgnoreComments(boolean ignoreComments) {
232        this.ignoreComments = ignoreComments;
233    }
234
235    /**
236     * Setter to control whether the pattern is required or illegal.
237     *
238     * @param illegalPattern True if pattern is not allowed.
239     * @since 4.0
240     */
241    public void setIllegalPattern(boolean illegalPattern) {
242        this.illegalPattern = illegalPattern;
243    }
244
245    /**
246     * Setter to specify the maximum number of violations before the check will abort.
247     *
248     * @param errorLimit the number of errors to report.
249     * @since 4.0
250     */
251    public void setErrorLimit(int errorLimit) {
252        this.errorLimit = errorLimit;
253    }
254
255    /**
256     * Setter to control whether to check for duplicates of a required pattern,
257     * any negative value means no checking for duplicates,
258     * any positive value is used as the maximum number of allowed duplicates,
259     * if the limit is exceeded violations will be logged.
260     *
261     * @param duplicateLimit negative values mean no duplicate checking,
262     *     any positive value is used as the limit.
263     * @since 4.0
264     */
265    public void setDuplicateLimit(int duplicateLimit) {
266        this.duplicateLimit = duplicateLimit;
267        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
268    }
269
270    /**
271     * Setter to specify the pattern to match against.
272     *
273     * @param pattern the new pattern
274     * @since 4.0
275     */
276    public final void setFormat(Pattern pattern) {
277        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
278    }
279
280    @Override
281    public int[] getDefaultTokens() {
282        return getRequiredTokens();
283    }
284
285    @Override
286    public int[] getAcceptableTokens() {
287        return getRequiredTokens();
288    }
289
290    @Override
291    public int[] getRequiredTokens() {
292        return CommonUtil.EMPTY_INT_ARRAY;
293    }
294
295    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
296    @SuppressWarnings("deprecation")
297    @Override
298    public void beginTree(DetailAST rootAST) {
299        matcher = format.matcher(getFileContents().getText().getFullText());
300        matchCount = 0;
301        errorCount = 0;
302        findMatch();
303    }
304
305    /** Recursive method that finds the matches. */
306    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
307    @SuppressWarnings("deprecation")
308    private void findMatch() {
309        final boolean foundMatch = matcher.find();
310        if (foundMatch) {
311            final FileText text = getFileContents().getText();
312            final LineColumn start = text.lineColumn(matcher.start());
313            final int startLine = start.getLine();
314
315            final boolean ignore = isIgnore(startLine, text, start);
316
317            if (!ignore) {
318                matchCount++;
319                if (illegalPattern || checkForDuplicates
320                        && matchCount - 1 > duplicateLimit) {
321                    errorCount++;
322                    logMessage(startLine);
323                }
324            }
325            if (canContinueValidation(ignore)) {
326                findMatch();
327            }
328        }
329        else if (!illegalPattern && matchCount == 0) {
330            final String msg = getMessage();
331            log(1, MSG_REQUIRED_REGEXP, msg);
332        }
333    }
334
335    /**
336     * Check if we can stop validation.
337     *
338     * @param ignore flag
339     * @return true is we can continue
340     */
341    private boolean canContinueValidation(boolean ignore) {
342        return errorCount <= errorLimit - 1
343                && (ignore || illegalPattern || checkForDuplicates);
344    }
345
346    /**
347     * Detect ignore situation.
348     *
349     * @param startLine position of line
350     * @param text file text
351     * @param start line column
352     * @return true is that need to be ignored
353     */
354    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
355    @SuppressWarnings("deprecation")
356    private boolean isIgnore(int startLine, FileText text, LineColumn start) {
357        final LineColumn end;
358        if (matcher.end() == 0) {
359            end = text.lineColumn(0);
360        }
361        else {
362            end = text.lineColumn(matcher.end() - 1);
363        }
364        boolean ignore = false;
365        if (ignoreComments) {
366            final FileContents theFileContents = getFileContents();
367            final int startColumn = start.getColumn();
368            final int endLine = end.getLine();
369            final int endColumn = end.getColumn();
370            ignore = theFileContents.hasIntersectionWithComment(startLine,
371                startColumn, endLine, endColumn);
372        }
373        return ignore;
374    }
375
376    /**
377     * Displays the right message.
378     *
379     * @param lineNumber the line number the message relates to.
380     */
381    private void logMessage(int lineNumber) {
382        final String msg = getMessage();
383
384        if (illegalPattern) {
385            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
386        }
387        else {
388            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
389        }
390    }
391
392    /**
393     * Provide right message.
394     *
395     * @return message for violation.
396     */
397    private String getMessage() {
398        String msg;
399
400        if (message == null || message.isEmpty()) {
401            msg = format.pattern();
402        }
403        else {
404            msg = message;
405        }
406
407        if (errorCount >= errorLimit) {
408            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
409        }
410
411        return msg;
412    }
413
414}