View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.xpath;
21  
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.stream.Collectors;
25  
26  import javax.annotation.Nullable;
27  
28  import com.puppycrawl.tools.checkstyle.TreeWalkerAuditEvent;
29  import com.puppycrawl.tools.checkstyle.api.DetailAST;
30  import com.puppycrawl.tools.checkstyle.api.FileText;
31  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32  import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
33  import com.puppycrawl.tools.checkstyle.utils.XpathUtil;
34  
35  /**
36   * Generates xpath queries. Xpath queries are generated based on received
37   * {@code DetailAst} element, line number, column number and token type.
38   * Token type parameter is optional.
39   *
40   * <p>
41   *     Example class
42   * </p>
43   * <pre>
44   * public class Main {
45   *
46   *     public String sayHello(String name) {
47   *         return "Hello, " + name;
48   *     }
49   * }
50   * </pre>
51   *
52   * <p>
53   *     Following expression returns list of queries. Each query is the string representing full
54   *     path to the node inside Xpath tree, whose line number is 3 and column number is 4.
55   * </p>
56   * <pre>
57   *     new XpathQueryGenerator(rootAst, 3, 4).generate();
58   * </pre>
59   *
60   * <p>
61   *     Result list
62   * </p>
63   * <ul>
64   *     <li>
65   *         /COMPILATION_UNIT/CLASS_DEF[./IDENT[@text='Main']]/OBJBLOCK/METHOD_DEF[./IDENT[@text='sayHello']]
66   *     </li>
67   *     <li>
68   *         /COMPILATION_UNIT/CLASS_DEF[./IDENT[@text='Main']]/OBJBLOCK/METHOD_DEF[./IDENT[@text='sayHello']]
69   *         /MODIFIERS
70   *     </li>
71   *     <li>
72   *         /COMPILATION_UNIT/CLASS_DEF[./IDENT[@text='Main']]/OBJBLOCK/METHOD_DEF[./IDENT[@text='sayHello']]
73   *         /MODIFIERS/LITERAL_PUBLIC
74   *     </li>
75   * </ul>
76   *
77   */
78  public class XpathQueryGenerator {
79  
80      /** The root ast. */
81      private final DetailAST rootAst;
82      /** The line number of the element for which the query should be generated. */
83      private final int lineNumber;
84      /** The column number of the element for which the query should be generated. */
85      private final int columnNumber;
86      /** The token type of the element for which the query should be generated. Optional. */
87      private final int tokenType;
88      /** The {@code FileText} object, representing content of the file. */
89      private final FileText fileText;
90      /** The distance between tab stop position. */
91      private final int tabWidth;
92  
93      /**
94       * Creates a new {@code XpathQueryGenerator} instance.
95       *
96       * @param event {@code TreeWalkerAuditEvent} object
97       * @param tabWidth distance between tab stop position
98       */
99      public XpathQueryGenerator(TreeWalkerAuditEvent event, int tabWidth) {
100         this(event.getRootAst(), event.getLine(), event.getColumn(), event.getTokenType(),
101                 event.getFileContents().getText(), tabWidth);
102     }
103 
104     /**
105      * Creates a new {@code XpathQueryGenerator} instance.
106      *
107      * @param rootAst root ast
108      * @param lineNumber line number of the element for which the query should be generated
109      * @param columnNumber column number of the element for which the query should be generated
110      * @param fileText the {@code FileText} object
111      * @param tabWidth distance between tab stop position
112      */
113     public XpathQueryGenerator(DetailAST rootAst, int lineNumber, int columnNumber,
114                                FileText fileText, int tabWidth) {
115         this(rootAst, lineNumber, columnNumber, 0, fileText, tabWidth);
116     }
117 
118     /**
119      * Creates a new {@code XpathQueryGenerator} instance.
120      *
121      * @param rootAst root ast
122      * @param lineNumber line number of the element for which the query should be generated
123      * @param columnNumber column number of the element for which the query should be generated
124      * @param tokenType token type of the element for which the query should be generated
125      * @param fileText the {@code FileText} object
126      * @param tabWidth distance between tab stop position
127      */
128     public XpathQueryGenerator(DetailAST rootAst, int lineNumber, int columnNumber, int tokenType,
129                                FileText fileText, int tabWidth) {
130         this.rootAst = rootAst;
131         this.lineNumber = lineNumber;
132         this.columnNumber = columnNumber;
133         this.tokenType = tokenType;
134         this.fileText = fileText;
135         this.tabWidth = tabWidth;
136     }
137 
138     /**
139      * Returns list of xpath queries of nodes, matching line number, column number and token type.
140      * This approach uses DetailAST traversal. DetailAST means detail abstract syntax tree.
141      *
142      * @return list of xpath queries of nodes, matching line number, column number and token type
143      */
144     public List<String> generate() {
145         return getMatchingAstElements()
146             .stream()
147             .map(XpathQueryGenerator::generateXpathQuery)
148             .collect(Collectors.toUnmodifiableList());
149     }
150 
151     /**
152      * Returns child {@code DetailAst} element of the given root, which has text attribute.
153      *
154      * @param root {@code DetailAST} root ast
155      * @return child {@code DetailAst} element of the given root
156      */
157     @Nullable
158     private static DetailAST findChildWithTextAttribute(DetailAST root) {
159         return TokenUtil.findFirstTokenByPredicate(root,
160                 XpathUtil::supportsTextAttribute).orElse(null);
161     }
162 
163     /**
164      * Returns child {@code DetailAst} element of the given root, which has text attribute.
165      * Performs search recursively inside node's subtree.
166      *
167      * @param root {@code DetailAST} root ast
168      * @return child {@code DetailAst} element of the given root
169      */
170     @Nullable
171     private static DetailAST findChildWithTextAttributeRecursively(DetailAST root) {
172         DetailAST res = findChildWithTextAttribute(root);
173         for (DetailAST ast = root.getFirstChild(); ast != null && res == null;
174              ast = ast.getNextSibling()) {
175             res = findChildWithTextAttributeRecursively(ast);
176         }
177         return res;
178     }
179 
180     /**
181      * Returns full xpath query for given ast element.
182      *
183      * @param ast {@code DetailAST} ast element
184      * @return full xpath query for given ast element
185      */
186     public static String generateXpathQuery(DetailAST ast) {
187         final StringBuilder xpathQueryBuilder = new StringBuilder(getXpathQuery(null, ast));
188         if (!isXpathQueryForNodeIsAccurateEnough(ast)) {
189             xpathQueryBuilder.append('[');
190             final DetailAST child = findChildWithTextAttributeRecursively(ast);
191             if (child == null) {
192                 xpathQueryBuilder.append(findPositionAmongSiblings(ast));
193             }
194             else {
195                 xpathQueryBuilder.append('.').append(getXpathQuery(ast, child));
196             }
197             xpathQueryBuilder.append(']');
198         }
199         return xpathQueryBuilder.toString();
200     }
201 
202     /**
203      * Finds position of the ast element among siblings.
204      *
205      * @param ast {@code DetailAST} ast element
206      * @return position of the ast element
207      */
208     private static int findPositionAmongSiblings(DetailAST ast) {
209         DetailAST cur = ast;
210         int pos = 0;
211         while (cur != null) {
212             if (cur.getType() == ast.getType()) {
213                 pos++;
214             }
215             cur = cur.getPreviousSibling();
216         }
217         return pos;
218     }
219 
220     /**
221      * Checks if ast element has all requirements to have unique xpath query.
222      *
223      * @param ast {@code DetailAST} ast element
224      * @return true if ast element will have unique xpath query, false otherwise
225      */
226     private static boolean isXpathQueryForNodeIsAccurateEnough(DetailAST ast) {
227         return !hasAtLeastOneSiblingWithSameTokenType(ast)
228                 || XpathUtil.supportsTextAttribute(ast)
229                 || findChildWithTextAttribute(ast) != null;
230     }
231 
232     /**
233      * Returns list of nodes matching defined line number, column number and token type.
234      *
235      * @return list of nodes matching defined line number, column number and token type
236      */
237     private List<DetailAST> getMatchingAstElements() {
238         final List<DetailAST> result = new ArrayList<>();
239         DetailAST curNode = rootAst;
240         while (curNode != null) {
241             if (isMatchingByLineAndColumnAndTokenType(curNode)) {
242                 result.add(curNode);
243             }
244             DetailAST toVisit = curNode.getFirstChild();
245             while (curNode != null && toVisit == null) {
246                 toVisit = curNode.getNextSibling();
247                 curNode = curNode.getParent();
248             }
249 
250             curNode = toVisit;
251         }
252         return result;
253     }
254 
255     /**
256      * Returns relative xpath query for given ast element from root.
257      *
258      * @param root {@code DetailAST} root element
259      * @param ast {@code DetailAST} ast element
260      * @return relative xpath query for given ast element from root
261      */
262     private static String getXpathQuery(DetailAST root, DetailAST ast) {
263         final StringBuilder resultBuilder = new StringBuilder(1024);
264         DetailAST cur = ast;
265         while (cur != root) {
266             final StringBuilder curNodeQueryBuilder = new StringBuilder(256);
267             curNodeQueryBuilder.append('/')
268                     .append(TokenUtil.getTokenName(cur.getType()));
269             if (XpathUtil.supportsTextAttribute(cur)) {
270                 curNodeQueryBuilder.append("[@text='")
271                         .append(encode(XpathUtil.getTextAttributeValue(cur)))
272                         .append("']");
273             }
274             else {
275                 final DetailAST child = findChildWithTextAttribute(cur);
276                 if (child != null && child != ast) {
277                     curNodeQueryBuilder.append("[.")
278                             .append(getXpathQuery(cur, child))
279                             .append(']');
280                 }
281             }
282 
283             resultBuilder.insert(0, curNodeQueryBuilder);
284             cur = cur.getParent();
285         }
286         return resultBuilder.toString();
287     }
288 
289     /**
290      * Checks if the given ast element has unique {@code TokenTypes} among siblings.
291      *
292      * @param ast {@code DetailAST} ast element
293      * @return if the given ast element has unique {@code TokenTypes} among siblings
294      */
295     private static boolean hasAtLeastOneSiblingWithSameTokenType(DetailAST ast) {
296         boolean result = false;
297         DetailAST prev = ast.getPreviousSibling();
298         while (prev != null) {
299             if (prev.getType() == ast.getType()) {
300                 result = true;
301                 break;
302             }
303             prev = prev.getPreviousSibling();
304         }
305         DetailAST next = ast.getNextSibling();
306         while (next != null) {
307             if (next.getType() == ast.getType()) {
308                 result = true;
309                 break;
310             }
311             next = next.getNextSibling();
312         }
313         return result;
314     }
315 
316     /**
317      * Returns the column number with tabs expanded.
318      *
319      * @param ast {@code DetailAST} root ast
320      * @return the column number with tabs expanded
321      */
322     private int expandedTabColumn(DetailAST ast) {
323         return 1 + CommonUtil.lengthExpandedTabs(fileText.get(lineNumber - 1),
324                 ast.getColumnNo(), tabWidth);
325     }
326 
327     /**
328      * Checks if the given {@code DetailAST} node is matching line number, column number and token
329      * type.
330      *
331      * @param ast {@code DetailAST} ast element
332      * @return true if the given {@code DetailAST} node is matching
333      */
334     private boolean isMatchingByLineAndColumnAndTokenType(DetailAST ast) {
335         return ast.getLineNo() == lineNumber
336                 && expandedTabColumn(ast) == columnNumber
337                 && (tokenType == 0 || tokenType == ast.getType());
338     }
339 
340     /**
341      * Escape &lt;, &gt;, &amp;, &#39; and &quot; as their entities.
342      * Custom method for Xpath generation to maintain compatibility
343      * with Saxon and encoding outside Ascii range characters.
344      *
345      * <p>According to
346      * <a href="https://saxon.sourceforge.net/saxon7.1/expressions.html">Saxon documentation</a>:
347      * <br>
348      * From Saxon 7.1, string delimiters can be doubled within the string to represent`
349      * the delimiter itself: for example select='"He said, ""Go!"""'.
350      *
351      * <p>Guava cannot as Guava encoding does not meet our requirements like
352      * double encoding for apos, removed slashes which are basic requirements
353      * for Saxon to decode.
354      *
355      * @param value the value to escape.
356      * @return the escaped value if necessary.
357      */
358     private static String encode(String value) {
359         final StringBuilder sb = new StringBuilder(256);
360         value.codePoints().forEach(
361             chr -> {
362                 sb.append(encodeCharacter(Character.toChars(chr)[0]));
363             }
364         );
365         return sb.toString();
366     }
367 
368     /**
369      * Encodes escape character for Xpath. Escape characters need '&amp;' before, but it also
370      * requires XML 1.1
371      * until <a href="https://github.com/checkstyle/checkstyle/issues/5168">#5168</a>.
372      *
373      * @param chr Character to check.
374      * @return String, Encoded string.
375      */
376     private static String encodeCharacter(char chr) {
377         final String encode;
378         switch (chr) {
379             case '<':
380                 encode = "&lt;";
381                 break;
382             case '>':
383                 encode = "&gt;";
384                 break;
385             case '\'':
386                 encode = "&apos;&apos;";
387                 break;
388             case '\"':
389                 encode = "&quot;";
390                 break;
391             case '&':
392                 encode = "&amp;";
393                 break;
394             default:
395                 encode = String.valueOf(chr);
396                 break;
397         }
398         return encode;
399     }
400 }