View Javadoc
1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2018 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.LinkedList;
23  import java.util.List;
24  
25  /**
26   * <p>
27   * Helper class used to parse HTML tags or generic type identifiers
28   * from a single line of text. Just the beginning of the HTML tag
29   * is located.  No attempt is made to parse out the complete tag,
30   * particularly since some of the tag parameters could be located
31   * on the following line of text.  The {@code hasNextTag} and
32   * {@code nextTag} methods are used to iterate through the HTML
33   * tags or generic type identifiers that were found on the line of text.
34   * </p>
35   *
36   * <p>
37   * This class isn't really specific to HTML tags. Currently the only HTML
38   * tag that this class looks specifically for is the HTML comment tag.
39   * This class helps figure out if a tag exists and if it is well-formed.
40   * It does not know whether it is valid HTML.  This class is also used for
41   * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
42   * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
43   * </p>
44   *
45   * @author Chris Stillwell
46   */
47  class TagParser {
48  
49      /** List of HtmlTags found on the input line of text. */
50      private final List<HtmlTag> tags = new LinkedList<>();
51  
52      /**
53       * Constructs a TagParser and finds the first tag if any.
54       * @param text the line of text to parse.
55       * @param lineNo the source line number.
56       */
57      TagParser(String[] text, int lineNo) {
58          parseTags(text, lineNo);
59      }
60  
61      /**
62       * Returns the next available HtmlTag.
63       * @return a HtmlTag or {@code null} if none available.
64       * @throws IndexOutOfBoundsException if there are no HtmlTags
65       *         left to return.
66       */
67      public HtmlTag nextTag() {
68          return tags.remove(0);
69      }
70  
71      /**
72       * Indicates if there are any more HtmlTag to retrieve.
73       * @return {@code true} if there are more tags.
74       */
75      public boolean hasNextTag() {
76          return !tags.isEmpty();
77      }
78  
79      /**
80       * Performs lazy initialization on the internal tags List
81       * and adds the tag.
82       * @param tag the HtmlTag to add.
83       */
84      private void add(HtmlTag tag) {
85          tags.add(tag);
86      }
87  
88      /**
89       * Parses the text line for any HTML tags and adds them to the internal
90       * List of tags.
91       * @param text the source line to parse.
92       * @param lineNo the source line number.
93       */
94      private void parseTags(String[] text, int lineNo) {
95          final int nLines = text.length;
96          Point position = findChar(text, '<', new Point(0, 0));
97          while (position.getLineNo() < nLines) {
98              // if this is html comment then skip it
99              if (isCommentTag(text, position)) {
100                 position = skipHtmlComment(text, position);
101             }
102             else if (isTag(text, position)) {
103                 position = parseTag(text, lineNo, nLines, position);
104             }
105             else {
106                 position = getNextCharPos(text, position);
107             }
108             position = findChar(text, '<', position);
109         }
110     }
111 
112     /**
113      * Parses the tag and return position after it.
114      * @param text the source line to parse.
115      * @param lineNo the source line number.
116      * @param nLines line length
117      * @param position start position for parsing
118      * @return position after tag
119      */
120     private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
121         // find end of tag
122         final Point endTag = findChar(text, '>', position);
123         final boolean incompleteTag = endTag.getLineNo() >= nLines;
124         // get tag id (one word)
125         final String tagId;
126 
127         if (incompleteTag) {
128             tagId = "";
129         }
130         else {
131             tagId = getTagId(text, position);
132         }
133         // is this closed tag
134         final boolean closedTag =
135                 endTag.getLineNo() < nLines
136                  && text[endTag.getLineNo()]
137                  .charAt(endTag.getColumnNo() - 1) == '/';
138         // add new tag
139         add(new HtmlTag(tagId,
140                         position.getLineNo() + lineNo,
141                         position.getColumnNo(),
142                         closedTag,
143                         incompleteTag,
144                         text[position.getLineNo()]));
145         return endTag;
146     }
147 
148     /**
149      * Checks if the given position is start one for HTML tag.
150      * @param javadocText text of javadoc comments.
151      * @param pos position to check.
152      * @return {@code true} some HTML tag starts from given position.
153      */
154     private static boolean isTag(String[] javadocText, Point pos) {
155         final int column = pos.getColumnNo() + 1;
156         final String text = javadocText[pos.getLineNo()];
157 
158         //Character.isJavaIdentifier... may not be a valid HTML
159         //identifier but is valid for generics
160         return column < text.length()
161                 && (Character.isJavaIdentifierStart(text.charAt(column))
162                     || text.charAt(column) == '/')
163                 || column >= text.length();
164     }
165 
166     /**
167      * Parse tag id.
168      * @param javadocText text of javadoc comments.
169      * @param tagStart start position of the tag
170      * @return id for given tag
171      */
172     private static String getTagId(String[] javadocText, Point tagStart) {
173         String tagId = "";
174         int column = tagStart.getColumnNo() + 1;
175         String text = javadocText[tagStart.getLineNo()];
176         if (column < text.length()) {
177             if (text.charAt(column) == '/') {
178                 column++;
179             }
180 
181             text = text.substring(column).trim();
182             int position = 0;
183 
184             //Character.isJavaIdentifier... may not be a valid HTML
185             //identifier but is valid for generics
186             while (position < text.length()
187                     && (Character.isJavaIdentifierStart(text.charAt(position))
188                         || Character.isJavaIdentifierPart(text.charAt(position)))) {
189                 position++;
190             }
191 
192             tagId = text.substring(0, position);
193         }
194         return tagId;
195     }
196 
197     /**
198      * If this is a HTML-comments.
199      * @param text text of javadoc comments
200      * @param pos position to check
201      * @return {@code true} if HTML-comments
202      *         starts form given position.
203      */
204     private static boolean isCommentTag(String[] text, Point pos) {
205         return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
206     }
207 
208     /**
209      * Skips HTML comments.
210      * @param text text of javadoc comments.
211      * @param fromPoint start position of HTML-comments
212      * @return position after HTML-comments
213      */
214     private static Point skipHtmlComment(String[] text, Point fromPoint) {
215         Point toPoint = fromPoint;
216         toPoint = findChar(text, '>', toPoint);
217         while (!text[toPoint.getLineNo()]
218                .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
219             toPoint = findChar(text, '>', getNextCharPos(text, toPoint));
220         }
221         return toPoint;
222     }
223 
224     /**
225      * Finds next occurrence of given character.
226      * @param text text to search
227      * @param character character to search
228      * @param from position to start search
229      * @return position of next occurrence of given character
230      */
231     private static Point findChar(String[] text, char character, Point from) {
232         Point curr = new Point(from.getLineNo(), from.getColumnNo());
233         while (curr.getLineNo() < text.length
234                && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) {
235             curr = getNextCharPos(text, curr);
236         }
237 
238         return curr;
239     }
240 
241     /**
242      * Returns position of next comment character, skips
243      * whitespaces and asterisks.
244      * @param text to search.
245      * @param from location to search from
246      * @return location of the next character.
247      */
248     private static Point getNextCharPos(String[] text, Point from) {
249         int line = from.getLineNo();
250         int column = from.getColumnNo() + 1;
251         while (line < text.length && column >= text[line].length()) {
252             // go to the next line
253             line++;
254             column = 0;
255             if (line < text.length) {
256                 //skip beginning spaces and stars
257                 final String currentLine = text[line];
258                 while (column < currentLine.length()
259                        && (Character.isWhitespace(currentLine.charAt(column))
260                            || currentLine.charAt(column) == '*')) {
261                     column++;
262                     if (column < currentLine.length()
263                         && currentLine.charAt(column - 1) == '*'
264                         && currentLine.charAt(column) == '/') {
265                         // this is end of comment
266                         column = currentLine.length();
267                     }
268                 }
269             }
270         }
271 
272         return new Point(line, column);
273     }
274 
275     /**
276      * Represents current position in the text.
277      * @author o_sukhodolsky
278      */
279     private static final class Point {
280 
281         /** Line number. */
282         private final int lineNo;
283         /** Column number.*/
284         private final int columnNo;
285 
286         /**
287          * Creates new {@code Point} instance.
288          * @param lineNo line number
289          * @param columnNo column number
290          */
291         Point(int lineNo, int columnNo) {
292             this.lineNo = lineNo;
293             this.columnNo = columnNo;
294         }
295 
296         /**
297          * Getter for line number.
298          * @return line number of the position.
299          */
300         public int getLineNo() {
301             return lineNo;
302         }
303 
304         /**
305          * Getter for column number.
306          * @return column number of the position.
307          */
308         public int getColumnNo() {
309             return columnNo;
310         }
311 
312     }
313 
314 }