///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.internal.utils;
21  
import static com.google.common.truth.Truth.assertWithMessage;

import java.io.IOException;
import java.io.StringReader;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.puppycrawl.tools.checkstyle.XmlLoader;
40  
41  /**
42   * XmlUtil.
43   *
44   * @noinspection ClassOnlyUsedInOnePackage
45   * @noinspectionreason ClassOnlyUsedInOnePackage - class is internal tool, and only used in testing
46   */
47  public final class XmlUtil {
48  
49      private XmlUtil() {
50      }
51  
52      public static Document getRawXml(String fileName, String code, String unserializedSource)
53              throws ParserConfigurationException {
54          Document rawXml = null;
55          try {
56              final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
57              factory.setValidating(false);
58              factory.setFeature(
59                      XmlLoader.LoadExternalDtdFeatureProvider.EXTERNAL_GENERAL_ENTITIES, false);
60              factory.setFeature(
61                      XmlLoader.LoadExternalDtdFeatureProvider.LOAD_EXTERNAL_DTD, false);
62  
63              final DocumentBuilder builder = factory.newDocumentBuilder();
64  
65              rawXml = builder.parse(new InputSource(new StringReader(code)));
66          }
67          catch (IOException | SAXException ex) {
68              assertWithMessage(fileName + " has invalid xml (" + ex.getMessage() + "): "
69                      + unserializedSource).fail();
70          }
71  
72          return rawXml;
73      }
74  
75      public static Set<Node> getChildrenElements(Node node) {
76          final Set<Node> result = new LinkedHashSet<>();
77  
78          for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
79              if (child.getNodeType() != Node.TEXT_NODE) {
80                  result.add(child);
81              }
82          }
83  
84          return result;
85      }
86  
87      public static Node getFirstChildElement(Node node) {
88          Node firstChildElement = null;
89          for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
90              if (child.getNodeType() != Node.TEXT_NODE) {
91                  firstChildElement = child;
92                  break;
93              }
94          }
95  
96          return firstChildElement;
97      }
98  
99      /**
100      * Returns the {@code Node} that has an id attribute with the given value.
101      * The id should be unique within the Xml Document.
102      *
103      * @param node to retrieve information.
104      * @param id the unique {@code id} value for a node.
105      * @return the matching node or {@code null} if none matches.
106      */
107     public static Node findChildElementById(Node node, String id) {
108         Node childElement = null;
109         for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
110             final NamedNodeMap attributes = child.getAttributes();
111             if (attributes != null) {
112                 final Node attribute = attributes.getNamedItem("id");
113                 if (attribute != null && id.equals(attribute.getNodeValue())) {
114                     childElement = child;
115                     break;
116                 }
117             }
118         }
119 
120         return childElement;
121     }
122 
123     public static Set<Node> findChildElementsByTag(Node node, String tag) {
124         final Set<Node> result = new LinkedHashSet<>();
125 
126         for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
127             if (tag.equals(child.getNodeName())) {
128                 result.add(child);
129             }
130             else if (child.hasChildNodes()) {
131                 result.addAll(findChildElementsByTag(child, tag));
132             }
133         }
134 
135         return result;
136     }
137 
138     /**
139      * Returns the value of the "name" attribute for the given node.
140      *
141      * @param node to retrieve the name
142      * @return the value of the attribute "name"
143      */
144     public static String getNameAttributeOfNode(Node node) {
145         return node.getAttributes().getNamedItem("name").getNodeValue();
146     }
147 
148     /**
149      * <p>Sanitizes the given string for safe use in XML documents.</p>
150      * <ul>
151      * <li>Removes all whitespaces at the beginning and at the end of the string;</li>
152      * <li>Replaces repeated whitespaces in the middle of the string with a single space;</li>
153      * <li>Replaces XML entities with escaped values.</li>
154      * </ul>
155      *
156      * @param rawXml the text to sanitize
157      * @return the sanitized text
158      */
159     public static String sanitizeXml(String rawXml) {
160         return rawXml
161                 .replaceAll("(^\\s+|\\s+$)", "")
162                 .replaceAll("\\s+", " ")
163                 .replace("<", "&lt;")
164                 .replace(">", "&gt;");
165     }
166 
167 }