View Javadoc
1   /*
2    * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4    *
5    * This code is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU General Public License version 2 only, as
7    * published by the Free Software Foundation.  Oracle designates this
8    * particular file as subject to the "Classpath" exception as provided
9    * by Oracle in the LICENSE file that accompanied this code.
10   *
11   * This code is distributed in the hope that it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14   * version 2 for more details (a copy is included in the LICENSE file that
15   * accompanied this code).
16   *
17   * You should have received a copy of the GNU General Public License version
18   * 2 along with this work; if not, write to the Free Software Foundation,
19   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20   *
21   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22   * or visit www.oracle.com if you need additional information or have any
23   * questions.
24   */
25  
26  package com.sun.tools.internal.xjc.reader.internalizer;
27  
28  import com.sun.istack.internal.NotNull;
29  import com.sun.istack.internal.XMLStreamReaderToContentHandler;
30  import com.sun.tools.internal.xjc.ErrorReceiver;
31  import com.sun.tools.internal.xjc.Options;
32  import com.sun.tools.internal.xjc.reader.Const;
33  import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
34  import com.sun.xml.internal.bind.marshaller.DataWriter;
35  import com.sun.xml.internal.bind.v2.util.XmlFactory;
36  import com.sun.xml.internal.xsom.parser.JAXPParser;
37  import com.sun.xml.internal.xsom.parser.XMLParser;
38  import org.w3c.dom.Document;
39  import org.w3c.dom.Element;
40  import org.xml.sax.*;
41  import org.xml.sax.helpers.XMLFilterImpl;
42  
43  import javax.xml.parsers.DocumentBuilder;
44  import javax.xml.parsers.DocumentBuilderFactory;
45  import javax.xml.parsers.ParserConfigurationException;
46  import javax.xml.parsers.SAXParserFactory;
47  import javax.xml.stream.XMLStreamException;
48  import javax.xml.stream.XMLStreamReader;
49  import javax.xml.transform.Source;
50  import javax.xml.transform.Transformer;
51  import javax.xml.transform.TransformerException;
52  import javax.xml.transform.TransformerFactory;
53  import javax.xml.transform.dom.DOMSource;
54  import javax.xml.transform.sax.SAXResult;
55  import javax.xml.transform.sax.SAXSource;
56  import javax.xml.validation.SchemaFactory;
57  import java.io.IOException;
58  import java.io.OutputStream;
59  import java.io.OutputStreamWriter;
60  import java.util.*;
61  
62  import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess;
63  import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
64  
65  
66  /**
67   * Builds a DOM forest and maintains association from
68   * system IDs to DOM trees.
69   *
70   * <p>
71   * A forest is a transitive reflexive closure of referenced documents.
72   * IOW, if a document is in a forest, all the documents referenced from
73   * it is in a forest, too. To support this semantics, {@link DOMForest}
74   * uses {@link InternalizationLogic} to find referenced documents.
75   *
76   * <p>
77   * Some documents are marked as "root"s, meaning those documents were
78   * put into a forest explicitly, not because it is referenced from another
79   * document. (However, a root document can be referenced from other
80   * documents, too.)
81   *
82   * @author
83   *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
84   */
85  public final class DOMForest {
86      /** actual data storage map&lt;SystemId,Document>. */
87      private final Map<String,Document> core = new HashMap<String,Document>();
88  
89      /**
90       * To correctly feed documents to a schema parser, we need to remember
91       * which documents (of the forest) were given as the root
92       * documents, and which of them are read as included/imported
93       * documents.
94       *
95       * <p>
96       * Set of system ids as strings.
97       */
98      private final Set<String> rootDocuments = new HashSet<String>();
99  
100     /** Stores location information for all the trees in this forest. */
101     public final LocatorTable locatorTable = new LocatorTable();
102 
103     /** Stores all the outer-most &lt;jaxb:bindings> customizations. */
104     public final Set<Element> outerMostBindings = new HashSet<Element>();
105 
106     /** Used to resolve references to other schema documents. */
107     private EntityResolver entityResolver = null;
108 
109     /** Errors encountered during the parsing will be sent to this object. */
110     private ErrorReceiver errorReceiver = null;
111 
112     /** Schema language dependent part of the processing. */
113     protected final InternalizationLogic logic;
114 
115     private final SAXParserFactory parserFactory;
116     private final DocumentBuilder documentBuilder;
117 
118     private final Options options;
119 
120     public DOMForest(
121         SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
122         InternalizationLogic logic ) {
123 
124         this.parserFactory = parserFactory;
125         this.documentBuilder = documentBuilder;
126         this.logic = logic;
127         this.options = null;
128     }
129 
130     public DOMForest( InternalizationLogic logic, Options opt ) {
131 
132         if (opt == null) throw new AssertionError("Options object null");
133         this.options = opt;
134 
135         try {
136             DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
137             this.documentBuilder = dbf.newDocumentBuilder();
138             this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
139         } catch( ParserConfigurationException e ) {
140             throw new AssertionError(e);
141         }
142 
143         this.logic = logic;
144     }
145 
146     /**
147      * Gets the DOM tree associated with the specified system ID,
148      * or null if none is found.
149      */
150     public Document get( String systemId ) {
151         Document doc = core.get(systemId);
152 
153         if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
154             // As of JDK1.4, java.net.URL.toExternal method returns URLs like
155             // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
156             // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
157             // and this descripancy breaks DOM look up by system ID.
158 
159             // this extra check solves this problem.
160             doc = core.get( "file://"+systemId.substring(5) );
161         }
162 
163         if( doc==null && systemId.startsWith("file:") ) {
164             // on Windows, filenames are case insensitive.
165             // perform case-insensitive search for improved user experience
166             String systemPath = getPath(systemId);
167             for (String key : core.keySet()) {
168                 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
169                     doc = core.get(key);
170                     break;
171                 }
172             }
173         }
174 
175         return doc;
176     }
177 
178     /**
179      * Strips off the leading 'file:///' portion from an URL.
180      */
181     private String getPath(String key) {
182         key = key.substring(5); // skip 'file:'
183         while(key.length()>0 && key.charAt(0)=='/') {
184             key = key.substring(1);
185         }
186         return key;
187     }
188 
189     /**
190      * Returns a read-only set of root document system IDs.
191      */
192     public Set<String> getRootDocuments() {
193         return Collections.unmodifiableSet(rootDocuments);
194     }
195 
196     /**
197      * Picks one document at random and returns it.
198      */
199     public Document getOneDocument() {
200         for (Document dom : core.values()) {
201             if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
202                 return dom;
203         }
204         // we should have caught this error very early on
205         throw new AssertionError();
206     }
207 
208     /**
209      * Checks the correctness of the XML Schema documents and return true
210      * if it's OK.
211      *
212      * <p>
213      * This method performs a weaker version of the tests where error messages
214      * are provided without line number information. So whenever possible
215      * use {@link SchemaConstraintChecker}.
216      *
217      * @see SchemaConstraintChecker
218      */
219     public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
220         try {
221             boolean disableXmlSecurity = false;
222             if (options != null) {
223                 disableXmlSecurity = options.disableXmlSecurity;
224             }
225             SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
226             ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
227             sf.setErrorHandler(filter);
228             Set<String> roots = getRootDocuments();
229             Source[] sources = new Source[roots.size()];
230             int i=0;
231             for (String root : roots) {
232                 sources[i++] = new DOMSource(get(root),root);
233             }
234             sf.newSchema(sources);
235             return !filter.hadError();
236         } catch (SAXException e) {
237             // the errors should have been reported
238             return false;
239         }
240     }
241 
242     /**
243      * Gets the system ID from which the given DOM is parsed.
244      * <p>
245      * Poor-man's base URI.
246      */
247     public String getSystemId( Document dom ) {
248         for (Map.Entry<String,Document> e : core.entrySet()) {
249             if (e.getValue() == dom)
250                 return e.getKey();
251         }
252         return null;
253     }
254 
255     public Document parse( InputSource source, boolean root ) throws SAXException {
256         if( source.getSystemId()==null )
257             throw new IllegalArgumentException();
258 
259         return parse( source.getSystemId(), source, root );
260     }
261 
262     /**
263      * Parses an XML at the given location (
264      * and XMLs referenced by it) into DOM trees
265      * and stores them to this forest.
266      *
267      * @return the parsed DOM document object.
268      */
269     public Document parse( String systemId, boolean root ) throws SAXException, IOException {
270 
271         systemId = Options.normalizeSystemId(systemId);
272 
273         if( core.containsKey(systemId) )
274             // this document has already been parsed. Just ignore.
275             return core.get(systemId);
276 
277         InputSource is=null;
278 
279         // allow entity resolver to find the actual byte stream.
280         if( entityResolver!=null )
281             is = entityResolver.resolveEntity(null,systemId);
282         if( is==null )
283             is = new InputSource(systemId);
284 
285         // but we still use the original system Id as the key.
286         return parse( systemId, is, root );
287     }
288 
289     /**
290      * Returns a {@link ContentHandler} to feed SAX events into.
291      *
292      * <p>
293      * The client of this class can feed SAX events into the handler
294      * to parse a document into this DOM forest.
295      *
296      * This version requires that the DOM object to be created and registered
297      * to the map beforehand.
298      */
299     private ContentHandler getParserHandler( Document dom ) {
300         ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
301         handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
302         handler = new VersionChecker(handler,errorReceiver,entityResolver);
303 
304         // insert the reference finder so that
305         // included/imported schemas will be also parsed
306         XMLFilterImpl f = logic.createExternalReferenceFinder(this);
307         f.setContentHandler(handler);
308 
309         if(errorReceiver!=null)
310             f.setErrorHandler(errorReceiver);
311         if(entityResolver!=null)
312             f.setEntityResolver(entityResolver);
313 
314         return f;
315     }
316 
317     public interface Handler extends ContentHandler {
318         /**
319          * Gets the DOM that was built.
320          */
321         public Document getDocument();
322     }
323 
324     private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
325     }
326 
327     /**
328      * Returns a {@link ContentHandler} to feed SAX events into.
329      *
330      * <p>
331      * The client of this class can feed SAX events into the handler
332      * to parse a document into this DOM forest.
333      */
334     public Handler getParserHandler( String systemId, boolean root ) {
335         final Document dom = documentBuilder.newDocument();
336         core.put( systemId, dom );
337         if(root)
338             rootDocuments.add(systemId);
339 
340         ContentHandler handler = getParserHandler(dom);
341 
342         // we will register the DOM to the map once the system ID becomes available.
343         // but the SAX allows the event source to not to provide that information,
344         // so be prepared for such case.
345         HandlerImpl x = new HandlerImpl() {
346             public Document getDocument() {
347                 return dom;
348             }
349         };
350         x.setContentHandler(handler);
351 
352         return x;
353    }
354 
355     /**
356      * Parses the given document and add it to the DOM forest.
357      *
358      * @return
359      *      null if there was a parse error. otherwise non-null.
360      */
361     public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
362         Document dom = documentBuilder.newDocument();
363 
364         systemId = Options.normalizeSystemId(systemId);
365 
366         // put into the map before growing a tree, to
367         // prevent recursive reference from causing infinite loop.
368         core.put( systemId, dom );
369         if(root)
370             rootDocuments.add(systemId);
371 
372         try {
373             XMLReader reader = parserFactory.newSAXParser().getXMLReader();
374             reader.setContentHandler(getParserHandler(dom));
375             if(errorReceiver!=null)
376                 reader.setErrorHandler(errorReceiver);
377             if(entityResolver!=null)
378                 reader.setEntityResolver(entityResolver);
379             reader.parse(inputSource);
380         } catch( ParserConfigurationException e ) {
381             // in practice, this exception won't happen.
382             errorReceiver.error(e.getMessage(),e);
383             core.remove(systemId);
384             rootDocuments.remove(systemId);
385             return null;
386         } catch( IOException e ) {
387             errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
388             core.remove(systemId);
389             rootDocuments.remove(systemId);
390             return null;
391         }
392 
393         return dom;
394     }
395 
396     public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
397         Document dom = documentBuilder.newDocument();
398 
399         systemId = Options.normalizeSystemId(systemId);
400 
401         if(root)
402             rootDocuments.add(systemId);
403 
404         if(systemId==null)
405             throw new IllegalArgumentException("system id cannot be null");
406         core.put( systemId, dom );
407 
408         new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
409 
410         return dom;
411     }
412 
413     /**
414      * Performs internalization.
415      *
416      * This method should be called only once, only after all the
417      * schemas are parsed.
418      *
419      * @return
420      *      the returned bindings need to be applied after schema
421      *      components are built.
422      */
423     public SCDBasedBindingSet transform(boolean enableSCD) {
424         return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
425     }
426 
427     /**
428      * Performs the schema correctness check by using JAXP 1.3.
429      *
430      * <p>
431      * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
432      * doesn't handle inclusions very correctly (it ends up parsing it
433      * from its original source, not in this tree), and because
434      * it doesn't handle two documents for the same namespace very
435      * well.
436      *
437      * <p>
438      * We should eventually fix JAXP (and Xerces), but meanwhile
439      * this weaker and potentially wrong correctness check is still
440      * better than nothing when used inside JAX-WS (JAXB CLI and Ant
441      * does a better job of checking this.)
442      *
443      * <p>
444      * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
445      */
446     public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
447         List<SAXSource> sources = new ArrayList<SAXSource>();
448         for( String systemId : getRootDocuments() ) {
449             Document dom = get(systemId);
450             if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
451                 continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
452 
453             SAXSource ss = createSAXSource(systemId);
454             try {
455                 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
456             } catch (SAXException e) {
457                 throw new AssertionError(e);    // Xerces wants this. See 6395322.
458             }
459             sources.add(ss);
460         }
461 
462         try {
463             allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0]));
464         } catch (SAXException e) {
465             // error should have been reported.
466         } catch (RuntimeException re) {
467             // JAXP RI isn't very trustworthy when it comes to schema error check,
468             // and we know some cases where it just dies with NPE. So handle it gracefully.
469             // this masks a bug in the JAXP RI, but we need a release that we have to make.
470             try {
471                 sf.getErrorHandler().warning(
472                     new SAXParseException(Messages.format(
473                         Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
474                         null,null,-1,-1,re));
475             } catch (SAXException e) {
476                 // ignore
477             }
478         }
479     }
480 
481     /**
482      * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
483      * (instead of parsing the original source identified by the system ID.)
484      */
485     public @NotNull SAXSource createSAXSource(String systemId) {
486         ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
487             // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
488             // handlers, since SAX allows handlers to be changed while parsing.
489             @Override
490             public void parse(InputSource input) throws SAXException, IOException {
491                 createParser().parse(input, this, this, this);
492             }
493 
494             @Override
495             public void parse(String systemId) throws SAXException, IOException {
496                 parse(new InputSource(systemId));
497             }
498         });
499 
500         return new SAXSource(reader,new InputSource(systemId));
501     }
502 
503     /**
504      * Creates {@link XMLParser} for XSOM which reads documents from
505      * this DOMForest rather than doing a fresh parse.
506      *
507      * The net effect is that XSOM will read transformed XML Schemas
508      * instead of the original documents.
509      */
510     public XMLParser createParser() {
511         return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
512     }
513 
514     public EntityResolver getEntityResolver() {
515         return entityResolver;
516     }
517 
518     public void setEntityResolver(EntityResolver entityResolver) {
519         this.entityResolver = entityResolver;
520     }
521 
522     public ErrorReceiver getErrorHandler() {
523         return errorReceiver;
524     }
525 
526     public void setErrorHandler(ErrorReceiver errorHandler) {
527         this.errorReceiver = errorHandler;
528     }
529 
530     /**
531      * Gets all the parsed documents.
532      */
533     public Document[] listDocuments() {
534         return core.values().toArray(new Document[core.size()]);
535     }
536 
537     /**
538      * Gets all the system IDs of the documents.
539      */
540     public String[] listSystemIDs() {
541         return core.keySet().toArray(new String[core.keySet().size()]);
542     }
543 
544     /**
545      * Dumps the contents of the forest to the specified stream.
546      *
547      * This is a debug method. As such, error handling is sloppy.
548      */
549     @SuppressWarnings("CallToThreadDumpStack")
550     public void dump( OutputStream out ) throws IOException {
551         try {
552             // create identity transformer
553             boolean disableXmlSecurity = false;
554             if (options != null) {
555                 disableXmlSecurity = options.disableXmlSecurity;
556             }
557             TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
558             Transformer it = tf.newTransformer();
559 
560             for (Map.Entry<String, Document> e : core.entrySet()) {
561                 out.write( ("---<< "+e.getKey()+'\n').getBytes() );
562 
563                 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
564                 dw.setIndentStep("  ");
565                 it.transform( new DOMSource(e.getValue()),
566                     new SAXResult(dw));
567 
568                 out.write( "\n\n\n".getBytes() );
569             }
570         } catch( TransformerException e ) {
571             e.printStackTrace();
572         }
573     }
574 }