View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 1999-2004 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  /*
21   * $Id: SAX2RTFDTM.java,v 1.2.4.1 2005/09/15 08:15:13 suresh_emailid Exp $
22   */
23  package com.sun.org.apache.xml.internal.dtm.ref.sax2dtm;
24  
25  import javax.xml.transform.Source;
26  
27  import com.sun.org.apache.xml.internal.dtm.DTM;
28  import com.sun.org.apache.xml.internal.dtm.DTMManager;
29  import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
30  import com.sun.org.apache.xml.internal.utils.IntStack;
31  import com.sun.org.apache.xml.internal.utils.IntVector;
32  import com.sun.org.apache.xml.internal.utils.StringVector;
33  import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
34  
35  import org.xml.sax.SAXException;
36  
37  /**
38   * This is a subclass of SAX2DTM which has been modified to meet the needs of
39   * Result Tree Frameworks (RTFs). The differences are:
40   *
41   * 1) Multiple XML trees may be appended to the single DTM. This means
42   * that the root node of each document is _not_ node 0. Some code has
43   * had to be deoptimized to support this mode of operation, and an
44   * explicit mechanism for obtaining the Node Handle of the root node
45   * has been provided.
46   *
47   * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
48   * most recently added trees off the end of the DTM as stylesheet elements
49   * (and thus variable contexts) are exited.
50   *
51   * PLEASE NOTE that this class may be _heavily_ dependent upon the
52   * internals of the SAX2DTM superclass, and must be maintained in
53   * parallel with that code.  Arguably, they should be conditionals
54   * within a single class... but they have deen separated for
55   * performance reasons. (In fact, one could even argue about which is
56   * the superclass and which is the subclass; the current arrangement
57   * is as much about preserving stability of existing code during
58   * development as anything else.)
59   *
60   * %REVIEW% In fact, since the differences are so minor, I think it
61   * may be possible/practical to fold them back into the base
62   * SAX2DTM. Consider that as a future code-size optimization.
63   * */
64  public class SAX2RTFDTM extends SAX2DTM
65  {
66    /** Set true to monitor SAX events and similar diagnostic info. */
67    private static final boolean DEBUG = false;
68  
69    /** Most recently started Document, or null if the DTM is empty.  */
70    private int m_currentDocumentNode=NULL;
71  
72    /** Tail-pruning mark: Number of nodes in use */
73    IntStack mark_size=new IntStack();
74    /** Tail-pruning mark: Number of data items in use */
75    IntStack mark_data_size=new IntStack();
76    /** Tail-pruning mark: Number of size-of-data fields in use */
77    IntStack mark_char_size=new IntStack();
78    /** Tail-pruning mark: Number of dataOrQName slots in use */
79    IntStack mark_doq_size=new IntStack();
80    /** Tail-pruning mark: Number of namespace declaration sets in use
81     * %REVIEW% I don't think number of NS sets is ever different from number
82     * of NS elements. We can probabably reduce these to a single stack and save
83     * some storage.
84     * */
85    IntStack mark_nsdeclset_size=new IntStack();
86    /** Tail-pruning mark: Number of naespace declaration elements in use
87     * %REVIEW% I don't think number of NS sets is ever different from number
88     * of NS elements. We can probabably reduce these to a single stack and save
89     * some storage.
90     */
91    IntStack mark_nsdeclelem_size=new IntStack();
92  
93    /**
94     * Tail-pruning mark:  initial number of nodes in use
95     */
96    int m_emptyNodeCount;
97  
98    /**
99     * Tail-pruning mark:  initial number of namespace declaration sets
100    */
101   int m_emptyNSDeclSetCount;
102 
103   /**
104    * Tail-pruning mark:  initial number of namespace declaration elements
105    */
106   int m_emptyNSDeclSetElemsCount;
107 
108   /**
109    * Tail-pruning mark:  initial number of data items in use
110    */
111   int m_emptyDataCount;
112 
113   /**
114    * Tail-pruning mark:  initial number of characters in use
115    */
116   int m_emptyCharsCount;
117 
118   /**
119    * Tail-pruning mark:  default initial number of dataOrQName slots in use
120    */
121   int m_emptyDataQNCount;
122 
123   public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
124                  DTMWSFilter whiteSpaceFilter,
125                  XMLStringFactory xstringfactory,
126                  boolean doIndexing)
127   {
128     super(mgr, source, dtmIdentity, whiteSpaceFilter,
129           xstringfactory, doIndexing);
130 
131     // NEVER track source locators for RTFs; they aren't meaningful. I think.
132     // (If we did track them, we'd need to tail-prune these too.)
133     //com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.m_source_location;
134     m_useSourceLocationProperty=false;
135     m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
136                                                      : null;
137     m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
138     m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
139 
140     // Record initial sizes of fields that are pushed and restored
141     // for RTF tail-pruning.  More entries can be popped than pushed, so
142     // we need this to mark the primordial state of the DTM.
143     m_emptyNodeCount = m_size;
144     m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
145                                  ? 0 : m_namespaceDeclSets.size();
146     m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
147                                       ? 0 : m_namespaceDeclSetElements.size();
148     m_emptyDataCount = m_data.size();
149     m_emptyCharsCount = m_chars.size();
150     m_emptyDataQNCount = m_dataOrQName.size();
151   }
152 
153   /**
154    * Given a DTM, find the owning document node. In the case of
155    * SAX2RTFDTM, which may contain multiple documents, this returns
156    * the <b>most recently started</b> document, or null if the DTM is
157    * empty or no document is currently under construction.
158    *
159    * %REVIEW% Should we continue to report the most recent after
160    * construction has ended? I think not, given that it may have been
161    * tail-pruned.
162    *
163    *  @return int Node handle of Document node, or null if this DTM does not
164    *  contain an "active" document.
165    * */
166   public int getDocument()
167   {
168     return makeNodeHandle(m_currentDocumentNode);
169   }
170 
171   /**
172    * Given a node handle, find the owning document node, using DTM semantics
173    * (Document owns itself) rather than DOM semantics (Document has no owner).
174    *
175    * (I'm counting on the fact that getOwnerDocument() is implemented on top
176    * of this call, in the superclass, to avoid having to rewrite that one.
177    * Be careful if that code changes!)
178    *
179    * @param nodeHandle the id of the node.
180    * @return int Node handle of owning document
181    */
182   public int getDocumentRoot(int nodeHandle)
183   {
184     for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
185       if (_type(id)==DTM.DOCUMENT_NODE) {
186         return makeNodeHandle(id);
187       }
188     }
189 
190     return DTM.NULL; // Safety net; should never happen
191   }
192 
193   /**
194    * Given a node identifier, find the owning document node.  Unlike the DOM,
195    * this considers the owningDocument of a Document to be itself. Note that
196    * in shared DTMs this may not be zero.
197    *
198    * @param nodeIdentifier the id of the starting node.
199    * @return int Node identifier of the root of this DTM tree
200    */
201   protected int _documentRoot(int nodeIdentifier)
202   {
203     if(nodeIdentifier==NULL) return NULL;
204 
205     for (int parent=_parent(nodeIdentifier);
206          parent!=NULL;
207          nodeIdentifier=parent,parent=_parent(nodeIdentifier))
208       ;
209 
210     return nodeIdentifier;
211   }
212 
213   /**
214    * Receive notification of the beginning of a new RTF document.
215    *
216    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
217    * might want to consider folding the start/endDocument changes back
218    * into the main SAX2DTM so we don't have to expose so many fields
219    * (even as Protected) and carry the additional code.
220    *
221    * @throws SAXException Any SAX exception, possibly
222    *            wrapping another exception.
223    * @see org.xml.sax.ContentHandler#startDocument
224    * */
225   public void startDocument() throws SAXException
226   {
227     // Re-initialize the tree append process
228     m_endDocumentOccured = false;
229     m_prefixMappings = new java.util.Vector();
230     m_contextIndexes = new IntStack();
231     m_parents = new IntStack();
232 
233     m_currentDocumentNode=m_size;
234     super.startDocument();
235   }
236 
237   /**
238    * Receive notification of the end of the document.
239    *
240    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
241    * might want to consider folding the start/endDocument changes back
242    * into the main SAX2DTM so we don't have to expose so many fields
243    * (even as Protected).
244    *
245    * @throws SAXException Any SAX exception, possibly
246    *            wrapping another exception.
247    * @see org.xml.sax.ContentHandler#endDocument
248    * */
249   public void endDocument() throws SAXException
250   {
251     charactersFlush();
252 
253     m_nextsib.setElementAt(NULL,m_currentDocumentNode);
254 
255     if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
256       m_firstch.setElementAt(NULL,m_currentDocumentNode);
257 
258     if (DTM.NULL != m_previous)
259       m_nextsib.setElementAt(DTM.NULL,m_previous);
260 
261     m_parents = null;
262     m_prefixMappings = null;
263     m_contextIndexes = null;
264 
265     m_currentDocumentNode= NULL; // no longer open
266     m_endDocumentOccured = true;
267   }
268 
269 
270   /** "Tail-pruning" support for RTFs.
271    *
272    * This function pushes information about the current size of the
273    * DTM's data structures onto a stack, for use by popRewindMark()
274    * (which see).
275    *
276    * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
277    * RTFs will not be indexed, so I can simply panic if that case
278    * arises. Hey, it works...
279    * */
280   public void pushRewindMark()
281   {
282     if(m_indexing || m_elemIndexes!=null)
283       throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
284 
285     // Values from DTMDefaultBase
286     // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
287     mark_size.push(m_size);
288     mark_nsdeclset_size.push((m_namespaceDeclSets==null)
289                                    ? 0
290                                    : m_namespaceDeclSets.size());
291     mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
292                                    ? 0
293                                    : m_namespaceDeclSetElements.size());
294 
295     // Values from SAX2DTM
296     mark_data_size.push(m_data.size());
297     mark_char_size.push(m_chars.size());
298     mark_doq_size.push(m_dataOrQName.size());
299   }
300 
301   /** "Tail-pruning" support for RTFs.
302    *
303    * This function pops the information previously saved by
304    * pushRewindMark (which see) and uses it to discard all nodes added
305    * to the DTM after that time. We expect that this will allow us to
306    * reuse storage more effectively.
307    *
308    * This is _not_ intended to be called while a document is still being
309    * constructed -- only between endDocument and the next startDocument
310    *
311    * %REVIEW% WARNING: This is the first use of some of the truncation
312    * methods.  If Xalan blows up after this is called, that's a likely
313    * place to check.
314    *
315    * %REVIEW% Our original design for DTMs permitted them to share
316    * string pools.  If there any risk that this might be happening, we
317    * can _not_ rewind and recover the string storage. One solution
318    * might to assert that DTMs used for RTFs Must Not take advantage
319    * of that feature, but this seems excessively fragile. Another, much
320    * less attractive, would be to just let them leak... Nah.
321    *
322    * @return true if and only if the pop completely emptied the
323    * RTF. That response is used when determining how to unspool
324    * RTF-started-while-RTF-open situations.
325    * */
326   public boolean popRewindMark()
327   {
328     boolean top=mark_size.empty();
329 
330     m_size=top ? m_emptyNodeCount : mark_size.pop();
331     m_exptype.setSize(m_size);
332     m_firstch.setSize(m_size);
333     m_nextsib.setSize(m_size);
334     m_prevsib.setSize(m_size);
335     m_parent.setSize(m_size);
336 
337     m_elemIndexes=null;
338 
339     int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
340     if (m_namespaceDeclSets!=null) {
341       m_namespaceDeclSets.setSize(ds);
342     }
343 
344     int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
345     if (m_namespaceDeclSetElements!=null) {
346       m_namespaceDeclSetElements.setSize(ds1);
347     }
348 
349     // Values from SAX2DTM - m_data always has a reserved entry
350     m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
351     m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
352     m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
353 
354     // Return true iff DTM now empty
355     return m_size==0;
356   }
357 
358   /** @return true if a DTM tree is currently under construction.
359    * */
360   public boolean isTreeIncomplete()
361   {
362     return !m_endDocumentOccured;
363   }
364 }