View Javadoc
1   /*
2    * reserved comment block
3    * DO NOT REMOVE OR ALTER!
4    */
5   /*
6    * Copyright 1999-2004 The Apache Software Foundation.
7    *
8    * Licensed under the Apache License, Version 2.0 (the "License");
9    * you may not use this file except in compliance with the License.
10   * You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  /*
21   * $Id: DOM2DTM.java,v 1.2.4.1 2005/09/15 08:15:10 suresh_emailid Exp $
22   */
23  package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
24  
25  import java.util.Vector;
26  
27  import javax.xml.transform.SourceLocator;
28  import javax.xml.transform.dom.DOMSource;
29  
30  import com.sun.org.apache.xml.internal.dtm.DTM;
31  import com.sun.org.apache.xml.internal.dtm.DTMManager;
32  import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
33  import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
34  import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
35  import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
36  import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
37  import com.sun.org.apache.xml.internal.res.XMLErrorResources;
38  import com.sun.org.apache.xml.internal.res.XMLMessages;
39  import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
40  import com.sun.org.apache.xml.internal.utils.QName;
41  import com.sun.org.apache.xml.internal.utils.StringBufferPool;
42  import com.sun.org.apache.xml.internal.utils.TreeWalker;
43  import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
44  import com.sun.org.apache.xml.internal.utils.XMLString;
45  import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
46  import org.w3c.dom.Attr;
47  import org.w3c.dom.Document;
48  import org.w3c.dom.DocumentType;
49  import org.w3c.dom.Element;
50  import org.w3c.dom.Entity;
51  import org.w3c.dom.NamedNodeMap;
52  import org.w3c.dom.Node;
53  import org.xml.sax.ContentHandler;
54  
/** The <code>DOM2DTM</code> class serves up a DOM's contents via the
 * DTM API.
 *
 * Note that it doesn't necessarily represent a full Document
 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
 *
 * Note too that we do not currently attempt to track document
 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
 * all bets are off.
 */
68  public class DOM2DTM extends DTMDefaultBaseIterators
69  {
  /** Debug switch. When true, nextNode() prints traversal diagnostics
   * (pop events and a final first-child/next-sibling crosscheck table)
   * to System.out. */
  static final boolean JJK_DEBUG=false;
  /** NOTE(review): not referenced in the visible part of this file;
   * presumably a leftover feature toggle -- confirm before removing. */
  static final boolean JJK_NEWCODE=true;

  /** Manifest constant: the namespace URI handed to the synthesized
   * declaration node when nextNode() has to create the implied
   * <code>xml:</code> prefix declaration.
   */
  static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";

  /** The current position in the DOM tree. Last node examined for
   * possible copying to DTM. Set to null once the walk is finished. */
  transient private Node m_pos;
  /** The current position in the DTM tree. Who children get appended to.
   * (The constructor resets this to NULL before adding the root.) */
  private int m_last_parent=0;
  /** The current position in the DTM tree. Who children reference as their
   * previous sib. NULL means no child has been posted at this level yet. */
  private int m_last_kid=NULL;

  /** The top of the subtree, i.e. the node obtained from the DOMSource.
   * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
   */
  transient private Node m_root;

  /** True iff the first element has been processed. This is used to control
      synthesis of the implied xml: namespace declaration node. */
  boolean m_processedFirstElement=false;

  /** true if ALL the nodes in the m_root subtree have been processed;
   * false if our incremental build has not yet finished scanning the
   * DOM tree.  */
  transient private boolean m_nodesAreProcessed;

  /** The node objects.  The instance part of the handle indexes
   * directly into this vector.  Each DTM node may actually be
   * composed of several DOM nodes (for example, if logically-adjacent
   * Text/CDATASection nodes in the DOM have been coalesced into a
   * single DTM Text node); this table points only to the first in
   * that sequence. */
  protected Vector m_nodes = new Vector();
107 
108   /**
109    * Construct a DOM2DTM object from a DOM node.
110    *
111    * @param mgr The DTMManager who owns this DTM.
112    * @param domSource the DOM source that this DTM will wrap.
113    * @param dtmIdentity The DTM identity ID for this DTM.
114    * @param whiteSpaceFilter The white space filter for this DTM, which may
115    *                         be null.
116    * @param xstringfactory XMLString factory for creating character content.
117    * @param doIndexing true if the caller considers it worth it to use
118    *                   indexing schemes.
119    */
120   public DOM2DTM(DTMManager mgr, DOMSource domSource,
121                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,
122                  XMLStringFactory xstringfactory,
123                  boolean doIndexing)
124   {
125     super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
126           xstringfactory, doIndexing);
127 
128     // Initialize DOM navigation
129     m_pos=m_root = domSource.getNode();
130     // Initialize DTM navigation
131     m_last_parent=m_last_kid=NULL;
132     m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
133 
134     // Apparently the domSource root may not actually be the
135     // Document node. If it's an Element node, we need to immediately
136     // add its attributes. Adapted from nextNode().
137     // %REVIEW% Move this logic into addNode and recurse? Cleaner!
138     //
139     // (If it's an EntityReference node, we're probably scrod. For now
140     // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
141                 //
142                 // %ISSUE% What about inherited namespaces in this case?
143                 // Do we need to special-case initialize them into the DTM model?
144     if(ELEMENT_NODE == m_root.getNodeType())
145     {
146       NamedNodeMap attrs=m_root.getAttributes();
147       int attrsize=(attrs==null) ? 0 : attrs.getLength();
148       if(attrsize>0)
149       {
150         int attrIndex=NULL; // start with no previous sib
151         for(int i=0;i<attrsize;++i)
152         {
153           // No need to force nodetype in this case;
154           // addNode() will take care of switching it from
155           // Attr to Namespace if necessary.
156           attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
157           m_firstch.setElementAt(DTM.NULL,attrIndex);
158         }
159         // Terminate list of attrs, and make sure they aren't
160         // considered children of the element
161         m_nextsib.setElementAt(DTM.NULL,attrIndex);
162 
163         // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
164       } // if attrs exist
165     } //if(ELEMENT_NODE)
166 
167     // Initialize DTM-completed status
168     m_nodesAreProcessed = false;
169   }
170 
171   /**
172    * Construct the node map from the node.
173    *
174    * @param node The node that is to be added to the DTM.
175    * @param parentIndex The current parent index.
176    * @param previousSibling The previous sibling index.
177    * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
178    *    Used to force nodes to Text rather than CDATASection when their
179    *    coalesced value includes ordinary Text nodes (current DTM behavior).
180    *
181    * @return The index identity of the node that was added.
182    */
183   protected int addNode(Node node, int parentIndex,
184                         int previousSibling, int forceNodeType)
185   {
186     int nodeIndex = m_nodes.size();
187 
188     // Have we overflowed a DTM Identity's addressing range?
189     if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
190     {
191       try
192       {
193         if(m_mgr==null)
194           throw new ClassCastException();
195 
196                                 // Handle as Extended Addressing
197         DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
198         int id=mgrD.getFirstFreeDTMID();
199         mgrD.addDTM(this,id,nodeIndex);
200         m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
201       }
202       catch(ClassCastException e)
203       {
204         // %REVIEW% Wrong error message, but I've been told we're trying
205         // not to add messages right not for I18N reasons.
206         // %REVIEW% Should this be a Fatal Error?
207         error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
208       }
209     }
210 
211     m_size++;
212     // ensureSize(nodeIndex);
213 
214     int type;
215     if(NULL==forceNodeType)
216         type = node.getNodeType();
217     else
218         type=forceNodeType;
219 
220     // %REVIEW% The Namespace Spec currently says that Namespaces are
221     // processed in a non-namespace-aware manner, by matching the
222     // QName, even though there is in fact a namespace assigned to
223     // these nodes in the DOM. If and when that changes, we will have
224     // to consider whether we check the namespace-for-namespaces
225     // rather than the node name.
226     //
227     // %TBD% Note that the DOM does not necessarily explicitly declare
228     // all the namespaces it uses. DOM Level 3 will introduce a
229     // namespace-normalization operation which reconciles that, and we
230     // can request that users invoke it or otherwise ensure that the
231     // tree is namespace-well-formed before passing the DOM to Xalan.
232     // But if they don't, what should we do about it? We probably
233     // don't want to alter the source DOM (and may not be able to do
234     // so if it's read-only). The best available answer might be to
235     // synthesize additional DTM Namespace Nodes that don't correspond
236     // to DOM Attr Nodes.
237     if (Node.ATTRIBUTE_NODE == type)
238     {
239       String name = node.getNodeName();
240 
241       if (name.startsWith("xmlns:") || name.equals("xmlns"))
242       {
243         type = DTM.NAMESPACE_NODE;
244       }
245     }
246 
247     m_nodes.addElement(node);
248 
249     m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
250     m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
251     m_prevsib.setElementAt(previousSibling,nodeIndex);
252     m_parent.setElementAt(parentIndex,nodeIndex);
253 
254     if(DTM.NULL != parentIndex &&
255        type != DTM.ATTRIBUTE_NODE &&
256        type != DTM.NAMESPACE_NODE)
257     {
258       // If the DTM parent had no children, this becomes its first child.
259       if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
260         m_firstch.setElementAt(nodeIndex,parentIndex);
261     }
262 
263     String nsURI = node.getNamespaceURI();
264 
265     // Deal with the difference between Namespace spec and XSLT
266     // definitions of local name. (The former says PIs don't have
267     // localnames; the latter says they do.)
268     String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?
269                          node.getNodeName() :
270                          node.getLocalName();
271 
272     // Hack to make DOM1 sort of work...
273     if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
274         && null == localName)
275       localName = node.getNodeName(); // -sb
276 
277     ExpandedNameTable exnt = m_expandedNameTable;
278 
279     // %TBD% Nodes created with the old non-namespace-aware DOM
280     // calls createElement() and createAttribute() will never have a
281     // localname. That will cause their expandedNameID to be just the
282     // nodeType... which will keep them from being matched
283     // successfully by name. Since the DOM makes no promise that
284     // those will participate in namespace processing, this is
285     // officially accepted as Not Our Fault. But it might be nice to
286     // issue a diagnostic message!
287     if(node.getLocalName()==null &&
288        (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
289       {
290         // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
291       }
292 
293     int expandedNameID = (null != localName)
294        ? exnt.getExpandedTypeID(nsURI, localName, type) :
295          exnt.getExpandedTypeID(type);
296 
297     m_exptype.setElementAt(expandedNameID,nodeIndex);
298 
299     indexNode(expandedNameID, nodeIndex);
300 
301     if (DTM.NULL != previousSibling)
302       m_nextsib.setElementAt(nodeIndex,previousSibling);
303 
304     // This should be done after m_exptype has been set, and probably should
305     // always be the last thing we do
306     if (type == DTM.NAMESPACE_NODE)
307         declareNamespaceInContext(parentIndex,nodeIndex);
308 
309     return nodeIndex;
310   }
311 
312   /**
313    * Get the number of nodes that have been added.
314    */
315   public int getNumberOfNodes()
316   {
317     return m_nodes.size();
318   }
319 
320  /**
321    * This method iterates to the next node that will be added to the table.
322    * Each call to this method adds a new node to the table, unless the end
323    * is reached, in which case it returns null.
324    *
325    * @return The true if a next node is found or false if
326    *         there are no more nodes.
327    */
328   protected boolean nextNode()
329   {
330     // Non-recursive one-fetch-at-a-time depth-first traversal with
331     // attribute/namespace nodes and white-space stripping.
332     // Navigating the DOM is simple, navigating the DTM is simple;
333     // keeping track of both at once is a trifle baroque but at least
334     // we've avoided most of the special cases.
335     if (m_nodesAreProcessed)
336       return false;
337 
338     // %REVIEW% Is this local copy Really Useful from a performance
339     // point of view?  Or is this a false microoptimization?
340     Node pos=m_pos;
341     Node next=null;
342     int nexttype=NULL;
343 
344     // Navigate DOM tree
345     do
346       {
347         // Look down to first child.
348         if (pos.hasChildNodes())
349           {
350             next = pos.getFirstChild();
351 
352             // %REVIEW% There's probably a more elegant way to skip
353             // the doctype. (Just let it go and Suppress it?
354             if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
355               next=next.getNextSibling();
356 
357             // Push DTM context -- except for children of Entity References,
358             // which have no DTM equivalent and cause no DTM navigation.
359             if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
360               {
361                 m_last_parent=m_last_kid;
362                 m_last_kid=NULL;
363                 // Whitespace-handler context stacking
364                 if(null != m_wsfilter)
365                 {
366                   short wsv =
367                     m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
368                   boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
369                     ? getShouldStripWhitespace()
370                     : (DTMWSFilter.STRIP == wsv);
371                   pushShouldStripWhitespace(shouldStrip);
372                 } // if(m_wsfilter)
373               }
374           }
375 
376         // If that fails, look up and right (but not past root!)
377         else
378           {
379             if(m_last_kid!=NULL)
380               {
381                 // Last node posted at this level had no more children
382                 // If it has _no_ children, we need to record that.
383                 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
384                   m_firstch.setElementAt(NULL,m_last_kid);
385               }
386 
387             while(m_last_parent != NULL)
388               {
389                 // %REVIEW% There's probably a more elegant way to
390                 // skip the doctype. (Just let it go and Suppress it?
391                 next = pos.getNextSibling();
392                 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
393                   next=next.getNextSibling();
394 
395                 if(next!=null)
396                   break; // Found it!
397 
398                 // No next-sibling found. Pop the DOM.
399                 pos=pos.getParentNode();
400                 if(pos==null)
401                   {
402                     // %TBD% Should never arise, but I want to be sure of that...
403                     if(JJK_DEBUG)
404                       {
405                         System.out.println("***** DOM2DTM Pop Control Flow problem");
406                         for(;;); // Freeze right here!
407                       }
408                   }
409 
410                 // The only parents in the DTM are Elements.  However,
411                 // the DOM could contain EntityReferences.  If we
412                 // encounter one, pop it _without_ popping DTM.
413                 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
414                   {
415                     // Nothing needs doing
416                     if(JJK_DEBUG)
417                       System.out.println("***** DOM2DTM popping EntRef");
418                   }
419                 else
420                   {
421                     popShouldStripWhitespace();
422                     // Fix and pop DTM
423                     if(m_last_kid==NULL)
424                       m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
425                     else
426                       m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
427                     m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
428                   }
429               }
430             if(m_last_parent==NULL)
431               next=null;
432           }
433 
434         if(next!=null)
435           nexttype=next.getNodeType();
436 
437         // If it's an entity ref, advance past it.
438         //
439         // %REVIEW% Should we let this out the door and just suppress it?
440         // More work, but simpler code, more likely to be correct, and
441         // it doesn't happen very often. We'd get rid of the loop too.
442         if (ENTITY_REFERENCE_NODE == nexttype)
443           pos=next;
444       }
445     while (ENTITY_REFERENCE_NODE == nexttype);
446 
447     // Did we run out of the tree?
448     if(next==null)
449       {
450         m_nextsib.setElementAt(NULL,0);
451         m_nodesAreProcessed = true;
452         m_pos=null;
453 
454         if(JJK_DEBUG)
455           {
456             System.out.println("***** DOM2DTM Crosscheck:");
457             for(int i=0;i<m_nodes.size();++i)
458               System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
459           }
460 
461         return false;
462       }
463 
464     // Text needs some special handling:
465     //
466     // DTM may skip whitespace. This is handled by the suppressNode flag, which
467     // when true will keep the DTM node from being created.
468     //
469     // DTM only directly records the first DOM node of any logically-contiguous
470     // sequence. The lastTextNode value will be set to the last node in the
471     // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
472     // advance next over this whole block. Should be simpler than special-casing
473     // the above loop for "Was the logically-preceeding sibling a text node".
474     //
475     // Finally, a DTM node should be considered a CDATASection only if all the
476     // contiguous text it covers is CDATASections. The first Text should
477     // force DTM to Text.
478 
479     boolean suppressNode=false;
480     Node lastTextNode=null;
481 
482     nexttype=next.getNodeType();
483 
484     // nexttype=pos.getNodeType();
485     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
486       {
487         // If filtering, initially assume we're going to suppress the node
488         suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
489 
490         // Scan logically contiguous text (siblings, plus "flattening"
491         // of entity reference boundaries).
492         Node n=next;
493         while(n!=null)
494           {
495             lastTextNode=n;
496             // Any Text node means DTM considers it all Text
497             if(TEXT_NODE == n.getNodeType())
498               nexttype=TEXT_NODE;
499             // Any non-whitespace in this sequence blocks whitespace
500             // suppression
501             suppressNode &=
502               XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
503 
504             n=logicalNextDOMTextNode(n);
505           }
506       }
507 
508     // Special handling for PIs: Some DOMs represent the XML
509     // Declaration as a PI. This is officially incorrect, per the DOM
510     // spec, but is considered a "wrong but tolerable" temporary
511     // workaround pending proper handling of these fields in DOM Level
512     // 3. We want to recognize and reject that case.
513     else if(PROCESSING_INSTRUCTION_NODE==nexttype)
514       {
515         suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
516       }
517 
518 
519     if(!suppressNode)
520       {
521         // Inserting next. NOTE that we force the node type; for
522         // coalesced Text, this records CDATASections adjacent to
523         // ordinary Text as Text.
524         int nextindex=addNode(next,m_last_parent,m_last_kid,
525                               nexttype);
526 
527         m_last_kid=nextindex;
528 
529         if(ELEMENT_NODE == nexttype)
530           {
531             int attrIndex=NULL; // start with no previous sib
532             // Process attributes _now_, rather than waiting.
533             // Simpler control flow, makes NS cache available immediately.
534             NamedNodeMap attrs=next.getAttributes();
535             int attrsize=(attrs==null) ? 0 : attrs.getLength();
536             if(attrsize>0)
537               {
538                 for(int i=0;i<attrsize;++i)
539                   {
540                     // No need to force nodetype in this case;
541                     // addNode() will take care of switching it from
542                     // Attr to Namespace if necessary.
543                     attrIndex=addNode(attrs.item(i),
544                                       nextindex,attrIndex,NULL);
545                     m_firstch.setElementAt(DTM.NULL,attrIndex);
546 
547                     // If the xml: prefix is explicitly declared
548                     // we don't need to synthesize one.
549                     //
550                     // NOTE that XML Namespaces were not originally
551                     // defined as being namespace-aware (grrr), and
552                     // while the W3C is planning to fix this it's
553                     // safer for now to test the QName and trust the
554                     // parsers to prevent anyone from redefining the
555                     // reserved xmlns: prefix
556                     if(!m_processedFirstElement
557                        && "xmlns:xml".equals(attrs.item(i).getNodeName()))
558                       m_processedFirstElement=true;
559                   }
560                 // Terminate list of attrs, and make sure they aren't
561                 // considered children of the element
562               } // if attrs exist
563             if(!m_processedFirstElement)
564             {
565               // The DOM might not have an explicit declaration for the
566               // implicit "xml:" prefix, but the XPath data model
567               // requires that this appear as a Namespace Node so we
568               // have to synthesize one. You can think of this as
569               // being a default attribute defined by the XML
570               // Namespaces spec rather than by the DTD.
571               attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
572                                                                                                                                         (Element)next,"xml",NAMESPACE_DECL_NS,
573                                                                                                                                         makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
574                                                                                                                                         ),
575                                 nextindex,attrIndex,NULL);
576               m_firstch.setElementAt(DTM.NULL,attrIndex);
577               m_processedFirstElement=true;
578             }
579             if(attrIndex!=NULL)
580               m_nextsib.setElementAt(DTM.NULL,attrIndex);
581           } //if(ELEMENT_NODE)
582       } // (if !suppressNode)
583 
584     // Text postprocessing: Act on values stored above
585     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
586       {
587         // %TBD% If nexttype was forced to TEXT, patch the DTM node
588 
589         next=lastTextNode;      // Advance the DOM cursor over contiguous text
590       }
591 
592     // Remember where we left off.
593     m_pos=next;
594     return true;
595   }
596 
597 
598   /**
599    * Return an DOM node for the given node.
600    *
601    * @param nodeHandle The node ID.
602    *
603    * @return A node representation of the DTM node.
604    */
605   public Node getNode(int nodeHandle)
606   {
607 
608     int identity = makeNodeIdentity(nodeHandle);
609 
610     return (Node) m_nodes.elementAt(identity);
611   }
612 
613   /**
614    * Get a Node from an identity index.
615    *
616    * NEEDSDOC @param nodeIdentity
617    *
618    * NEEDSDOC ($objectName$) @return
619    */
620   protected Node lookupNode(int nodeIdentity)
621   {
622     return (Node) m_nodes.elementAt(nodeIdentity);
623   }
624 
625   /**
626    * Get the next node identity value in the list, and call the iterator
627    * if it hasn't been added yet.
628    *
629    * @param identity The node identity (index).
630    * @return identity+1, or DTM.NULL.
631    */
632   protected int getNextNodeIdentity(int identity)
633   {
634 
635     identity += 1;
636 
637     if (identity >= m_nodes.size())
638     {
639       if (!nextNode())
640         identity = DTM.NULL;
641     }
642 
643     return identity;
644   }
645 
646   /**
647    * Get the handle from a Node.
648    * <p>%OPT% This will be pretty slow.</p>
649    *
650    * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
651    * walk down DTM reconstructing path) might be considerably faster
652    * on later nodes in large documents. That might also imply improving
653    * this call to handle nodes which would be in this DTM but
654    * have not yet been built, which might or might not be a Good Thing.</p>
655    *
656    * %REVIEW% This relies on being able to test node-identity via
657    * object-identity. DTM2DOM proxying is a great example of a case where
658    * that doesn't work. DOM Level 3 will provide the isSameNode() method
659    * to fix that, but until then this is going to be flaky.
660    *
661    * @param node A node, which may be null.
662    *
663    * @return The node handle or <code>DTM.NULL</code>.
664    */
665   private int getHandleFromNode(Node node)
666   {
667     if (null != node)
668     {
669       int len = m_nodes.size();
670       boolean isMore;
671       int i = 0;
672       do
673       {
674         for (; i < len; i++)
675         {
676           if (m_nodes.elementAt(i) == node)
677             return makeNodeHandle(i);
678         }
679 
680         isMore = nextNode();
681 
682         len = m_nodes.size();
683 
684       }
685       while(isMore || i < len);
686     }
687 
688     return DTM.NULL;
689   }
690 
691   /** Get the handle from a Node. This is a more robust version of
692    * getHandleFromNode, intended to be usable by the public.
693    *
694    * <p>%OPT% This will be pretty slow.</p>
695    *
696    * %REVIEW% This relies on being able to test node-identity via
697    * object-identity. DTM2DOM proxying is a great example of a case where
698    * that doesn't work. DOM Level 3 will provide the isSameNode() method
699    * to fix that, but until then this is going to be flaky.
700    *
701    * @param node A node, which may be null.
702    *
703    * @return The node handle or <code>DTM.NULL</code>.  */
704   public int getHandleOfNode(Node node)
705   {
706     if (null != node)
707     {
708       // Is Node actually within the same document? If not, don't search!
709       // This would be easier if m_root was always the Document node, but
710       // we decided to allow wrapping a DTM around a subtree.
711       if((m_root==node) ||
712          (m_root.getNodeType()==DOCUMENT_NODE &&
713           m_root==node.getOwnerDocument()) ||
714          (m_root.getNodeType()!=DOCUMENT_NODE &&
715           m_root.getOwnerDocument()==node.getOwnerDocument())
716          )
717         {
718           // If node _is_ in m_root's tree, find its handle
719           //
720           // %OPT% This check may be improved significantly when DOM
721           // Level 3 nodeKey and relative-order tests become
722           // available!
723           for(Node cursor=node;
724               cursor!=null;
725               cursor=
726                 (cursor.getNodeType()!=ATTRIBUTE_NODE)
727                 ? cursor.getParentNode()
728                 : ((org.w3c.dom.Attr)cursor).getOwnerElement())
729             {
730               if(cursor==m_root)
731                 // We know this node; find its handle.
732                 return getHandleFromNode(node);
733             } // for ancestors of node
734         } // if node and m_root in same Document
735     } // if node!=null
736 
737     return DTM.NULL;
738   }
739 
740   /**
741    * Retrieves an attribute node by by qualified name and namespace URI.
742    *
743    * @param nodeHandle int Handle of the node upon which to look up this attribute..
744    * @param namespaceURI The namespace URI of the attribute to
745    *   retrieve, or null.
746    * @param name The local name of the attribute to
747    *   retrieve.
748    * @return The attribute node handle with the specified name (
749    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
750    *   attribute.
751    */
752   public int getAttributeNode(int nodeHandle, String namespaceURI,
753                               String name)
754   {
755 
756     // %OPT% This is probably slower than it needs to be.
757     if (null == namespaceURI)
758       namespaceURI = "";
759 
760     int type = getNodeType(nodeHandle);
761 
762     if (DTM.ELEMENT_NODE == type)
763     {
764 
765       // Assume that attributes immediately follow the element.
766       int identity = makeNodeIdentity(nodeHandle);
767 
768       while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
769       {
770         // Assume this can not be null.
771         type = _type(identity);
772 
773                                 // %REVIEW%
774                                 // Should namespace nodes be retrievable DOM-style as attrs?
775                                 // If not we need a separate function... which may be desirable
776                                 // architecturally, but which is ugly from a code point of view.
777                                 // (If we REALLY insist on it, this code should become a subroutine
778                                 // of both -- retrieve the node, then test if the type matches
779                                 // what you're looking for.)
780         if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
781         {
782           Node node = lookupNode(identity);
783           String nodeuri = node.getNamespaceURI();
784 
785           if (null == nodeuri)
786             nodeuri = "";
787 
788           String nodelocalname = node.getLocalName();
789 
790           if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
791             return makeNodeHandle(identity);
792         }
793 
794         else // if (DTM.NAMESPACE_NODE != type)
795         {
796           break;
797         }
798       }
799     }
800 
801     return DTM.NULL;
802   }
803 
804   /**
805    * Get the string-value of a node as a String object
806    * (see http://www.w3.org/TR/xpath#data-model
807    * for the definition of a node's string-value).
808    *
809    * @param nodeHandle The node ID.
810    *
811    * @return A string object that represents the string-value of the given node.
812    */
813   public XMLString getStringValue(int nodeHandle)
814   {
815 
816     int type = getNodeType(nodeHandle);
817     Node node = getNode(nodeHandle);
818     // %TBD% If an element only has one text node, we should just use it
819     // directly.
820     if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
821     || DTM.DOCUMENT_FRAGMENT_NODE == type)
822     {
823       FastStringBuffer buf = StringBufferPool.get();
824       String s;
825 
826       try
827       {
828         getNodeData(node, buf);
829 
830         s = (buf.length() > 0) ? buf.toString() : "";
831       }
832       finally
833       {
834         StringBufferPool.free(buf);
835       }
836 
837       return m_xstrf.newstr( s );
838     }
839     else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
840     {
841       // If this is a DTM text node, it may be made of multiple DOM text
842       // nodes -- including navigating into Entity References. DOM2DTM
843       // records the first node in the sequence and requires that we
844       // pick up the others when we retrieve the DTM node's value.
845       //
846       // %REVIEW% DOM Level 3 is expected to add a "whole text"
847       // retrieval method which performs this function for us.
848       FastStringBuffer buf = StringBufferPool.get();
849       while(node!=null)
850       {
851         buf.append(node.getNodeValue());
852         node=logicalNextDOMTextNode(node);
853       }
854       String s=(buf.length() > 0) ? buf.toString() : "";
855       StringBufferPool.free(buf);
856       return m_xstrf.newstr( s );
857     }
858     else
859       return m_xstrf.newstr( node.getNodeValue() );
860   }
861 
862   /**
863    * Determine if the string-value of a node is whitespace
864    *
865    * @param nodeHandle The node Handle.
866    *
867    * @return Return true if the given node is whitespace.
868    */
869   public boolean isWhitespace(int nodeHandle)
870   {
871         int type = getNodeType(nodeHandle);
872     Node node = getNode(nodeHandle);
873         if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
874     {
875       // If this is a DTM text node, it may be made of multiple DOM text
876       // nodes -- including navigating into Entity References. DOM2DTM
877       // records the first node in the sequence and requires that we
878       // pick up the others when we retrieve the DTM node's value.
879       //
880       // %REVIEW% DOM Level 3 is expected to add a "whole text"
881       // retrieval method which performs this function for us.
882       FastStringBuffer buf = StringBufferPool.get();
883       while(node!=null)
884       {
885         buf.append(node.getNodeValue());
886         node=logicalNextDOMTextNode(node);
887       }
888      boolean b = buf.isWhitespace(0, buf.length());
889       StringBufferPool.free(buf);
890      return b;
891     }
892     return false;
893   }
894 
895   /**
896    * Retrieve the text content of a DOM subtree, appending it into a
897    * user-supplied FastStringBuffer object. Note that attributes are
898    * not considered part of the content of an element.
899    * <p>
900    * There are open questions regarding whitespace stripping.
901    * Currently we make no special effort in that regard, since the standard
902    * DOM doesn't yet provide DTD-based information to distinguish
903    * whitespace-in-element-context from genuine #PCDATA. Note that we
904    * should probably also consider xml:space if/when we address this.
905    * DOM Level 3 may solve the problem for us.
906    * <p>
907    * %REVIEW% Actually, since this method operates on the DOM side of the
908    * fence rather than the DTM side, it SHOULDN'T do
909    * any special handling. The DOM does what the DOM does; if you want
910    * DTM-level abstractions, use DTM-level methods.
911    *
912    * @param node Node whose subtree is to be walked, gathering the
913    * contents of all Text or CDATASection nodes.
914    * @param buf FastStringBuffer into which the contents of the text
915    * nodes are to be concatenated.
916    */
917   protected static void getNodeData(Node node, FastStringBuffer buf)
918   {
919 
920     switch (node.getNodeType())
921     {
922     case Node.DOCUMENT_FRAGMENT_NODE :
923     case Node.DOCUMENT_NODE :
924     case Node.ELEMENT_NODE :
925     {
926       for (Node child = node.getFirstChild(); null != child;
927               child = child.getNextSibling())
928       {
929         getNodeData(child, buf);
930       }
931     }
932     break;
933     case Node.TEXT_NODE :
934     case Node.CDATA_SECTION_NODE :
935     case Node.ATTRIBUTE_NODE :  // Never a child but might be our starting node
936       buf.append(node.getNodeValue());
937       break;
938     case Node.PROCESSING_INSTRUCTION_NODE :
939       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
940       break;
941     default :
942       // ignore
943       break;
944     }
945   }
946 
947   /**
948    * Given a node handle, return its DOM-style node name. This will
949    * include names such as #text or #document.
950    *
951    * @param nodeHandle the id of the node.
952    * @return String Name of this node, which may be an empty string.
953    * %REVIEW% Document when empty string is possible...
954    * %REVIEW-COMMENT% It should never be empty, should it?
955    */
956   public String getNodeName(int nodeHandle)
957   {
958 
959     Node node = getNode(nodeHandle);
960 
961     // Assume non-null.
962     return node.getNodeName();
963   }
964 
965   /**
966    * Given a node handle, return the XPath node name.  This should be
967    * the name as described by the XPath data model, NOT the DOM-style
968    * name.
969    *
970    * @param nodeHandle the id of the node.
971    * @return String Name of this node, which may be an empty string.
972    */
973   public String getNodeNameX(int nodeHandle)
974   {
975 
976     String name;
977     short type = getNodeType(nodeHandle);
978 
979     switch (type)
980     {
981     case DTM.NAMESPACE_NODE :
982     {
983       Node node = getNode(nodeHandle);
984 
985       // assume not null.
986       name = node.getNodeName();
987       if(name.startsWith("xmlns:"))
988       {
989         name = QName.getLocalPart(name);
990       }
991       else if(name.equals("xmlns"))
992       {
993         name = "";
994       }
995     }
996     break;
997     case DTM.ATTRIBUTE_NODE :
998     case DTM.ELEMENT_NODE :
999     case DTM.ENTITY_REFERENCE_NODE :
1000     case DTM.PROCESSING_INSTRUCTION_NODE :
1001     {
1002       Node node = getNode(nodeHandle);
1003 
1004       // assume not null.
1005       name = node.getNodeName();
1006     }
1007     break;
1008     default :
1009       name = "";
1010     }
1011 
1012     return name;
1013   }
1014 
1015   /**
1016    * Given a node handle, return its XPath-style localname.
1017    * (As defined in Namespaces, this is the portion of the name after any
1018    * colon character).
1019    *
1020    * @param nodeHandle the id of the node.
1021    * @return String Local name of this node.
1022    */
1023   public String getLocalName(int nodeHandle)
1024   {
1025     if(JJK_NEWCODE)
1026     {
1027       int id=makeNodeIdentity(nodeHandle);
1028       if(NULL==id) return null;
1029       Node newnode=(Node)m_nodes.elementAt(id);
1030       String newname=newnode.getLocalName();
1031       if (null == newname)
1032       {
1033         // XSLT treats PIs, and possibly other things, as having QNames.
1034         String qname = newnode.getNodeName();
1035         if('#'==qname.charAt(0))
1036         {
1037           //  Match old default for this function
1038           // This conversion may or may not be necessary
1039           newname="";
1040         }
1041         else
1042         {
1043           int index = qname.indexOf(':');
1044           newname = (index < 0) ? qname : qname.substring(index + 1);
1045         }
1046       }
1047       return newname;
1048     }
1049     else
1050     {
1051       String name;
1052       short type = getNodeType(nodeHandle);
1053       switch (type)
1054       {
1055       case DTM.ATTRIBUTE_NODE :
1056       case DTM.ELEMENT_NODE :
1057       case DTM.ENTITY_REFERENCE_NODE :
1058       case DTM.NAMESPACE_NODE :
1059       case DTM.PROCESSING_INSTRUCTION_NODE :
1060         {
1061           Node node = getNode(nodeHandle);
1062 
1063           // assume not null.
1064           name = node.getLocalName();
1065 
1066           if (null == name)
1067           {
1068             String qname = node.getNodeName();
1069             int index = qname.indexOf(':');
1070 
1071             name = (index < 0) ? qname : qname.substring(index + 1);
1072           }
1073         }
1074         break;
1075       default :
1076         name = "";
1077       }
1078       return name;
1079     }
1080   }
1081 
1082   /**
1083    * Given a namespace handle, return the prefix that the namespace decl is
1084    * mapping.
1085    * Given a node handle, return the prefix used to map to the namespace.
1086    *
1087    * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1088    * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1089    *
1090    * @param nodeHandle the id of the node.
1091    * @return String prefix of this node's name, or "" if no explicit
1092    * namespace prefix was given.
1093    */
1094   public String getPrefix(int nodeHandle)
1095   {
1096 
1097     String prefix;
1098     short type = getNodeType(nodeHandle);
1099 
1100     switch (type)
1101     {
1102     case DTM.NAMESPACE_NODE :
1103     {
1104       Node node = getNode(nodeHandle);
1105 
1106       // assume not null.
1107       String qname = node.getNodeName();
1108       int index = qname.indexOf(':');
1109 
1110       prefix = (index < 0) ? "" : qname.substring(index + 1);
1111     }
1112     break;
1113     case DTM.ATTRIBUTE_NODE :
1114     case DTM.ELEMENT_NODE :
1115     {
1116       Node node = getNode(nodeHandle);
1117 
1118       // assume not null.
1119       String qname = node.getNodeName();
1120       int index = qname.indexOf(':');
1121 
1122       prefix = (index < 0) ? "" : qname.substring(0, index);
1123     }
1124     break;
1125     default :
1126       prefix = "";
1127     }
1128 
1129     return prefix;
1130   }
1131 
1132   /**
1133    * Given a node handle, return its DOM-style namespace URI
1134    * (As defined in Namespaces, this is the declared URI which this node's
1135    * prefix -- or default in lieu thereof -- was mapped to.)
1136    *
1137    * <p>%REVIEW% Null or ""? -sb</p>
1138    *
1139    * @param nodeHandle the id of the node.
1140    * @return String URI value of this node's namespace, or null if no
1141    * namespace was resolved.
1142    */
1143   public String getNamespaceURI(int nodeHandle)
1144   {
1145     if(JJK_NEWCODE)
1146     {
1147       int id=makeNodeIdentity(nodeHandle);
1148       if(id==NULL) return null;
1149       Node node=(Node)m_nodes.elementAt(id);
1150       return node.getNamespaceURI();
1151     }
1152     else
1153     {
1154       String nsuri;
1155       short type = getNodeType(nodeHandle);
1156 
1157       switch (type)
1158       {
1159       case DTM.ATTRIBUTE_NODE :
1160       case DTM.ELEMENT_NODE :
1161       case DTM.ENTITY_REFERENCE_NODE :
1162       case DTM.NAMESPACE_NODE :
1163       case DTM.PROCESSING_INSTRUCTION_NODE :
1164         {
1165           Node node = getNode(nodeHandle);
1166 
1167           // assume not null.
1168           nsuri = node.getNamespaceURI();
1169 
1170           // %TBD% Handle DOM1?
1171         }
1172         break;
1173       default :
1174         nsuri = null;
1175       }
1176 
1177       return nsuri;
1178     }
1179 
1180   }
1181 
1182   /** Utility function: Given a DOM Text node, determine whether it is
1183    * logically followed by another Text or CDATASection node. This may
1184    * involve traversing into Entity References.
1185    *
1186    * %REVIEW% DOM Level 3 is expected to add functionality which may
1187    * allow us to retire this.
1188    */
1189   private Node logicalNextDOMTextNode(Node n)
1190   {
1191         Node p=n.getNextSibling();
1192         if(p==null)
1193         {
1194                 // Walk out of any EntityReferenceNodes that ended with text
1195                 for(n=n.getParentNode();
1196                         n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1197                         n=n.getParentNode())
1198                 {
1199                         p=n.getNextSibling();
1200                         if(p!=null)
1201                                 break;
1202                 }
1203         }
1204         n=p;
1205         while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1206         {
1207                 // Walk into any EntityReferenceNodes that start with text
1208                 if(n.hasChildNodes())
1209                         n=n.getFirstChild();
1210                 else
1211                         n=n.getNextSibling();
1212         }
1213         if(n!=null)
1214         {
1215                 // Found a logical next sibling. Is it text?
1216                 int ntype=n.getNodeType();
1217                 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1218                         n=null;
1219         }
1220         return n;
1221   }
1222 
1223   /**
1224    * Given a node handle, return its node value. This is mostly
1225    * as defined by the DOM, but may ignore some conveniences.
1226    * <p>
1227    *
1228    * @param nodeHandle The node id.
1229    * @return String Value of this node, or null if not
1230    * meaningful for this node type.
1231    */
1232   public String getNodeValue(int nodeHandle)
1233   {
1234     // The _type(nodeHandle) call was taking the lion's share of our
1235     // time, and was wrong anyway since it wasn't coverting handle to
1236     // identity. Inlined it.
1237     int type = _exptype(makeNodeIdentity(nodeHandle));
1238     type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1239 
1240     if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1241       return getNode(nodeHandle).getNodeValue();
1242 
1243     // If this is a DTM text node, it may be made of multiple DOM text
1244     // nodes -- including navigating into Entity References. DOM2DTM
1245     // records the first node in the sequence and requires that we
1246     // pick up the others when we retrieve the DTM node's value.
1247     //
1248     // %REVIEW% DOM Level 3 is expected to add a "whole text"
1249     // retrieval method which performs this function for us.
1250     Node node = getNode(nodeHandle);
1251     Node n=logicalNextDOMTextNode(node);
1252     if(n==null)
1253       return node.getNodeValue();
1254 
1255     FastStringBuffer buf = StringBufferPool.get();
1256         buf.append(node.getNodeValue());
1257     while(n!=null)
1258     {
1259       buf.append(n.getNodeValue());
1260       n=logicalNextDOMTextNode(n);
1261     }
1262     String s = (buf.length() > 0) ? buf.toString() : "";
1263     StringBufferPool.free(buf);
1264     return s;
1265   }
1266 
1267   /**
1268    *   A document type declaration information item has the following properties:
1269    *
1270    *     1. [system identifier] The system identifier of the external subset, if
1271    *        it exists. Otherwise this property has no value.
1272    *
1273    * @return the system identifier String object, or null if there is none.
1274    */
1275   public String getDocumentTypeDeclarationSystemIdentifier()
1276   {
1277 
1278     Document doc;
1279 
1280     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1281       doc = (Document) m_root;
1282     else
1283       doc = m_root.getOwnerDocument();
1284 
1285     if (null != doc)
1286     {
1287       DocumentType dtd = doc.getDoctype();
1288 
1289       if (null != dtd)
1290       {
1291         return dtd.getSystemId();
1292       }
1293     }
1294 
1295     return null;
1296   }
1297 
1298   /**
1299    * Return the public identifier of the external subset,
1300    * normalized as described in 4.2.2 External Entities [XML]. If there is
1301    * no external subset or if it has no public identifier, this property
1302    * has no value.
1303    *
1304    * @return the public identifier String object, or null if there is none.
1305    */
1306   public String getDocumentTypeDeclarationPublicIdentifier()
1307   {
1308 
1309     Document doc;
1310 
1311     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1312       doc = (Document) m_root;
1313     else
1314       doc = m_root.getOwnerDocument();
1315 
1316     if (null != doc)
1317     {
1318       DocumentType dtd = doc.getDoctype();
1319 
1320       if (null != dtd)
1321       {
1322         return dtd.getPublicId();
1323       }
1324     }
1325 
1326     return null;
1327   }
1328 
1329   /**
1330    * Returns the <code>Element</code> whose <code>ID</code> is given by
1331    * <code>elementId</code>. If no such element exists, returns
1332    * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1333    * has this <code>ID</code>. Attributes (including those
1334    * with the name "ID") are not of type ID unless so defined by DTD/Schema
1335    * information available to the DTM implementation.
1336    * Implementations that do not know whether attributes are of type ID or
1337    * not are expected to return <code>DTM.NULL</code>.
1338    *
1339    * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1340    * and this operation searches only within a single document, right?
1341    * Wouldn't want collisions between DTMs in the same process.</p>
1342    *
1343    * @param elementId The unique <code>id</code> value for an element.
1344    * @return The handle of the matching element.
1345    */
1346   public int getElementById(String elementId)
1347   {
1348 
1349     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1350         ? (Document) m_root : m_root.getOwnerDocument();
1351 
1352     if(null != doc)
1353     {
1354       Node elem = doc.getElementById(elementId);
1355       if(null != elem)
1356       {
1357         int elemHandle = getHandleFromNode(elem);
1358 
1359         if(DTM.NULL == elemHandle)
1360         {
1361           int identity = m_nodes.size()-1;
1362           while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1363           {
1364             Node node = getNode(identity);
1365             if(node == elem)
1366             {
1367               elemHandle = getHandleFromNode(elem);
1368               break;
1369             }
1370            }
1371         }
1372 
1373         return elemHandle;
1374       }
1375 
1376     }
1377     return DTM.NULL;
1378   }
1379 
1380   /**
1381    * The getUnparsedEntityURI function returns the URI of the unparsed
1382    * entity with the specified name in the same document as the context
1383    * node (see [3.3 Unparsed Entities]). It returns the empty string if
1384    * there is no such entity.
1385    * <p>
1386    * XML processors may choose to use the System Identifier (if one
1387    * is provided) to resolve the entity, rather than the URI in the
1388    * Public Identifier. The details are dependent on the processor, and
1389    * we would have to support some form of plug-in resolver to handle
1390    * this properly. Currently, we simply return the System Identifier if
1391    * present, and hope that it a usable URI or that our caller can
1392    * map it to one.
1393    * TODO: Resolve Public Identifiers... or consider changing function name.
1394    * <p>
1395    * If we find a relative URI
1396    * reference, XML expects it to be resolved in terms of the base URI
1397    * of the document. The DOM doesn't do that for us, and it isn't
1398    * entirely clear whether that should be done here; currently that's
1399    * pushed up to a higher level of our application. (Note that DOM Level
1400    * 1 didn't store the document's base URI.)
1401    * TODO: Consider resolving Relative URIs.
1402    * <p>
1403    * (The DOM's statement that "An XML processor may choose to
1404    * completely expand entities before the structure model is passed
1405    * to the DOM" refers only to parsed entities, not unparsed, and hence
1406    * doesn't affect this function.)
1407    *
1408    * @param name A string containing the Entity Name of the unparsed
1409    * entity.
1410    *
1411    * @return String containing the URI of the Unparsed Entity, or an
1412    * empty string if no such entity exists.
1413    */
1414   public String getUnparsedEntityURI(String name)
1415   {
1416 
1417     String url = "";
1418     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
1419         ? (Document) m_root : m_root.getOwnerDocument();
1420 
1421     if (null != doc)
1422     {
1423       DocumentType doctype = doc.getDoctype();
1424 
1425       if (null != doctype)
1426       {
1427         NamedNodeMap entities = doctype.getEntities();
1428         if(null == entities)
1429           return url;
1430         Entity entity = (Entity) entities.getNamedItem(name);
1431         if(null == entity)
1432           return url;
1433 
1434         String notationName = entity.getNotationName();
1435 
1436         if (null != notationName)  // then it's unparsed
1437         {
1438           // The draft says: "The XSLT processor may use the public
1439           // identifier to generate a URI for the entity instead of the URI
1440           // specified in the system identifier. If the XSLT processor does
1441           // not use the public identifier to generate the URI, it must use
1442           // the system identifier; if the system identifier is a relative
1443           // URI, it must be resolved into an absolute URI using the URI of
1444           // the resource containing the entity declaration as the base
1445           // URI [RFC2396]."
1446           // So I'm falling a bit short here.
1447           url = entity.getSystemId();
1448 
1449           if (null == url)
1450           {
1451             url = entity.getPublicId();
1452           }
1453           else
1454           {
1455             // This should be resolved to an absolute URL, but that's hard
1456             // to do from here.
1457           }
1458         }
1459       }
1460     }
1461 
1462     return url;
1463   }
1464 
1465   /**
1466    *     5. [specified] A flag indicating whether this attribute was actually
1467    *        specified in the start-tag of its element, or was defaulted from the
1468    *        DTD.
1469    *
1470    * @param attributeHandle the attribute handle
1471    * @return <code>true</code> if the attribute was specified;
1472    *         <code>false</code> if it was defaulted.
1473    */
1474   public boolean isAttributeSpecified(int attributeHandle)
1475   {
1476     int type = getNodeType(attributeHandle);
1477 
1478     if (DTM.ATTRIBUTE_NODE == type)
1479     {
1480       Attr attr = (Attr)getNode(attributeHandle);
1481       return attr.getSpecified();
1482     }
1483     return false;
1484   }
1485 
1486   /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1487    * we're wrapped around an existing DOM.
1488    *
1489    * @param source The IncrementalSAXSource that we want to recieve events from
1490    * on demand.
1491    */
1492   public void setIncrementalSAXSource(IncrementalSAXSource source)
1493   {
1494   }
1495 
1496   /** getContentHandler returns "our SAX builder" -- the thing that
1497    * someone else should send SAX events to in order to extend this
1498    * DTM model.
1499    *
1500    * @return null if this model doesn't respond to SAX events,
1501    * "this" if the DTM object has a built-in SAX ContentHandler,
1502    * the IncrmentalSAXSource if we're bound to one and should receive
1503    * the SAX stream via it for incremental build purposes...
1504    * */
1505   public org.xml.sax.ContentHandler getContentHandler()
1506   {
1507       return null;
1508   }
1509 
1510   /**
1511    * Return this DTM's lexical handler.
1512    *
1513    * %REVIEW% Should this return null if constrution already done/begun?
1514    *
1515    * @return null if this model doesn't respond to lexical SAX events,
1516    * "this" if the DTM object has a built-in SAX ContentHandler,
1517    * the IncrementalSAXSource if we're bound to one and should receive
1518    * the SAX stream via it for incremental build purposes...
1519    */
1520   public org.xml.sax.ext.LexicalHandler getLexicalHandler()
1521   {
1522 
1523     return null;
1524   }
1525 
1526 
1527   /**
1528    * Return this DTM's EntityResolver.
1529    *
1530    * @return null if this model doesn't respond to SAX entity ref events.
1531    */
1532   public org.xml.sax.EntityResolver getEntityResolver()
1533   {
1534 
1535     return null;
1536   }
1537 
1538   /**
1539    * Return this DTM's DTDHandler.
1540    *
1541    * @return null if this model doesn't respond to SAX dtd events.
1542    */
1543   public org.xml.sax.DTDHandler getDTDHandler()
1544   {
1545 
1546     return null;
1547   }
1548 
1549   /**
1550    * Return this DTM's ErrorHandler.
1551    *
1552    * @return null if this model doesn't respond to SAX error events.
1553    */
1554   public org.xml.sax.ErrorHandler getErrorHandler()
1555   {
1556 
1557     return null;
1558   }
1559 
1560   /**
1561    * Return this DTM's DeclHandler.
1562    *
1563    * @return null if this model doesn't respond to SAX Decl events.
1564    */
1565   public org.xml.sax.ext.DeclHandler getDeclHandler()
1566   {
1567 
1568     return null;
1569   }
1570 
1571   /** @return true iff we're building this model incrementally (eg
1572    * we're partnered with a IncrementalSAXSource) and thus require that the
1573    * transformation and the parse run simultaneously. Guidance to the
1574    * DTMManager.
1575    * */
1576   public boolean needsTwoThreads()
1577   {
1578     return false;
1579   }
1580 
1581   // ========== Direct SAX Dispatch, for optimization purposes ========
1582 
1583   /**
1584    * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1585    * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1586    * the definition of <CODE>S</CODE></A> for details.
1587    * @param   ch      Character to check as XML whitespace.
1588    * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1589    */
1590   private static boolean isSpace(char ch)
1591   {
1592     return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1593   }
1594 
1595   /**
1596    * Directly call the
1597    * characters method on the passed ContentHandler for the
1598    * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1599    * for the definition of a node's string-value). Multiple calls to the
1600    * ContentHandler's characters methods may well occur for a single call to
1601    * this method.
1602    *
1603    * @param nodeHandle The node ID.
1604    * @param ch A non-null reference to a ContentHandler.
1605    *
1606    * @throws org.xml.sax.SAXException
1607    */
1608   public void dispatchCharactersEvents(
1609           int nodeHandle, org.xml.sax.ContentHandler ch,
1610           boolean normalize)
1611             throws org.xml.sax.SAXException
1612   {
1613     if(normalize)
1614     {
1615       XMLString str = getStringValue(nodeHandle);
1616       str = str.fixWhiteSpace(true, true, false);
1617       str.dispatchCharactersEvents(ch);
1618     }
1619     else
1620     {
1621       int type = getNodeType(nodeHandle);
1622       Node node = getNode(nodeHandle);
1623       dispatchNodeData(node, ch, 0);
1624           // Text coalition -- a DTM text node may represent multiple
1625           // DOM nodes.
1626           if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1627           {
1628                   while( null != (node=logicalNextDOMTextNode(node)) )
1629                   {
1630                       dispatchNodeData(node, ch, 0);
1631                   }
1632           }
1633     }
1634   }
1635 
1636   /**
1637    * Retrieve the text content of a DOM subtree, appending it into a
1638    * user-supplied FastStringBuffer object. Note that attributes are
1639    * not considered part of the content of an element.
1640    * <p>
1641    * There are open questions regarding whitespace stripping.
1642    * Currently we make no special effort in that regard, since the standard
1643    * DOM doesn't yet provide DTD-based information to distinguish
1644    * whitespace-in-element-context from genuine #PCDATA. Note that we
1645    * should probably also consider xml:space if/when we address this.
1646    * DOM Level 3 may solve the problem for us.
1647    * <p>
1648    * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1649    * routine _shouldn't_ perform any processing beyond what the DOM already
1650    * does, and that whitespace stripping and so on belong at the DTM level.
1651    * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1652    *
1653    * @param node Node whose subtree is to be walked, gathering the
1654    * contents of all Text or CDATASection nodes.
1655    */
1656   protected static void dispatchNodeData(Node node,
1657                                          org.xml.sax.ContentHandler ch,
1658                                          int depth)
1659             throws org.xml.sax.SAXException
1660   {
1661 
1662     switch (node.getNodeType())
1663     {
1664     case Node.DOCUMENT_FRAGMENT_NODE :
1665     case Node.DOCUMENT_NODE :
1666     case Node.ELEMENT_NODE :
1667     {
1668       for (Node child = node.getFirstChild(); null != child;
1669               child = child.getNextSibling())
1670       {
1671         dispatchNodeData(child, ch, depth+1);
1672       }
1673     }
1674     break;
1675     case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1676     case Node.COMMENT_NODE :
1677       if(0 != depth)
1678         break;
1679         // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1680         // to perform Text Coalition. That should only be done in DTM space.
1681     case Node.TEXT_NODE :
1682     case Node.CDATA_SECTION_NODE :
1683     case Node.ATTRIBUTE_NODE :
1684       String str = node.getNodeValue();
1685       if(ch instanceof CharacterNodeHandler)
1686       {
1687         ((CharacterNodeHandler)ch).characters(node);
1688       }
1689       else
1690       {
1691         ch.characters(str.toCharArray(), 0, str.length());
1692       }
1693       break;
1694 //    /* case Node.PROCESSING_INSTRUCTION_NODE :
1695 //      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1696 //      break; */
1697     default :
1698       // ignore
1699       break;
1700     }
1701   }
1702 
1703   TreeWalker m_walker = new TreeWalker(null);
1704 
1705   /**
1706    * Directly create SAX parser events from a subtree.
1707    *
1708    * @param nodeHandle The node ID.
1709    * @param ch A non-null reference to a ContentHandler.
1710    *
1711    * @throws org.xml.sax.SAXException
1712    */
1713   public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
1714           throws org.xml.sax.SAXException
1715   {
1716     TreeWalker treeWalker = m_walker;
1717     ContentHandler prevCH = treeWalker.getContentHandler();
1718 
1719     if(null != prevCH)
1720     {
1721       treeWalker = new TreeWalker(null);
1722     }
1723     treeWalker.setContentHandler(ch);
1724 
1725     try
1726     {
1727       Node node = getNode(nodeHandle);
1728       treeWalker.traverseFragment(node);
1729     }
1730     finally
1731     {
1732       treeWalker.setContentHandler(null);
1733     }
1734   }
1735 
/**
 * Callback for handlers that want to receive a character-bearing DOM node
 * itself rather than a copy of its text. When the handler given to
 * dispatchNodeData implements this interface, the node is passed here
 * instead of going through the SAX <code>characters(char[], int, int)</code>
 * call.
 */
public interface CharacterNodeHandler
{
  /**
   * Receive a node whose value is to be treated as character data.
   *
   * @param node the DOM node being reported
   *
   * @throws org.xml.sax.SAXException if the handler fails to process the node
   */
  void characters(Node node) throws org.xml.sax.SAXException;
}
1741 
1742   /**
1743    * For the moment all the run time properties are ignored by this
1744    * class.
1745    *
1746    * @param property a <code>String</code> value
1747    * @param value an <code>Object</code> value
1748    */
1749   public void setProperty(String property, Object value)
1750   {
1751   }
1752 
1753   /**
1754    * No source information is available for DOM2DTM, so return
1755    * <code>null</code> here.
1756    *
1757    * @param node an <code>int</code> value
1758    * @return null
1759    */
1760   public SourceLocator getSourceLocatorFor(int node)
1761   {
1762     return null;
1763   }
1764 
1765 }