in src/org/apache/xml/dtm/ref/dom2dtm/DOM2DTM.java [327:594]
protected boolean nextNode()
{
// Non-recursive one-fetch-at-a-time depth-first traversal with
// attribute/namespace nodes and white-space stripping.
// Navigating the DOM is simple, navigating the DTM is simple;
// keeping track of both at once is a trifle baroque but at least
// we've avoided most of the special cases.
if (m_nodesAreProcessed)
return false;
// %REVIEW% Is this local copy Really Useful from a performance
// point of view? Or is this a false microoptimization?
Node pos=m_pos;
Node next=null;
int nexttype=NULL;
// Navigate DOM tree
do
{
// Look down to first child.
if (pos.hasChildNodes())
{
next = pos.getFirstChild();
// %REVIEW% There's probably a more elegant way to skip
// the doctype. (Just let it go and Suppress it?
if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
next=next.getNextSibling();
// Push DTM context -- except for children of Entity References,
// which have no DTM equivalent and cause no DTM navigation.
if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
{
m_last_parent=m_last_kid;
m_last_kid=NULL;
// Whitespace-handler context stacking
if(null != m_wsfilter)
{
short wsv =
m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
? getShouldStripWhitespace()
: (DTMWSFilter.STRIP == wsv);
pushShouldStripWhitespace(shouldStrip);
} // if(m_wsfilter)
}
}
// If that fails, look up and right (but not past root!)
else
{
if(m_last_kid!=NULL)
{
// Last node posted at this level had no more children
// If it has _no_ children, we need to record that.
if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
m_firstch.setElementAt(NULL,m_last_kid);
}
while(m_last_parent != NULL)
{
// %REVIEW% There's probably a more elegant way to
// skip the doctype. (Just let it go and Suppress it?
next = pos.getNextSibling();
if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
next=next.getNextSibling();
if(next!=null)
break; // Found it!
// No next-sibling found. Pop the DOM.
pos=pos.getParentNode();
if(pos==null)
{
// %TBD% Should never arise, but I want to be sure of that...
if(JJK_DEBUG)
{
System.out.println("***** DOM2DTM Pop Control Flow problem");
for(;;); // Freeze right here!
}
}
// The only parents in the DTM are Elements. However,
// the DOM could contain EntityReferences. If we
// encounter one, pop it _without_ popping DTM.
if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
{
// Nothing needs doing
if(JJK_DEBUG)
System.out.println("***** DOM2DTM popping EntRef");
}
else
{
popShouldStripWhitespace();
// Fix and pop DTM
if(m_last_kid==NULL)
m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
else
m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
}
}
if(m_last_parent==NULL)
next=null;
}
if(next!=null)
nexttype=next.getNodeType();
// If it's an entity ref, advance past it.
//
// %REVIEW% Should we let this out the door and just suppress it?
// More work, but simpler code, more likely to be correct, and
// it doesn't happen very often. We'd get rid of the loop too.
if (ENTITY_REFERENCE_NODE == nexttype)
pos=next;
}
while (ENTITY_REFERENCE_NODE == nexttype);
// Did we run out of the tree?
if(next==null)
{
m_nextsib.setElementAt(NULL,0);
m_nodesAreProcessed = true;
m_pos=null;
if(JJK_DEBUG)
{
System.out.println("***** DOM2DTM Crosscheck:");
for(int i=0;i<m_nodes.size();++i)
System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
}
return false;
}
// Text needs some special handling:
//
// DTM may skip whitespace. This is handled by the suppressNode flag, which
// when true will keep the DTM node from being created.
//
// DTM only directly records the first DOM node of any logically-contiguous
// sequence. The lastTextNode value will be set to the last node in the
// contiguous sequence, and -- AFTER the DTM addNode -- can be used to
// advance next over this whole block. Should be simpler than special-casing
// the above loop for "Was the logically-preceeding sibling a text node".
//
// Finally, a DTM node should be considered a CDATASection only if all the
// contiguous text it covers is CDATASections. The first Text should
// force DTM to Text.
boolean suppressNode=false;
Node lastTextNode=null;
nexttype=next.getNodeType();
// nexttype=pos.getNodeType();
if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
{
// If filtering, initially assume we're going to suppress the node
suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
// Scan logically contiguous text (siblings, plus "flattening"
// of entity reference boundaries).
Node n=next;
while(n!=null)
{
lastTextNode=n;
// Any Text node means DTM considers it all Text
if(TEXT_NODE == n.getNodeType())
nexttype=TEXT_NODE;
// Any non-whitespace in this sequence blocks whitespace
// suppression
suppressNode &=
XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
n=logicalNextDOMTextNode(n);
}
}
// Special handling for PIs: Some DOMs represent the XML
// Declaration as a PI. This is officially incorrect, per the DOM
// spec, but is considered a "wrong but tolerable" temporary
// workaround pending proper handling of these fields in DOM Level
// 3. We want to recognize and reject that case.
else if(PROCESSING_INSTRUCTION_NODE==nexttype)
{
suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
}
if(!suppressNode)
{
// Inserting next. NOTE that we force the node type; for
// coalesced Text, this records CDATASections adjacent to
// ordinary Text as Text.
int nextindex=addNode(next,m_last_parent,m_last_kid,
nexttype);
m_last_kid=nextindex;
if(ELEMENT_NODE == nexttype)
{
int attrIndex=NULL; // start with no previous sib
// Process attributes _now_, rather than waiting.
// Simpler control flow, makes NS cache available immediately.
NamedNodeMap attrs=next.getAttributes();
int attrsize=(attrs==null) ? 0 : attrs.getLength();
if(attrsize>0)
{
for(int i=0;i<attrsize;++i)
{
// No need to force nodetype in this case;
// addNode() will take care of switching it from
// Attr to Namespace if necessary.
attrIndex=addNode(attrs.item(i),
nextindex,attrIndex,NULL);
m_firstch.setElementAt(DTM.NULL,attrIndex);
// If the xml: prefix is explicitly declared
// we don't need to synthesize one.
//
// NOTE that XML Namespaces were not originally
// defined as being namespace-aware (grrr), and
// while the W3C is planning to fix this it's
// safer for now to test the QName and trust the
// parsers to prevent anyone from redefining the
// reserved xmlns: prefix
if(!m_processedFirstElement
&& "xmlns:xml".equals(attrs.item(i).getNodeName()))
m_processedFirstElement=true;
}
// Terminate list of attrs, and make sure they aren't
// considered children of the element
} // if attrs exist
if(!m_processedFirstElement)
{
// The DOM might not have an explicit declaration for the
// implicit "xml:" prefix, but the XPath data model
// requires that this appear as a Namespace Node so we
// have to synthesize one. You can think of this as
// being a default attribute defined by the XML
// Namespaces spec rather than by the DTD.
attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
(Element)next,"xml",NAMESPACE_DECL_NS,
makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
),
nextindex,attrIndex,NULL);
m_firstch.setElementAt(DTM.NULL,attrIndex);
m_processedFirstElement=true;
}
if(attrIndex!=NULL)
m_nextsib.setElementAt(DTM.NULL,attrIndex);
} //if(ELEMENT_NODE)
} // (if !suppressNode)
// Text postprocessing: Act on values stored above
if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
{
// %TBD% If nexttype was forced to TEXT, patch the DTM node
next=lastTextNode; // Advance the DOM cursor over contiguous text
}
// Remember where we left off.
m_pos=next;
return true;
}