in batik-dom/src/main/java/org/apache/batik/dom/AbstractDocument.java [1283:1625]
/**
 * Normalizes the children and attributes of the given element and then
 * recurses into every child element.
 *
 * <p>The work is done in the following order:</p>
 * <ol>
 *   <li>Child nodes: runs of adjacent text nodes (plus CDATA sections when
 *       {@code cdataSections} is {@code false}) are merged into one node,
 *       runs whose merged content is empty are removed, element content
 *       whitespace is removed when {@code elementContentWhitepace} is
 *       {@code false}, CDATA sections are handed to {@code splitCdata}
 *       when {@code splitCdataSections} is {@code true}, and comments are
 *       removed when {@code comments} is {@code false}.</li>
 *   <li>Attributes: when {@code namespaceDeclarations} is {@code false}
 *       every {@code xmlns} / {@code xmlns:*} attribute is removed;
 *       otherwise, when {@code namespaces} is {@code true}, namespace
 *       declarations are added (and attribute prefixes adjusted) so that
 *       the element and its attributes resolve to their namespace URIs.</li>
 *   <li>Well-formedness: attribute names and values, and the names and
 *       values of the child nodes, are checked and violations are reported
 *       to {@code errorHandler} (silently ignored when it is
 *       {@code null}).</li>
 * </ol>
 *
 * @param e the element to normalize; it is cast to {@link AbstractElement}
 * @param cdataSections whether CDATA sections are kept apart from text
 *        nodes; when {@code false} they take part in text coalescing
 * @param comments whether comment nodes are kept
 * @param elementContentWhitepace whether text nodes that report
 *        themselves as element content whitespace are kept
 * @param namespaceDeclarations whether namespace declaration attributes
 *        are kept
 * @param namespaces whether namespace normalization is performed (only
 *        honoured when {@code namespaceDeclarations} is {@code true})
 * @param splitCdataSections whether CDATA sections are passed to
 *        {@code splitCdata}
 * @param errorHandler receiver of well-formedness errors, may be
 *        {@code null}
 * @return {@code false} as soon as {@code splitCdata}, the error handler or
 *         a recursive call returns {@code false}; {@code true} otherwise
 */
protected boolean normalizeDocument(Element e,
                                    boolean cdataSections,
                                    boolean comments,
                                    boolean elementContentWhitepace,
                                    boolean namespaceDeclarations,
                                    boolean namespaces,
                                    boolean splitCdataSections,
                                    DOMErrorHandler errorHandler) {
    AbstractElement ae = (AbstractElement) e;

    // ---- Pass 1: text, CDATA and comment children ------------------------
    Node n = e.getFirstChild();
    while (n != null) {
        int nt = n.getNodeType();
        if (nt == Node.TEXT_NODE
                || !cdataSections && nt == Node.CDATA_SECTION_NODE) {
            // Coalesce the run of mergeable nodes that starts at t.
            Node t = n;
            StringBuilder sb = new StringBuilder();
            sb.append(t.getNodeValue());
            n = n.getNextSibling();
            while (n != null
                    && (n.getNodeType() == Node.TEXT_NODE
                        || !cdataSections
                           && n.getNodeType() == Node.CDATA_SECTION_NODE)) {
                sb.append(n.getNodeValue());
                Node next = n.getNextSibling();
                e.removeChild(n);
                n = next;
            }
            String s = sb.toString();
            if (s.length() == 0) {
                // The whole run is empty: drop its first node (the others
                // were already removed above). n already designates the
                // first node after the run, possibly null.
                e.removeChild(t);
                continue;
            }
            // NOTE(review): when the run starts with a CDATA section and
            // cdataSections is false, the merged content stays in that
            // CDATA node, whereas a run starting with a Text node is
            // replaced by a newly created Text node. This looks inverted
            // with respect to the DOM 3 "cdata-sections" parameter --
            // confirm the intended behaviour before changing it.
            if (!s.equals(t.getNodeValue())) {
                if (!cdataSections && nt == Node.TEXT_NODE) {
                    n = createTextNode(s);
                    e.replaceChild(n, t);
                } else {
                    n = t;
                    t.setNodeValue(s);
                }
            } else {
                n = t;
            }
            if (!elementContentWhitepace) {
                // Remove element content whitespace text nodes.
                nt = n.getNodeType();
                if (nt == Node.TEXT_NODE) {
                    AbstractText tn = (AbstractText) n;
                    if (tn.isElementContentWhitespace()) {
                        Node next = n.getNextSibling();
                        e.removeChild(n);
                        n = next;
                        continue;
                    }
                }
            }
            if (nt == Node.CDATA_SECTION_NODE && splitCdataSections) {
                if (!splitCdata(e, n, errorHandler)) {
                    return false;
                }
            }
        } else if (nt == Node.CDATA_SECTION_NODE && splitCdataSections) {
            // Split CDATA sections.
            if (!splitCdata(e, n, errorHandler)) {
                return false;
            }
        } else if (nt == Node.COMMENT_NODE && !comments) {
            // Remove the comment and resume from the previous sibling, so
            // that text nodes which have become adjacent are coalesced on
            // the next iteration; fall back to the next sibling when the
            // comment was the first child.
            Node resume = n.getPreviousSibling();
            if (resume == null) {
                resume = n.getNextSibling();
            }
            e.removeChild(n);
            n = resume;
            continue;
        }
        n = n.getNextSibling();
    }

    // ---- Pass 2: namespace declarations and namespace normalization ------
    NamedNodeMap nnm = e.getAttributes();
    if (!namespaceDeclarations) {
        // Collect first, remove afterwards: the attribute map is not
        // modified while it is being indexed.
        LinkedList<Attr> toRemove = new LinkedList<Attr>();
        for (int i = 0; i < nnm.getLength(); i++) {
            Attr a = (Attr) nnm.item(i);
            if (a != null
                    && (XMLConstants.XMLNS_PREFIX.equals(a.getPrefix())
                        || a.getNodeName().equals(XMLConstants.XMLNS_PREFIX))) {
                toRemove.add(a);
            }
        }
        for (Attr declaration : toRemove) {
            e.removeAttributeNode(declaration);
        }
    } else if (namespaces) {
        // NOTE(review): invalid namespace declarations are not reported to
        // the error handler here.

        // Normalize the element namespace.
        String ens = e.getNamespaceURI();
        if (ens != null) {
            String eprefix = e.getPrefix();
            if (!compareStrings(ae.lookupNamespaceURI(eprefix), ens)) {
                e.setAttributeNS
                    (XMLConstants.XMLNS_NAMESPACE_URI,
                     eprefix == null ? XMLConstants.XMLNS_PREFIX
                                     : "xmlns:" + eprefix,
                     ens);
            }
        } else if (e.getLocalName() == null) {
            // report error
        } else if (ae.lookupNamespaceURI(null) == null) {
            e.setAttributeNS
                (XMLConstants.XMLNS_NAMESPACE_URI,
                 XMLConstants.XMLNS_PREFIX,
                 "");
        }

        // Normalize the attribute namespaces.
        nnm = e.getAttributes();
        for (int i = 0; i < nnm.getLength(); i++) {
            Attr a = (Attr) nnm.item(i);
            String ans = a.getNamespaceURI();
            if (ans == null) {
                if (a.getLocalName() == null) {
                    // report error
                }
                continue;
            }
            String apre = a.getPrefix();
            if (apre != null
                    && (apre.equals(XMLConstants.XML_PREFIX)
                        || apre.equals(XMLConstants.XMLNS_PREFIX))
                    || ans.equals(XMLConstants.XMLNS_NAMESPACE_URI)) {
                // xml:*, xmlns:* and namespace declarations are left alone.
                continue;
            }
            String aprens = apre == null ? null : ae.lookupNamespaceURI(apre);
            if (apre != null && aprens != null && aprens.equals(ans)) {
                // The prefix already resolves to the attribute namespace.
                continue;
            }
            String newpre = ae.lookupPrefix(ans);
            if (newpre != null) {
                // Reuse a prefix that is already bound to the namespace.
                a.setPrefix(newpre);
            } else if (apre != null && aprens == null) {
                // The attribute prefix is unbound: declare it here.
                e.setAttributeNS
                    (XMLConstants.XMLNS_NAMESPACE_URI,
                     XMLConstants.XMLNS_PREFIX + ':' + apre,
                     ans);
            } else {
                // Declare the namespace with the first "NSx" prefix that is
                // not bound to any namespace in scope.
                for (int index = 1;; index++) {
                    newpre = "NS" + index;
                    if (ae.lookupNamespaceURI(newpre) == null) {
                        e.setAttributeNS
                            (XMLConstants.XMLNS_NAMESPACE_URI,
                             XMLConstants.XMLNS_PREFIX + ':' + newpre,
                             ans);
                        a.setPrefix(newpre);
                        break;
                    }
                }
            }
        }
    }

    // ---- Pass 3: well-formedness of the attributes -----------------------
    nnm = e.getAttributes();
    for (int i = 0; i < nnm.getLength(); i++) {
        Attr a = (Attr) nnm.item(i);
        if (!checkName(a.getNodeName())
                && errorHandler != null
                && !errorHandler.handleError(createDOMError(
                       DOMConstants.DOM_INVALID_CHARACTER_IN_NODE_NAME_ERROR,
                       DOMError.SEVERITY_ERROR,
                       "wf.invalid.name",
                       new Object[] { a.getNodeName() },
                       a,
                       null))) {
            return false;
        }
        if (!checkChars(a.getNodeValue())
                && errorHandler != null
                && !errorHandler.handleError(createDOMError(
                       DOMConstants.DOM_INVALID_CHARACTER_ERROR,
                       DOMError.SEVERITY_ERROR,
                       "wf.invalid.character",
                       new Object[] {(int) Node.ATTRIBUTE_NODE,
                                     a.getNodeName(),
                                     a.getNodeValue() },
                       a,
                       null))) {
            return false;
        }
    }

    // ---- Pass 4: well-formedness of the children, then recursion ---------
    for (Node m = e.getFirstChild(); m != null; m = m.getNextSibling()) {
        int nt = m.getNodeType();
        String s;
        switch (nt) {
        case Node.TEXT_NODE:
            s = m.getNodeValue();
            if (!checkChars(s)
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.character",
                           new Object[] {(int) m.getNodeType(),
                                         m.getNodeName(),
                                         s },
                           m,
                           null))) {
                return false;
            }
            break;
        case Node.COMMENT_NODE:
            s = m.getNodeValue();
            // A comment must not contain "--" nor end with '-'.
            // endsWith is used so that an empty comment is accepted
            // instead of failing on an out-of-range index.
            if ((!checkChars(s)
                     || s.indexOf(XMLConstants.XML_DOUBLE_DASH) != -1
                     || s.endsWith("-"))
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.character",
                           new Object[] {(int) m.getNodeType(),
                                         m.getNodeName(),
                                         s },
                           m,
                           null))) {
                return false;
            }
            break;
        case Node.CDATA_SECTION_NODE:
            s = m.getNodeValue();
            if ((!checkChars(s)
                     || s.indexOf(XMLConstants.XML_CDATA_END) != -1)
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.character",
                           new Object[] {(int) m.getNodeType(),
                                         m.getNodeName(),
                                         s },
                           m,
                           null))) {
                return false;
            }
            break;
        case Node.PROCESSING_INSTRUCTION_NODE:
            // The target must not match XML_PREFIX, whatever its case.
            if (m.getNodeName().equalsIgnoreCase(XMLConstants.XML_PREFIX)
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_IN_NODE_NAME_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.name",
                           new Object[] { m.getNodeName() },
                           m,
                           null))) {
                return false;
            }
            s = m.getNodeValue();
            if ((!checkChars(s)
                     || s.indexOf(XMLConstants
                                      .XML_PROCESSING_INSTRUCTION_END) != -1)
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.character",
                           new Object[] {(int) m.getNodeType(),
                                         m.getNodeName(),
                                         s },
                           m,
                           null))) {
                return false;
            }
            break;
        case Node.ELEMENT_NODE:
            if (!checkName(m.getNodeName())
                    && errorHandler != null
                    && !errorHandler.handleError(createDOMError(
                           DOMConstants.DOM_INVALID_CHARACTER_IN_NODE_NAME_ERROR,
                           DOMError.SEVERITY_ERROR,
                           "wf.invalid.name",
                           new Object[] { m.getNodeName() },
                           m,
                           null))) {
                return false;
            }
            if (!normalizeDocument((Element) m,
                                   cdataSections,
                                   comments,
                                   elementContentWhitepace,
                                   namespaceDeclarations,
                                   namespaces,
                                   splitCdataSections,
                                   errorHandler)) {
                return false;
            }
            break;
        }
    }
    return true;
}