in daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala [696:796]
/**
 * Recursive worker that rebuilds a node with its namespace bindings
 * filtered, Daffodil-internal attributes dropped, comments removed, and
 * adjacent text children coalesced.
 *
 * @param n the node to rebuild
 * @param ns namespaces handed to filterScope for every element; when this
 *   is empty each element's scope is replaced by xml.TopScope instead.
 *   NOTE(review): presumably these are the namespaces to strip -- confirm
 *   against filterScope.
 * @param parentScope the scope already computed for the enclosing element,
 *   or None when there is no enclosing element
 * @return the rebuilt element for an Elem, NodeSeq.Empty for a comment, and
 *   the node itself, untouched, for any other kind of node
 */
private def removeAttributes1(
  n: Node,
  ns: Seq[NS],
  parentScope: Option[NamespaceBinding]
): NodeSeq = {
  n match {
    case e @ Elem(prefix, label, attributes, scope, children @ _*) => {
      val filteredScope = if (ns.nonEmpty) filterScope(scope, ns) else xml.TopScope
      // If the filtered scope is logically the same as the parent scope, reuse
      // the parent's instance. Scala compares scopes by reference during
      // pretty printing, but scopes are immutable, so filtering always
      // creates new instances. Reusing the parent's instance keeps the
      // pretty printer from treating logically equal scopes as different.
      val newScope = parentScope.filter(_ == filteredScope).getOrElse(filteredScope)
      val newChildren: NodeSeq = children.flatMap { removeAttributes1(_, ns, Some(newScope)) }
      // Important to merge adjacent text. Otherwise two structures that print
      // out the same might not compare equal, because they have child lists
      // of different lengths.
      //
      // Ex: <foo>A</foo> creates an element containing TWO text nodes, but
      // coming from the Daffodil Infoset a string like that would be just one
      // text node. Similarly <foo>abc<![CDATA[def]]>ghi</foo> has 3 child
      // nodes: the middle one is PCData and the two around it are Text. Both
      // Text and PCData are Atom[String].
      //
      // Note: as of 2018-04-30, Mike Beckerle said: I am unable to reproduce
      // the above. <foo>A</foo>.child returns an array buffer with 1 child in
      // it, which is a Text node, and <foo>abc<![CDATA[def]]>ghi</foo> also
      // has only one Text node. That said, this is from typing them at the
      // scala shell.
      //
      // I suspect the above observations require that the
      // scala.xml.parsing.ConstructingParser is used. We do use it: the
      // regular XML loader coalesces text nodes well but does not preserve
      // whitespace in CDATA regions well, whereas the ConstructingParser does
      // not coalesce text nodes so well. That is what motivates this explicit
      // coalesce pass.
      //
      // See test test_scala_loader_cdata_bug, which characterizes the
      // behavior that is problematic for us in the standard loader, and why
      // we have to use the ConstructingParser.
      val textMergedChildren = coalesceAdjacentTextNodes(newChildren)
      // A prefix that is not accepted by prefixInScope for the new scope is
      // replaced by null, i.e. the element is rebuilt without a prefix.
      val newPrefix = if (prefixInScope(prefix, newScope)) prefix else null
      // Only attributes in the Daffodil-internal namespace are dropped. Every
      // other attribute is kept, which includes:
      //  - xsi:nil="true", also when the xsi prefix has no definition at all;
      //  - xsi:type, which is not hidden because it is used for hints for
      //    type aware comparisons.
      // TODO: actually check xsi:type attributes are correct according to the
      // schema--requires schema-awareness in TDML Runner.
      val newAttributes = attributes.filter {
        case dafIntAttr @ PrefixedAttribute(pre, _, _, _)
            if (pre ne null) &&
              (dafIntAttr.getNamespace(e) == XMLUtils.DAFFODIL_INTERNAL_NAMESPACE.toString) => {
          Assert.invariant(pre != "")
          false // drop dafint attributes.
        }
        case _ => true // keep all other attributes
      }
      // The boolean argument is Elem's minimizeEmpty flag.
      Elem(newPrefix, label, newAttributes, newScope, true, textMergedChildren: _*)
    }
    case _: scala.xml.Comment => NodeSeq.Empty // remove comments
    case other => other
  }
}