in daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala [707:826]
def load(
  source: DaffodilSchemaSource,
  optSchemaURI: Option[URI],
  addPositionAttributes: Boolean = false
): scala.xml.Node = {
  // This public entry point always requests CRLF-to-LF normalization; the
  // four-argument overload is the one that exposes the flag to callers.
  val normalizeLineEndings = true
  load(source, optSchemaURI, addPositionAttributes, normalizeLineEndings)
}
/**
* Package-private overload that gives access to the normalizeCRLFtoLF feature.
*
* @param source The source of the XML document (read via its uriForLoading), which may be an XML or DFDL schema, or just XML data.
* @param optSchemaURI Optional URI for XML schema for the XML source document.
* @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements.
* Defaults to false.
* @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true,
* but some special case situations may require preservation of CRLF/CR.
* @return a scala.xml.Node (actually an Element) which is the document element of the source.
*/
private[xml] def load(
  source: DaffodilSchemaSource,
  optSchemaURI: Option[URI],
  addPositionAttributes: Boolean,
  normalizeCRLFtoLF: Boolean
): scala.xml.Node = {
  // Overview: when a schema URI is supplied, the source is validated twice
  // (explicit validator pass, then a validating Xerces reader pass) with all
  // problems reported through errorHandler. After that, regardless of whether
  // a schema was supplied, the document is loaded with the constructing loader
  // and that result is what is returned.
  //
  // First we invoke the validator to explicitly validate the XML against
  // the XML Schema (not necessarily a DFDL schema), via the
  // javax.xml.validation.Validator's validate method.
  //
  optSchemaURI.foreach { schemaURI =>
    val validator = XercesValidator.fromURIs(Seq(schemaURI))
    val inputStream = source.uriForLoading.toURL.openStream()
    // Close the stream in a finally block so that it is released even when
    // validateXML throws; otherwise the open stream would be leaked.
    try {
      validator.validateXML(inputStream, errorHandler)
    } finally {
      inputStream.close()
    }
    //
    // Next we have to invoke a regular xerces loader, setup for validation
    // because that will actually interpret things like xsi:schemaLocation attributes
    // of the root element.
    //
    // The check of xsi:schemaLocation schemas seems to be the only reason we
    // have to run this additional test.
    //
    // Possibly xsi:noNamespaceSchemaLocation would have the same issue, but as of
    // this writing, we have no tests that use that.
    //
    val parser = parserFromURI(optSchemaURI)
    val xrdr: XMLReader = {
      val r = parser.getXMLReader()
      // We must use XMLReader setProperty() function to set the entity resolver--calling
      // setEntityResolver with the Xerces XML reader causes validation to fail for some
      // reason (we get a "cvc-elt.1.a: Cannot find the declaration of element 'schema'" error).
      // We call the right function below, but unfortunately, scala-xml calls
      // setEntityResolver in loadDocument(), which cannot be disabled and scala-xml does not
      // want to change. To avoid this, we wrap the Xerces XMLReader in an XMLFilterImpl and
      // override setEntityResolver to a no-op. However, XMLFilterImpl parse() calls
      // setEntityResolver() on the XMLReader, which for the same reason as before causes
      // issues. To fix this, we can override parse() to just pass through to the parent, but
      // that means we must override the various set/get handler functions to also pass
      // through to the parent.
      val w = new XMLFilterImpl(r) {
        override def setEntityResolver(resolver: EntityResolver): Unit = {} // no-op
        override def parse(input: InputSource): Unit = getParent.parse(input)
        override def setContentHandler(handler: ContentHandler): Unit =
          getParent.setContentHandler(handler)
        override def setDTDHandler(handler: DTDHandler): Unit =
          getParent.setDTDHandler(handler)
        override def setErrorHandler(handler: ErrorHandler): Unit =
          getParent.setErrorHandler(handler)
        override def getContentHandler(): ContentHandler =
          getParent.getContentHandler()
        override def getDTDHandler(): DTDHandler =
          getParent.getDTDHandler()
        override def getErrorHandler(): ErrorHandler =
          getParent.getErrorHandler()
      }
      w.setErrorHandler(errorHandler)
      w.setProperty("http://apache.org/xml/properties/internal/entity-resolver", resolver)
      w
    }
    val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
    try {
      // it is important that we call loadDocument to parse/validate the XML instead of
      // directly calling xrdr.parse. Although loadDocument does eventually call xrdr.parse,
      // it first modifies the reader in a number of ways to prepare it for use with this
      // FactoryAdapter, as well as initialize private state that is used by ContentHandler
      // functions.
      loadDocument(saxSource, xrdr)
    } catch {
      // can be thrown by the resolver if a schemaLocation of
      // an import/include cannot be resolved.
      // Regular Xerces doesn't report that as an error.
      case spe: SAXParseException => errorHandler.error(spe)
    }
    // no result, as the errors are reported separately
  }
  //
  // To get reliable xml nodes including conversion of CDATA syntax into
  // PCData nodes, we have to use a different loader.
  //
  val constructingLoader =
    new DaffodilConstructingLoader(
      source.uriForLoading,
      errorHandler,
      addPositionAttributes,
      normalizeCRLFtoLF
    )
  // The value of this try expression is the method's result. The loader's
  // input is closed on both the success and the failure path.
  try {
    constructingLoader.load() // construct the XML objects for us.
  } catch {
    case e: SAXParseException => // fatal. We can't successfully load.
      throw e // good place for a breakpoint
  } finally {
    constructingLoader.input.close()
  }
}