in daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala [203:318]
/**
 * Resolves a namespace URI and/or a systemId (the schemaLocation) to the URI of a schema.
 *
 * The XML catalog (`delegate`) is consulted first. If it yields no usable answer and a
 * systemId was supplied, the systemId is resolved via `XMLUtils.resolveSchemaLocation`
 * using `baseURIString` as the context.
 *
 * @param nsURI         namespace URI to look up in the catalog; may be null
 * @param systemId      system identifier (schemaLocation); may be null
 * @param baseURIString URI of the document containing the reference; used as the context
 *                      when the systemId has to be resolved without the catalog
 * @return Some(uri) when a location was found; None when all three arguments are null, or
 *         when the catalog had no answer and no systemId was supplied
 * @throws SAXParseException when a systemId was supplied but could not be resolved, or when
 *                           `XMLUtils.resolveSchemaLocation` rejects it with an
 *                           IllegalArgumentException
 */
private def resolveCommon(
  nsURI: String,
  systemId: String,
  baseURIString: String
): Option[URI] = {
  init
  if (nsURI == null && systemId == null && baseURIString == null) {
    None
  } else {
    Logger.log.debug(s"nsURI = ${nsURI}, baseURI = ${baseURIString}, systemId = ${systemId}")
    val resolvedUri = delegate.resolveURI(nsURI)
    val resolvedSystem = delegate.resolveSystem(systemId)

    // Xerces has a bug where it absolutizes systemId, i.e. the user supplies
    //
    //   <xs:schema...
    //     ... xsi:schemaLocation="urn:some:namespace /some/path.xsd"
    //
    // Xerces takes that schemaLocation URI, absolutizes it to file:/some/path.xsd and passes
    // that to our resolveEntity and in turn resolveCommon. The namespace is found in the
    // catalog, but file:/some/path.xsd will never match what the catalog returns, which is
    // something like file:/some/absolute/path/to/some/path.xsd
    //
    // As a workaround, the systemId is converted to a URI and its path (URI.getPath) is
    // compared against the end of resolvedUri. Note: this can ignore absolute URIs passed in
    // for schemaLocation, but those are edge cases where the user expects the namespace to
    // match a different file (i.e. what they provide in the schemaLocation) than what we find
    // in the catalog.
    //
    // This is a def so the systemId is parsed only when the comparison is actually needed.
    // Only called when systemId is non-null. Falls back to the raw systemId when it is not a
    // file: URI, when it cannot be parsed as a URI, or when it has no path component (opaque
    // URIs such as file:foo.xsd make getPath return null).
    def systemIdPath: String = {
      val parsed =
        try {
          Some(new URI(systemId))
        } catch {
          case _: java.net.URISyntaxException => None
        }
      parsed
        .filter(_.getScheme == "file")
        .flatMap(u => Option(u.getPath))
        .getOrElse(systemId)
    }

    // An Include in a schema with a target namespace should resolve to the systemId and
    // ignore the nsURI, because the nsURI will resolve to the including schema file. That
    // would cause the including schema to be repeatedly parsed, resulting in a stack overflow.
    val catalogResolved: Option[String] =
      if (resolvedSystem != null && resolvedSystem != resolvedUri) {
        Some(resolvedSystem)
      } else if (
        resolvedUri != null && (systemId == null || resolvedUri.endsWith(systemIdPath))
      ) {
        Some(resolvedUri)
      } else {
        // We were unable to resolve the file based on the URI or systemID.
        None
      }

    catalogResolved match {
      case Some(resolved) => {
        Logger.log.debug(s"Found via XML Catalog: ${resolved}.")
        Some(new URI(resolved))
      }
      case None if systemId == null => {
        // This happens in numerous unit tests.
        //
        // It seems that in some situations the resolver is called
        // to attempt to resolve things certain ways. Such as
        // providing just the namespace URI, without the systemId.
        //
        // So the inability to resolve, in this case anyway, is not an error.
        None
      }
      case None => {
        // We did not get a catalog resolution of the nsURI. We now look for the systemID
        // (which comes from the schemaLocation attribute) on classpath or as a file.
        val optURI =
          try {
            // NOTE(review): new URI(baseURIString) throws NullPointerException for a null
            // base and URISyntaxException for a malformed one; neither is translated into a
            // SAXParseException here. Confirm callers always supply a valid base URI
            // together with a systemId.
            val baseURI = new URI(baseURIString)
            // DaffodilXMLLoader implements a Xerces API, and Xerces doesn't have a concept of
            // the diagnostic path that a URISchemaSource needs. We don't use the diagnostic
            // path returned by resolveSchemaLocation here; all we care about is getting a URI
            // for Xerces. So the diagnostic path in the schema source doesn't really matter
            // as long as it doesn't break resolveSchemaLocation, and an empty diagnostic path
            // works fine for that.
            val uriSchemaSource = URISchemaSource(Paths.get("").toFile, baseURI)
            val resolved = XMLUtils.resolveSchemaLocation(systemId, Some(uriSchemaSource))
            // We drop the boolean part of the result because we don't care here if a relative
            // schemaLocation was resolved absolutely. Daffodil will detect that elsewhere and
            // output a warning.
            resolved.map(_._1.uri)
          } catch {
            case e: IllegalArgumentException =>
              throw new SAXParseException(
                s"Invalid or unsupported schemaLocation URI: ${e.getMessage}",
                null
              )
          }
        optURI match {
          case Some(uri) => Logger.log.debug(s"Found schemaLocation: ${uri}.")
          case None => {
            // We have to explicitly throw this, because returning with a no-resolve does not
            // cause Xerces to report an error. Instead you just get later errors about
            // symbols that can't be resolved, but it never mentions that an include/import
            // didn't work.
            throw new SAXParseException(s"Unable to resolve schemaLocation: $systemId", null)
          }
        }
        optURI
      }
    }
  }
}