def load()

in daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala [707:826]


  /**
   * Loads an XML document, always normalizing CRLF and isolated CR to LF.
   *
   * This is the public entry point; it delegates to the four-argument overload
   * with `normalizeCRLFtoLF` fixed to true.
   *
   * @param source The source of the XML document to load.
   * @param optSchemaURI Optional URI of an XML schema for the source document.
   * @param addPositionAttributes True to add position attributes to all elements.
   *                              Defaults to false.
   * @return the scala.xml.Node produced by the four-argument overload.
   */
  def load(
    source: DaffodilSchemaSource,
    optSchemaURI: Option[URI],
    addPositionAttributes: Boolean = false
  ): scala.xml.Node = {
    // Callers of the public API never get to opt out of line-ending normalization.
    load(
      source,
      optSchemaURI,
      addPositionAttributes,
      normalizeCRLFtoLF = true
    )
  }

  /**
   * Package private overload that gives access to the normalizeCRLFtoLF feature.
   *
   * When `optSchemaURI` is defined, the document is validated before it is loaded:
   * once explicitly with a validator built from the schema URI, and once more through
   * a validating Xerces reader so that things like xsi:schemaLocation attributes
   * are interpreted. Validation problems are reported through `errorHandler`; the
   * validation passes produce no result of their own. The returned node always comes
   * from a separate constructing loader.
   *
   * @param source The URI for the XML document which may be a XML or DFDL schema, or just XML data.
   * @param optSchemaURI Optional URI for XML schema for the XML source document.
   * @param addPositionAttributes True to add dafint:file dafint:line attributes to all elements.
   * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. This should usually be true,
   *                          but some special case situations may require preservation of CRLF/CR.
   * @return a scala.xml.Node (Element actually) which is the document element of the source.
   * @throws org.xml.sax.SAXParseException if the constructing loader cannot load the document.
   */
  private[xml] def load(
    source: DaffodilSchemaSource,
    optSchemaURI: Option[URI],
    addPositionAttributes: Boolean,
    normalizeCRLFtoLF: Boolean
  ): scala.xml.Node = {
    //
    // First we invoke the validator to explicitly validate the XML against
    // the XML Schema (not necessarily a DFDL schema), via the
    // javax.xml.validation.Validator's validate method.
    //
    optSchemaURI.foreach { schemaURI =>
      val validator = XercesValidator.fromURIs(Seq(schemaURI))
      val inputStream = source.uriForLoading.toURL.openStream()
      // Close the stream even when validation throws, so that a failed
      // validation does not leak the underlying stream.
      try {
        validator.validateXML(inputStream, errorHandler)
      } finally {
        inputStream.close()
      }
      //
      // Next we have to invoke a regular xerces loader, setup for validation
      // because that will actually interpret things like xsi:schemaLocation attributes
      // of the root element.
      //
      // The check of xsi:schemaLocation schemas seems to be the only reason we
      // have to run this additional test.
      //
      // Possibly xsi:noNamespaceSchemaLocation would have the same issue, but as of
      // this writing, we have no tests that use that.
      //
      val parser = parserFromURI(optSchemaURI)
      val xrdr: XMLReader = {
        val r = parser.getXMLReader()

        // We must use XMLReader setProperty() function to set the entity resolver--calling
        // setEntityResolver with the Xerces XML reader causes validation to fail for some
        // reason (we get a "cvc-elt.1.a: Cannot find the declaration of element 'schema'" error).
        // We call the right function below, but unfortunately, scala-xml calls
        // setEntityResolver in loadDocument(), which cannot be disabled and scala-xml does not
        // want to change. To avoid this, we wrap the Xerces XMLReader in an XMLFilterImpl and
        // override setEntityResolver to a no-op. However, XMLFilterImpl parse() calls
        // setEntityResolver() on the XMLReader, which for the same reason as before causes
        // issues. To fix this, we can override parse() to just pass through to the parent, but
        // that means we must override the various set/get handler functions to also pass
        // through to the parent.
        val w = new XMLFilterImpl(r) {
          override def setEntityResolver(resolver: EntityResolver): Unit = {} // no-op
          override def parse(input: InputSource): Unit = getParent.parse(input)

          override def setContentHandler(handler: ContentHandler): Unit =
            getParent.setContentHandler(handler)
          override def setDTDHandler(handler: DTDHandler): Unit =
            getParent.setDTDHandler(handler)
          override def setErrorHandler(handler: ErrorHandler): Unit =
            getParent.setErrorHandler(handler)
          override def getContentHandler(): ContentHandler =
            getParent.getContentHandler()
          override def getDTDHandler(): DTDHandler =
            getParent.getDTDHandler()
          override def getErrorHandler(): ErrorHandler =
            getParent.getErrorHandler()
        }
        w.setErrorHandler(errorHandler)
        w.setProperty("http://apache.org/xml/properties/internal/entity-resolver", resolver)
        w
      }

      val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
      try {
        // it is important that we call loadDocument to parse/validate the XML instead of
        // directly calling xrdr.parse. Although loadDocument does eventually call xrdr.parse,
        // it first modifies the reader in a number of ways to prepare it for use with this
        // FactoryAdapter, as well as initialize private state that is used by ContentHandler
        // functions.
        loadDocument(saxSource, xrdr)
      } catch {
        // can be thrown by the resolver if a schemaLocation of
        // an import/include cannot be resolved.
        // Regular Xerces doesn't report that as an error.
        case spe: SAXParseException => errorHandler.error(spe)
      }
      // no result, as the errors are reported separately
    }
    //
    // To get reliable xml nodes including conversion of CDATA syntax into
    // PCData nodes, we have to use a different loader.
    //
    val constructingLoader =
      new DaffodilConstructingLoader(
        source.uriForLoading,
        errorHandler,
        addPositionAttributes,
        normalizeCRLFtoLF
      )
    // The try expression is the method's result; the loader's input is always
    // closed, whether loading succeeds or fails.
    try {
      constructingLoader.load() // construct the XML objects for us.
    } catch {
      case e: SAXParseException => // fatal. We can't successfully load.
        throw e // good place for a breakpoint
    } finally {
      constructingLoader.input.close()
    }
  }