def escapeCharacter()

in daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/TextDelimitedUnparser.scala [256:395]


  /**
   * Scans `input` with the `field` DFA and produces the text to unparse, with
   * an escape character inserted in front of every delimiter match found in
   * the data.
   *
   * Each time the field DFA pauses, every delimiter supplied by the iterator
   * built from `delims` is run at the paused position. If none succeeds, the
   * field DFA resumes at its next rule. Otherwise the match chosen by
   * `longestMatch` is appended to the result preceded by `escapeChar` -- or by
   * `escapeEscapeChar` when the matched delimiter's `lookingFor` is exactly
   * the single character `escapeChar` -- and scanning restarts right after the
   * matched text.
   *
   * Registers taken from `state.dfaRegistersPool` are handed back even when
   * an error is raised part-way through the scan (see the `finally` below).
   *
   * @param input                 stream holding the data to examine; it is only
   *                              used to drive the DFAs
   * @param field                 DFA that consumes ordinary field characters;
   *                              must not be null
   * @param delims                delimiters that need escaping; must not be null
   * @param hasEscCharAsDelimiter when true an UnparseError is raised before any
   *                              scanning is done
   * @param escapeChar            character inserted before a matched delimiter
   * @param escapeEscapeChar      character inserted before an occurrence of
   *                              `escapeChar` in the data; an UnparseError is
   *                              raised if it is needed but not defined
   * @param state                 unparser state providing the register pool and
   *                              the location used in diagnostics
   * @return the accumulated result string and a flag that is true when at least
   *         one escape was inserted
   */
  def escapeCharacter(
    input: DataInputStream,
    field: DFAField,
    delims: Array[DFADelimiter],
    hasEscCharAsDelimiter: Boolean,
    escapeChar: Char,
    escapeEscapeChar: MaybeChar,
    state: UState
  ): (String, Boolean) = {
    Assert.invariant(delims != null)
    Assert.invariant(field != null)
    // NOTE(review): UnparseError(...) is used as a statement here, so it is
    // presumably expected to throw -- confirm against its definition.
    if (hasEscCharAsDelimiter)
      UnparseError(
        One(context.schemaFileLocation),
        One(state.currentLocation),
        "The dfdl:terminator and dfdl:separator may not begin with the dfdl:escapeCharacter: '%s'.",
        escapeChar
      )

    // Registers of delimiters that matched at the current position. Entries
    // are checked out of the pool until they are explicitly returned.
    val successes: ArrayBuffer[(DFADelimiter, Registers)] = ArrayBuffer.empty
    val fieldReg: Registers = state.dfaRegistersPool.getFromPool("escapeCharacter1")
    var escapeOccurred: Boolean = false

    val resString =
      try {
        val delimIter =
          new AllDelimiterIterator(ArrayBuffer(ArraySeq.unsafeWrapArray(delims): _*))

        fieldReg.reset(state, input, delimIter)

        var stillSearching: Boolean = true
        fieldReg.state = 0 // initial state is 0
        var beforeDelimiter: DataInputStream.MarkPos = DataInputStream.MarkPos.NoMarkPos

        while (stillSearching) {

          // We want to examine each character and if it's not part of a
          // delimiter append it to the 'field' member.  If it is part of
          // a delimiter we want to perform a longest match.  We then
          // append the 'escape' character to the 'field' member followed
          // by the matched delimiter.  We then start the process again
          // starting with the character following that of the matched
          // delimiter until we reach end of data.
          //
          field.run(fieldReg)
          val dfaStatus = fieldReg.status
          // remember where the field DFA stopped so every delimiter attempt
          // can start from the same position
          beforeDelimiter = input.markPos

          fieldReg.actionNum = 0

          dfaStatus match {
            case StateKind.EndOfData => stillSearching = false
            case StateKind.Failed => stillSearching = false
            case StateKind.Paused => {
              // If we got here, that means we found a character that could be the
              // beginning of a delimiter. So we must search through the delimiters
              // and see if any match
              delimIter.reset()
              while (delimIter.hasNext()) {
                val d = delimIter.next()
                val delimReg: Registers = state.dfaRegistersPool.getFromPool("escapeCharacter2")
                delimReg.reset(state, input, delimIter)
                input.resetPos(beforeDelimiter)
                beforeDelimiter = input.markPos
                d.run(delimReg)
                val delimStatus = delimReg.status
                delimStatus match {
                  case StateKind.Succeeded => {
                    // found a matching delimiter that we may need to escape. It is
                    // possible that there is another delimiter that is a
                    // longer match or is matched earlier, so add it to a list
                    // and we will determine that later.
                    successes += (d -> delimReg)
                  }
                  case _ => {
                    // this delim did not match, ignore it and discard its register
                    state.dfaRegistersPool.returnToPool(delimReg)
                    input.resetPos(beforeDelimiter)
                  }
                }
              }

              if (successes.isEmpty) {
                // did not match any delimiters, go to the next rule in the
                // field DFA, effectively resuming the field parse. This is
                // possible if the field.run() call found a character that could
                // potentially start a delimiter, but it ended up not matching
                // any delimiters.
                fieldReg.actionNum = fieldReg.actionNum + 1
              } else {
                // matched a delimiter, need to handle escaping it
                val (matchedDelim, matchedReg) = longestMatch(successes).get
                if (
                  matchedDelim.lookingFor.length() == 1 && matchedDelim.lookingFor(0) =#= escapeChar
                ) {
                  // the "delimiter" is the escape character itself, which is
                  // escaped with the escapeEscapeCharacter instead
                  if (escapeEscapeChar.isDefined)
                    fieldReg.appendToField(escapeEscapeChar.get)
                  else
                    UnparseError(
                      One(context.schemaFileLocation),
                      One(state.currentLocation),
                      "escapeEscapeCharacter was not defined but the escapeCharacter (%s) was present in the data.",
                      escapeChar
                    )
                } else { fieldReg.appendToField(escapeChar) }

                val delim = matchedReg.delimString
                delim.foreach { fieldReg.appendToField(_) }

                // position the input stream after the winning (longest)
                // delimiter
                //
                input.resetPos(beforeDelimiter)
                Assert.invariant(input.skipChars(delim.length, state))
                fieldReg.resetChars(state)
                successes.foreach { case (_, r) => state.dfaRegistersPool.returnToPool(r) }
                successes.clear()

                escapeOccurred = true

                // resume field parse

                fieldReg.actionNum = 0
                fieldReg.state = 0

              }
            }
          }
        }
        // No need to now advance the input, because we're unparsing, and we're done
        // so this input is going to be discarded since it existed only to enable
        // us to reuse the DFA for determining when to escape data while unparsing.
        fieldReg.resultString.toString
      } finally {
        // Hand back every register that is still checked out. After a normal
        // scan 'successes' is already empty, so only fieldReg is returned here;
        // after an error raised while handling a match, the registers of the
        // matched delimiters are returned as well instead of being leaked.
        successes.foreach { case (_, r) => state.dfaRegistersPool.returnToPool(r) }
        successes.clear()
        state.dfaRegistersPool.returnToPool(fieldReg)
      }

    // Only checked on the normal path so that it cannot mask an error that is
    // already propagating.
    state.dfaRegistersPool.finalCheck()

    (resString, escapeOccurred)
  }