in daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/TextDelimitedUnparser.scala [256:395]
def escapeCharacter(
  input: DataInputStream,
  field: DFAField,
  delims: Array[DFADelimiter],
  hasEscCharAsDelimiter: Boolean,
  escapeChar: Char,
  escapeEscapeChar: MaybeChar,
  state: UState
): (String, Boolean) = {
  // Scans `input` with the `field` DFA and accumulates the characters into the
  // field register, inserting an escape in front of every delimiter match
  // found in the data.
  //
  // Parameters:
  //   input                 - the data to scan; its position is moved while
  //                           scanning (see the note at the end of the method)
  //   field                 - DFA that consumes field characters and pauses on
  //                           a character that could begin a delimiter
  //   delims                - the delimiter DFAs tried at each pause; must not
  //                           be null
  //   hasEscCharAsDelimiter - when true an UnparseError is raised up front
  //   escapeChar            - inserted before a matched delimiter
  //   escapeEscapeChar      - inserted instead of escapeChar when the matched
  //                           delimiter is exactly the one-character escapeChar;
  //                           an UnparseError is raised if it is needed but not
  //                           defined
  //   state                 - supplies the register pool and the location used
  //                           in diagnostics
  //
  // Returns the accumulated (escaped) string and a flag that is true when at
  // least one escape was inserted.
  Assert.invariant(delims != null)
  Assert.invariant(field != null)
  if (hasEscCharAsDelimiter)
    UnparseError(
      One(context.schemaFileLocation),
      One(state.currentLocation),
      "The dfdl:terminator and dfdl:separator may not begin with the dfdl:escapeCharacter: '%s'.",
      escapeChar
    )
  // Delimiters that matched at the current pause, each paired with the
  // register that holds the text it matched.
  val successes: ArrayBuffer[(DFADelimiter, Registers)] = ArrayBuffer.empty
  val fieldReg: Registers = state.dfaRegistersPool.getFromPool("escapeCharacter1")
  val delimIter = new AllDelimiterIterator(ArrayBuffer(ArraySeq.unsafeWrapArray(delims): _*))
  fieldReg.reset(state, input, delimIter)
  var stillSearching: Boolean = true
  fieldReg.state = 0 // initial state is 0
  var escapeOccurred: Boolean = false
  var beforeDelimiter: DataInputStream.MarkPos = DataInputStream.MarkPos.NoMarkPos
  while (stillSearching) {
    // We want to examine each character and if it's not part of a
    // delimiter append it to the 'field' member. If it is part of
    // a delimiter we want to perform a longest match. We then
    // append the 'escape' character to the 'field' member followed
    // by the matched delimiter. We then start the process again
    // starting with the character following that of the matched
    // delimiter until we reach end of data.
    //
    field.run(fieldReg)
    val dfaStatus = fieldReg.status
    // Remember where the field DFA stopped so that every delimiter attempt
    // below starts from the same input position.
    beforeDelimiter = input.markPos
    fieldReg.actionNum = 0
    dfaStatus match {
      case StateKind.EndOfData => stillSearching = false
      case StateKind.Failed => stillSearching = false
      case StateKind.Paused => {
        // If we got here, that means we found a character that could be the
        // beginning of a delimiter. So we must search through the delimiters
        // and see if any match
        delimIter.reset()
        while (delimIter.hasNext()) {
          val d = delimIter.next()
          val delimReg: Registers = state.dfaRegistersPool.getFromPool("escapeCharacter2")
          delimReg.reset(state, input, delimIter)
          // Rewind to the pause position (a previous successful attempt may
          // have left the input advanced) and re-take the mark.
          input.resetPos(beforeDelimiter)
          beforeDelimiter = input.markPos
          d.run(delimReg)
          val delimStatus = delimReg.status
          delimStatus match {
            case StateKind.Succeeded => {
              // found a matching delimiter that we may need to escape. It is
              // possible that there is another delimiter that is a
              // longer match or is matched earlier, so add it to a list
              // and we will determine that later.
              successes += (d -> delimReg)
            }
            case _ => {
              // this delim did not match, ignore it and discard its register
              state.dfaRegistersPool.returnToPool(delimReg)
              input.resetPos(beforeDelimiter)
            }
          }
        }
        if (successes.isEmpty) {
          // did not match any delimiters, go to the next rule in the
          // field, DFA effectively resuming the field parse. This is possible
          // if the field.run() call found a character that could
          // potentially start a delimiter, but it ended up not matching
          // any delimiters.
          fieldReg.actionNum = fieldReg.actionNum + 1
        } else {
          // matched a delimiter, need to handle escaping it
          // successes is non-empty on this branch.
          // NOTE(review): assumes longestMatch always yields a result for a
          // non-empty buffer - confirm against its definition.
          val (matchedDelim, matchedReg) = longestMatch(successes).get
          if (
            matchedDelim.lookingFor.length() == 1 && matchedDelim.lookingFor(0) =#= escapeChar
          ) {
            // The data contains the escape character itself, so it has to be
            // escaped with the escapeEscapeCharacter.
            if (escapeEscapeChar.isDefined)
              fieldReg.appendToField(escapeEscapeChar.get)
            else
              // NOTE(review): fieldReg and the registers held in `successes`
              // have not been returned to the pool at this point - confirm
              // that is acceptable when the error aborts the unparse.
              UnparseError(
                One(context.schemaFileLocation),
                One(state.currentLocation),
                "escapeEscapeCharacter was not defined but the escapeCharacter (%s) was present in the data.",
                escapeChar
              )
          } else { fieldReg.appendToField(escapeChar) }
          val delim = matchedReg.delimString
          delim.foreach { fieldReg.appendToField(_) }
          // position the input stream after the winning (longest)
          // delimiter
          //
          input.resetPos(beforeDelimiter)
          Assert.invariant(input.skipChars(delim.length, state))
          fieldReg.resetChars(state)
          // Every register collected for this pause is finished with, including
          // the winner whose delimiter text was copied out above.
          successes.foreach { case (_, r) => state.dfaRegistersPool.returnToPool(r) }
          successes.clear()
          escapeOccurred = true
          // resume field parse (stillSearching is still true here, so the
          // loop continues from the character after the delimiter)
          fieldReg.actionNum = 0
          fieldReg.state = 0
        }
      }
    }
  }
  // No need to now advance the input, because we're unparsing, and we're done
  // so this input is going to be discarded since it existed only to enable
  // us to reuse the DFA for determining when to escape data while unparsing.
  val resString = fieldReg.resultString.toString
  state.dfaRegistersPool.returnToPool(fieldReg)
  state.dfaRegistersPool.finalCheck()
  (resString, escapeOccurred)
}