in daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/TextDelimitedUnparser.scala [81:250]
/**
 * Scans `input` with the `field` DFA and accumulates the text to emit,
 * deciding along the way whether the text must be wrapped in an escape block.
 *
 * Each time the field DFA pauses (it saw a character that could begin a
 * delimiter) the data at that position is matched first against `blockEnd`
 * and, if that does not succeed, against every delimiter in `delims`:
 *
 *  - `blockEnd` matched: `escapeEscapeChar` followed by the matched text is
 *    appended to the result, and an escape block is required.
 *  - one or more delimiters matched: the one chosen by `longestMatch` is
 *    appended as plain field content, and an escape block is required.
 *  - nothing matched: the field DFA is resumed.
 *
 * Scanning ends when the field DFA reports `EndOfData` or `Failed`.
 *
 * The field and block-end registers are returned to `state.dfaRegistersPool`
 * on every exit path, including when an error propagates out of this method.
 * Delimiter registers that are in flight inside the delimiter loop are only
 * returned on the normal path.
 *
 * @param input            stream over the text being examined; its position is
 *                         saved and restored (markPos/resetPos) while scanning
 * @param field            DFA used to scan field content
 * @param delims           delimiters whose presence requires an escape block; must not be null
 * @param blockEnd         DFA recognizing the escape block end
 * @param escapeEscapeChar character inserted in front of a block end found in the data
 * @param state            unparser state, used for the register pool and error location
 * @return the accumulated result string, and `true` when a block end or a
 *         delimiter was found in the data (so an escape block must be generated)
 *
 * An UnparseError is raised when a block end is found in the data but
 * `escapeEscapeChar` is not defined.
 */
final def escapeBlock(
  input: DataInputStream,
  field: DFAField,
  delims: Array[DFADelimiter],
  blockEnd: DFADelimiter,
  escapeEscapeChar: MaybeChar,
  state: UState
): (String, Boolean) = {
  Assert.invariant(delims != null)
  Assert.invariant(field != null)

  val blockEndDelimIter = new AllDelimiterIterator(ArrayBuffer(blockEnd))
  val successes: ArrayBuffer[(DFADelimiter, Registers)] = ArrayBuffer.empty

  // We need to recognize the blockEnd in addition to the other pieces of
  // text we should escape
  //
  val fieldReg: Registers = state.dfaRegistersPool.getFromPool("escapeBlock1")
  val fieldEscapesIter = {
    val ab = ArrayBuffer(ArraySeq.unsafeWrapArray(blockEnd +: delims): _*)
    new AllDelimiterIterator(ab)
  }
  val delimIter = new AllDelimiterIterator(ArrayBuffer(ArraySeq.unsafeWrapArray(delims): _*))

  var stillSearching: Boolean = true
  var shouldGenerateEscapeBlock: Boolean = false
  var beforeDelimiter: DataInputStream.MarkPos = DataInputStream.MarkPos.NoMarkPos

  // try/finally guarantees fieldReg goes back to the pool even when an error
  // (e.g. the UnparseError below) propagates out of the scan.
  val resString =
    try {
      fieldReg.reset(state, input, fieldEscapesIter)
      while (stillSearching) {
        // Run the field DFA until it stops. If it paused, the next character
        // could begin a block end or a delimiter: test for the block end
        // first, then for the delimiters (keeping the match picked by
        // longestMatch). Matched text is appended to the field result (a
        // block end is preceded by the escapeEscapeChar) and scanning resumes
        // after it, until we reach end of data.
        //
        Assert.invariant(beforeDelimiter =#= DataInputStream.MarkPos.NoMarkPos)
        field.run(fieldReg)
        val dfaStatus = fieldReg.status
        fieldReg.actionNum = 0 // unnecessary?
        beforeDelimiter = input.markPos
        dfaStatus match {
          case StateKind.EndOfData => stillSearching = false
          case StateKind.Failed => stillSearching = false
          case StateKind.Paused => {
            // If we got here, that means we found a character that could be the
            // beginning of a delimiter. This could be many different things
            // (parent separator, block end, etc), so we must figure that out
            // We check for a blockEnd first, if it exists then we MUST
            // generate an escape block
            val blockEndReg: Registers = state.dfaRegistersPool.getFromPool("escapeBlock2")
            try {
              blockEndReg.reset(state, input, blockEndDelimIter)
              blockEnd.run(blockEndReg)
              val blockEndStatus = blockEndReg.status
              blockEndStatus match {
                case StateKind.Succeeded if (!escapeEscapeChar.isDefined) => {
                  // Found an escapeEnd, which requires an escapeEscapeChar, but one was not provided
                  beforeDelimiter = DataInputStream.MarkPos.NoMarkPos
                  UnparseError(
                    One(context.schemaFileLocation),
                    One(state.currentLocation),
                    "escapeEscapeCharacter was not defined but the escapeBlockEnd (%s) was present in the data.",
                    blockEnd.lookingFor
                  )
                }
                case StateKind.Succeeded => {
                  // Found an escapeEnd, that means we must insert an escapeEscapeChar
                  val afterBlockEnd =
                    input.markPos // save position immediately after the blockEnd we found.
                  //
                  // note. The appendToField code assumes that a character needs to be read from
                  // the input. However, the input has already been advanced past the blockEnd
                  // (in the call to blockEnd.run above).
                  //
                  // TODO: scrutinize DFA code. Why does appendToField call commitOneChar anyway?
                  // It may not need to do that anymore, and that would allow us to get rid of the
                  // input.markPos above, and input.resetPos below.
                  //
                  fieldReg.appendToField(escapeEscapeChar.get)
                  val matchedBlockEnd = blockEndReg.delimString
                  fieldReg.appendToField(matchedBlockEnd)
                  input.resetPos(afterBlockEnd) // we want to resume scanning after the blockEnd.
                  shouldGenerateEscapeBlock = true
                  beforeDelimiter = DataInputStream.MarkPos.NoMarkPos
                  // restart the field DFA from its initial state/action
                  fieldReg.actionNum = 0
                  fieldReg.state = 0
                  // now go around the while loop again
                }
                case _ => {
                  // We did not find a block end, so check for the other pieces
                  // of text we should generate an escape block for (e.g. separators, terminators)
                  delimIter.reset()
                  while (delimIter.hasNext()) {
                    val d = delimIter.next()
                    val delimReg: Registers = state.dfaRegistersPool.getFromPool("escapeBlock3")
                    // every delimiter is tried from the same starting position
                    input.resetPos(beforeDelimiter)
                    beforeDelimiter = input.markPos
                    delimReg.reset(state, input, delimIter)
                    d.run(delimReg)
                    val delimStatus = delimReg.status
                    delimStatus match {
                      case StateKind.Succeeded => {
                        // found a matching delimiter that may need escaping. It is
                        // possible that there is another delimiter that is a
                        // longer match or is matched earlier, so add it to a list
                        // and we will determine that later.
                        successes += (d -> delimReg)
                      }
                      case _ => {
                        // this delim did not match, ignore it and discard its register
                        state.dfaRegistersPool.returnToPool(delimReg)
                      }
                    }
                  }
                  input.resetPos(beforeDelimiter)
                  beforeDelimiter = DataInputStream.MarkPos.NoMarkPos
                  fieldReg.resetChars(state)
                  if (successes.isEmpty) {
                    // did not match any delimiters, go to the next rule in the
                    // field DFA, effectively resuming the field parse. This is possible
                    // if the field.run() call found a character that could
                    // potentially start a delimiter, but it ended up not matching
                    // any delimiters.
                    //
                    // NOTE(review): actionNum was set to 0 right after field.run above,
                    // so unless resetChars changes it this always resumes at action 1
                    // -- confirm that is the intended resume point.
                    fieldReg.actionNum = fieldReg.actionNum + 1
                  } else {
                    // matched a delimiter; successes is non-empty here
                    val (_, matchedReg) = longestMatch(successes).get
                    val delim = matchedReg.delimString
                    // the delim just becomes field content, because we already had an
                    // escape block start.
                    fieldReg.appendToField(delim)
                    successes.foreach { case (_, r) => state.dfaRegistersPool.returnToPool(r) }
                    successes.clear()
                    shouldGenerateEscapeBlock = true
                    // restart the field DFA from its initial state/action
                    fieldReg.actionNum = 0
                    fieldReg.state = 0
                  }
                  // now go around the while loop again
                } // end case _ (no block end found at this position)
              } // end blockEndStatus match
            } finally {
              // returned on success and on error alike
              state.dfaRegistersPool.returnToPool(blockEndReg)
            }
          } // end case StateKind.Paused for finding any of block end or a delimiter
        } // end dfaStatus match
      } // end while stillSearching
      //
      // The only way we drop out here is if stillSearching is false.
      // That happens on end of data (end of the infoset string.. we're unparsing here)
      // or when the field DFA reports Failed. It means we're done.
      //
      // No need to now advance the input, because we're unparsing, and we're done
      // so this input is going to be discarded since it existed only to enable
      // us to reuse the DFA for determining when to escape data while unparsing.
      fieldReg.resultString.toString
    } finally {
      state.dfaRegistersPool.returnToPool(fieldReg)
    }
  state.dfaRegistersPool.finalCheck()
  (resString, shouldGenerateEscapeBlock)
}