final def parse()

in daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/TextDelimitedParser.scala [45:163]


  final def parse(
    state: PState,
    input: DataInputStream,
    field: DFAField,
    delimIter: DelimiterIterator,
    isDelimRequired: Boolean
  ): Maybe[ParseResult] = {
    Assert.invariant(field != null)

    val lmt = new LongestMatchTracker()

    val fieldReg: Registers = state.dfaRegistersPool.getFromPool("TextDelimitedParserBase1")
    try { // to insure the fieldReg are returned to the pool even if there is an unexpected throw from the I/O layer
      fieldReg.reset(state, input, delimIter) // Initialization

      var stillSearching: Boolean = true
      var beforeDelimiter: DataInputStream.MarkPos = DataInputStream.MarkPos.NoMarkPos
      while (stillSearching) {

        Assert.invariant(beforeDelimiter =#= DataInputStream.MarkPos.NoMarkPos)
        field.run(fieldReg)
        beforeDelimiter = input.markPos

        fieldReg.status match {
          case StateKind.EndOfData => stillSearching = false
          case StateKind.Failed => stillSearching = false
          case StateKind.Paused => {

            delimIter.reset()
            while (delimIter.hasNext()) {
              val d = delimIter.next()
              input.resetPos(beforeDelimiter)
              beforeDelimiter = input.markPos
              val delimReg: Registers =
                state.dfaRegistersPool.getFromPool("TextDelimitedParserBase2")
              delimReg.reset(state, input, delimIter)
              d.run(delimReg)
              if (delimReg.status == StateKind.Succeeded) {
                lmt.successfulMatch(
                  delimReg.matchStartPos,
                  delimReg.delimString,
                  d,
                  delimIter.currentIndex
                )
              }
              state.dfaRegistersPool.returnToPool(delimReg)
            }
            if (!lmt.longestMatches.isEmpty) {
              stillSearching = false
            } else {
              // resume field parse
              // TODO: Please explain here why this is the way one resumes the field dfa?
              // TODO: The above assignment to the actionNum is the only reason that actionNum can't just
              // be a local variable in the run-the-rules loop.
              //
              // I'd like this code better if the flow was different.
              //
              // Right now: When scanning to isolate a field, when we hit a character that could be the first
              // character of some delimiter, we return with status PAUSED, which means PAUSE to see if there is
              // a complete delimiter.
              // If so then we're done with the field. If not, however, then we go around the loop and resume ...
              // and the rub is we resume in the middle of the rules for the current state. This is subtle, and
              // error prone.
              //
              // A better flow would (a) encapsulate all this redundant code better (b) on encountering the
              // first character of a delimiter, transition to a state that represents that we found a possible
              // first character of a delimiter. Then that
              // state's rules would be guarded by finding the longest match delimiter. If found transition to a
              // state indicating a field has been isolated. If the delimiter is not found, then accumulate the character
              // as a constituent of the field, and transition to the start state.
              //
              input.resetPos(
                beforeDelimiter
              ) // reposition input to where we were trying to find a delimiter (but did not)
              beforeDelimiter = DataInputStream.MarkPos.NoMarkPos
              fieldReg.actionNum =
                fieldReg.actionNum + 1 // but force it to goto next rule so it won't just retry what it just did.
              stillSearching = true
            }
          }
        }
      }
      Assert.invariant(beforeDelimiter != DataInputStream.MarkPos.NoMarkPos)
      input.resetPos(beforeDelimiter)
      val result = {
        if (lmt.longestMatches.isEmpty) {
          // there were no delimiter matches
          if (isDelimRequired) Nope
          else {
            val fieldValue: Maybe[String] = {
              val str = fieldReg.resultString.toString
              // TODO: Performance - avoid this copying of the string. We should be able to trim
              // on a CharSequence which is a base of both String and StringBuilder
              // Difficulty is the only common base to String and StringBuilder is CharSequence which is
              // pretty sparse.
              val fieldNoPadding = trimByJustification(str)
              One(fieldNoPadding)
            }
            One(new ParseResult(fieldValue, Nope, lmt.longestMatches))
          }
        } else {
          val fieldValue: Maybe[String] = {
            val str = fieldReg.resultString.toString // TODO: Performance see above.
            val fieldNoPadding = trimByJustification(str)
            One(fieldNoPadding)
          }
          val delim: Maybe[String] = {
            One(lmt.longestMatchedString)
          }

          One(new ParseResult(fieldValue, delim, lmt.longestMatches))
        }
      }
      result
    } finally {
      state.dfaRegistersPool.returnToPool(fieldReg)
      state.dfaRegistersPool.finalCheck()
    }
  }