Status Advance()

in tensorflow_text/core/kernels/sentence_fragmenter.cc [80:140]


  // Runs one step of the fragment-boundary state machine on the token at
  // `index` in `document`.
  //
  // On an OK return, `*result` is false when the token could not be consumed
  // from the current state (no member state is modified in that case), and
  // true otherwise. Note that a token which leaves the machine in
  // INITIAL_STATE still counts as consumed: `*result` is true and
  // `limit_index_` is moved past it.
  //
  // If any of the `util` classification calls reports an error, that error is
  // propagated through TF_RETURN_IF_ERROR and `*result` is not written.
  Status Advance(const UnicodeUtil *util, const Document &document, int index,
                 bool *result) {
    const Token &current = document.tokens()[index];
    const tstring &text = current.word();

    // Classify the token text up front; all three answers are needed by the
    // transitions below.
    bool terminal = false;
    TF_RETURN_IF_ERROR(util->IsTerminalPunc(text, &terminal));
    bool ellipsis = false;
    TF_RETURN_IF_ERROR(util->IsEllipsis(text, &ellipsis));
    bool closing = false;
    TF_RETURN_IF_ERROR(util->IsClosePunc(text, &closing));

    // Cleared only when the current state has no transition for this token.
    bool accepted = true;

    switch (state_) {
      case INITIAL_STATE: {
        const bool begins_terminal_run =
            terminal || ellipsis || IsPeriodSeparatedAcronym(current) ||
            TokenHasProperty(Token::EMOTICON, current);
        if (begins_terminal_run) {
          first_terminal_punc_index_ = index;
          state_ = COLLECTING_TERMINAL_PUNC;
        }
        break;
      }
      case COLLECTING_TERMINAL_PUNC: {
        // Terminal punctuation, ellipses and emoticons keep us in this state.
        const bool extends_terminal_run =
            terminal || ellipsis || TokenHasProperty(Token::EMOTICON, current);
        if (!extends_terminal_run) {
          if (closing) {
            first_close_punc_index_ = index;
            state_ = COLLECTING_CLOSE_PUNC;
          } else {
            accepted = false;
          }
        }
        break;
      }
      case COLLECTING_CLOSE_PUNC: {
        // Emoticons and ellipses are effectively ignored here, so closing
        // punctuation that follows them is still accepted.
        accepted =
            closing || ellipsis || TokenHasProperty(Token::EMOTICON, current);
        break;
      }
    }

    if (!accepted) {
      *result = false;
      return Status::OK();
    }

    limit_index_ = index + 1;
    if (state_ == COLLECTING_TERMINAL_PUNC) {
      // Terminal punctuation seen but no close punctuation yet, so the close
      // punctuation range is kept empty at the current limit.
      first_close_punc_index_ = limit_index_;
    }
    *result = true;
    return Status::OK();
  }