folly::StringPiece Tokenizer::getNextToken()

in thrift/lib/cpp2/util/DebugString.cpp [148:235]


// Extracts the next token from the front of text_ and consumes it.
//
// Leading whitespace is skipped. A token is then one of:
//   - a single bracket character: '[', ']', '{' or '}';
//   - a double-quoted string, returned with both quotes; inside the quotes a
//     backslash makes the following character literal (so \" does not end
//     the string);
//   - a <...> group with balanced nesting, returned with the outer brackets;
//   - any other run of characters, ended by whitespace, by a '"', or by the
//     end of the input. A '<' met inside such a run keeps the <...> group
//     attached and the token ends right after the matching '>', e.g.
//     list<i32>.
//
// A single ',' separator is discarded: if the token itself ends with ',' the
// comma is stripped from the token, otherwise a ',' immediately following the
// token is removed from text_. Note that a token consisting only of ","
// therefore comes back as an empty piece while text_ may still hold input.
//
// Returns the token. If no complete token is available (only whitespace is
// left, a quote is unterminated, or a '<' is never balanced), text_ is
// cleared and the resulting empty piece is returned.
folly::StringPiece Tokenizer::getNextToken() {
  enum {
    INITIAL_WHITESPACE = 0,
    REGULAR_TOKEN, // anything not inside "" or inside <>
    IN_QUOTES, // inside ""
    IN_QUOTES_IN_ESCAPE, // inside "", and saw \ escape
    IN_LT_TAG, // inside <>, with ltCount unmatched <'s
    TOKEN_DONE
  };
  int8_t state = INITIAL_WHITESPACE;
  // Number of '<' not yet closed. Kept in a wide type so that deeply nested
  // groups cannot wrap the counter and break the balance check below.
  size_t ltCount = 0;

  // isspace() has undefined behavior when given a negative value other than
  // EOF, which is what a plain char holds for non-ASCII bytes on platforms
  // where char is signed. Go through unsigned char first.
  const auto isSpace = [](char ch) {
    return isspace(static_cast<unsigned char>(ch)) != 0;
  };

  size_t tokenStartPos = 0, tokenEndPos = 0;
  size_t pos = 0;
  size_t sz = text_.size();
  while (pos < sz && state != TOKEN_DONE) {
    // pos already points one past c for the rest of this iteration.
    char c = text_[pos++];
    switch (state) {
      case INITIAL_WHITESPACE:
        if (isSpace(c)) {
          continue;
        }
        tokenStartPos = pos - 1;
        if (c == '<') { // unmatched '<' tag
          state = IN_LT_TAG;
          ltCount++;
        } else if (c == '"') { // unmatched quote
          state = IN_QUOTES;
        } else if (c == '[' || c == '{' || c == '}' || c == ']') {
          // Brackets are always single-character tokens.
          tokenEndPos = pos;
          state = TOKEN_DONE;
        } else {
          state = REGULAR_TOKEN;
        }
        break;
      case REGULAR_TOKEN: // Anything not inside "" or <>
        if (c == '"' || isSpace(c)) {
          // Stop the token on seeing " or space. The terminator is not part
          // of the token and is left in text_ (a quote starts the next one).
          tokenEndPos = pos - 1;
          state = TOKEN_DONE;
        } else if (c == '<') {
          // Keep attached <> as part of token, e.g. list<i32>
          state = IN_LT_TAG;
          ltCount++;
        }
        break;
      case IN_QUOTES:
        if (c == '\\') {
          state = IN_QUOTES_IN_ESCAPE;
        } else if (c == '"') {
          tokenEndPos = pos; // end token, closing quote included
          state = TOKEN_DONE;
        }
        break;
      case IN_QUOTES_IN_ESCAPE:
        // Whatever follows the backslash is taken literally.
        state = IN_QUOTES;
        break;
      case IN_LT_TAG:
        // ltCount is at least 1 whenever this state is active, so the
        // decrement cannot underflow.
        if (c == '>' && --ltCount == 0) {
          tokenEndPos = pos; // end token, closing '>' included
          state = TOKEN_DONE;
        } else if (c == '<') {
          ++ltCount;
        }
        break;
    }
  }
  if (state == REGULAR_TOKEN) {
    // Input ended in the middle of a regular token: it runs to the end.
    tokenEndPos = pos;
    state = TOKEN_DONE;
  }
  if (state != TOKEN_DONE) {
    // Nothing but whitespace, or an unterminated quote / unbalanced '<'.
    text_.clear();
    return text_;
  }
  assert(tokenEndPos <= sz);
  folly::StringPiece ret =
      text_.subpiece(tokenStartPos, tokenEndPos - tokenStartPos);
  text_.advance(tokenEndPos);
  // clear out ','
  if (ret.endsWith(',')) {
    ret.pop_back();
  } else if (text_.startsWith(',')) {
    text_.pop_front();
  }
  return ret;
}