in thrift/lib/cpp2/util/DebugString.cpp [148:235]
// Extracts the next token from the front of text_ and advances text_ past it.
//
// Token shapes recognized by the state machine below:
//   - a single bracket character: '[', ']', '{' or '}' (only when it is the
//     first non-whitespace character);
//   - a double-quoted string, including both quotes; a backslash inside the
//     quotes causes the following character to be skipped, so \" does not
//     terminate the string;
//   - a <...> tag with balanced nesting of '<' and '>';
//   - a regular token, which runs until whitespace, a '"', or the end of the
//     input. A '<' met inside a regular token switches to tag scanning, so the
//     token then ends at the matching '>' (e.g. list<i32>).
//
// A single ',' is dropped either from the end of the returned token or, if the
// token does not end with one, from the front of the remaining text.
//
// If the input is exhausted before a complete token is found (only whitespace
// left, an unterminated quote, or an unbalanced '<'), text_ is cleared and the
// now-empty text_ is returned.
folly::StringPiece Tokenizer::getNextToken() {
  enum {
    INITIAL_WHITESPACE = 0,
    REGULAR_TOKEN, // anything not inside "" or inside <>
    IN_QUOTES, // inside ""
    IN_QUOTES_IN_ESCAPE, // inside "", and saw \ escape
    IN_LT_TAG, // inside <>, with ltCount unmatched <'s
    TOKEN_DONE
  };

  // The <ctype.h> classification functions have undefined behavior for
  // negative arguments other than EOF, which is what a plain (signed) char
  // holding a byte >= 0x80 would produce. Always go through unsigned char.
  const auto isSpace = [](char ch) {
    return isspace(static_cast<unsigned char>(ch)) != 0;
  };

  int8_t state = INITIAL_WHITESPACE;
  // Number of currently unmatched '<'. Sized like the input so that deep
  // nesting cannot wrap the counter around.
  size_t ltCount = 0;
  size_t tokenStartPos = 0, tokenEndPos = 0;
  size_t pos = 0;
  size_t sz = text_.size();
  while (pos < sz && state != TOKEN_DONE) {
    // Note: pos already points one past c for the rest of this iteration.
    char c = text_[pos++];
    switch (state) {
      case INITIAL_WHITESPACE:
        if (isSpace(c)) {
          continue;
        }
        tokenStartPos = pos - 1;
        if (c == '<') { // unmatched '<' tag
          state = IN_LT_TAG;
          ltCount++;
        } else if (c == '"') { // unmatched quote
          state = IN_QUOTES;
        } else if (c == '[' || c == '{' || c == '}' || c == ']') {
          // Brackets are single-character tokens.
          tokenEndPos = pos;
          state = TOKEN_DONE;
        } else {
          state = REGULAR_TOKEN;
        }
        break;
      case REGULAR_TOKEN: // Anything not inside "" or <>
        if (c == '"' || isSpace(c)) {
          // Stop the token on seeing "" or space; the terminating character
          // is left in text_ for the next call.
          tokenEndPos = pos - 1;
          state = TOKEN_DONE;
        }
        if (c == '<') {
          // Keep attached <> as part of token, e.g. list<i32>
          state = IN_LT_TAG;
          ltCount++;
        }
        break;
      case IN_QUOTES:
        if (c == '\\') {
          state = IN_QUOTES_IN_ESCAPE;
        } else if (c == '"') {
          tokenEndPos = pos; // end token, closing quote included
          state = TOKEN_DONE;
        }
        break;
      case IN_QUOTES_IN_ESCAPE:
        // The escaped character is consumed without being interpreted.
        state = IN_QUOTES;
        break;
      case IN_LT_TAG:
        // ltCount is at least 1 whenever this state is active, so the
        // decrement cannot underflow.
        if (c == '>' && --ltCount == 0) {
          tokenEndPos = pos; // end token, closing '>' included
          state = TOKEN_DONE;
        } else if (c == '<') {
          ++ltCount;
        }
        break;
    }
  }
  if (state == REGULAR_TOKEN) {
    // Input ended in the middle of a regular token: it extends to the end.
    tokenEndPos = pos;
    state = TOKEN_DONE;
  }
  if (state != TOKEN_DONE) {
    // No complete token is available; drop whatever is left.
    text_.clear();
    return text_;
  }
  assert(tokenEndPos <= sz);
  folly::StringPiece ret =
      text_.subpiece(tokenStartPos, tokenEndPos - tokenStartPos);
  text_.advance(tokenEndPos);
  // clear out ','
  if (ret.endsWith(',')) {
    ret.pop_back();
  } else if (text_.startsWith(',')) {
    text_.pop_front();
  }
  return ret;
}