in paimon-common/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java [112:193]
/**
 * Splits the given type string into a flat list of {@link Token}s.
 *
 * <p>Single structural characters (sub-type and parameter delimiters, the list separator and
 * the dot) each become one token. Quoted strings, quoted identifiers, integer literals and bare
 * words are read by the corresponding {@code consume*} helper, which appends the token text to
 * a shared buffer and returns the cursor value from which scanning resumes. Whitespace between
 * tokens is skipped.
 *
 * <p>Bare words are matched against {@code KEYWORDS} case-insensitively. A keyword token
 * carries the upper-cased text, whereas an identifier token keeps its original spelling.
 *
 * @param chars the string to tokenize
 * @return the tokens in the order they appear in {@code chars}
 */
private static List<Token> tokenize(String chars) {
    final List<Token> tokens = new ArrayList<>();
    // Scratch buffer reused for every multi-character token; reset before each use.
    final StringBuilder builder = new StringBuilder();
    for (int cursor = 0; cursor < chars.length(); cursor++) {
        char curChar = chars.charAt(cursor);
        switch (curChar) {
            case CHAR_BEGIN_SUBTYPE:
                tokens.add(
                        new Token(
                                TokenType.BEGIN_SUBTYPE,
                                cursor,
                                Character.toString(CHAR_BEGIN_SUBTYPE)));
                break;
            case CHAR_END_SUBTYPE:
                tokens.add(
                        new Token(
                                TokenType.END_SUBTYPE,
                                cursor,
                                Character.toString(CHAR_END_SUBTYPE)));
                break;
            case CHAR_BEGIN_PARAMETER:
                tokens.add(
                        new Token(
                                TokenType.BEGIN_PARAMETER,
                                cursor,
                                Character.toString(CHAR_BEGIN_PARAMETER)));
                break;
            case CHAR_END_PARAMETER:
                tokens.add(
                        new Token(
                                TokenType.END_PARAMETER,
                                cursor,
                                Character.toString(CHAR_END_PARAMETER)));
                break;
            case CHAR_LIST_SEPARATOR:
                tokens.add(
                        new Token(
                                TokenType.LIST_SEPARATOR,
                                cursor,
                                Character.toString(CHAR_LIST_SEPARATOR)));
                break;
            case CHAR_DOT:
                tokens.add(
                        new Token(
                                TokenType.IDENTIFIER_SEPARATOR,
                                cursor,
                                Character.toString(CHAR_DOT)));
                break;
            case CHAR_STRING:
                builder.setLength(0);
                // The token records the cursor value returned by the helper, i.e. the
                // position after consumption rather than the opening quote.
                cursor = consumeEscaped(builder, chars, cursor, CHAR_STRING);
                tokens.add(new Token(TokenType.LITERAL_STRING, cursor, builder.toString()));
                break;
            case CHAR_IDENTIFIER:
                builder.setLength(0);
                cursor = consumeEscaped(builder, chars, cursor, CHAR_IDENTIFIER);
                tokens.add(new Token(TokenType.IDENTIFIER, cursor, builder.toString()));
                break;
            default:
                if (Character.isWhitespace(curChar)) {
                    // Whitespace only separates tokens; it never produces one.
                    continue;
                }
                if (isDigit(curChar)) {
                    builder.setLength(0);
                    cursor = consumeInt(builder, chars, cursor);
                    tokens.add(new Token(TokenType.LITERAL_INT, cursor, builder.toString()));
                    break;
                }
                // Anything else is a bare word: either a keyword or an unquoted identifier.
                builder.setLength(0);
                cursor = consumeIdentifier(builder, chars, cursor);
                final String token = builder.toString();
                // Upper-case with a fixed locale so keyword detection does not depend on the
                // JVM default locale (e.g. under a Turkish locale 'i' upper-cases to a dotted
                // capital I, which would not match an ASCII upper-case keyword).
                final String normalizedToken = token.toUpperCase(java.util.Locale.ROOT);
                if (KEYWORDS.contains(normalizedToken)) {
                    tokens.add(new Token(TokenType.KEYWORD, cursor, normalizedToken));
                } else {
                    tokens.add(new Token(TokenType.IDENTIFIER, cursor, token));
                }
        }
    }
    return tokens;
}