in v1/src/main/java/com/google/cloud/teleport/spanner/common/SizedType.java [186:377]
/**
 * Parses a Spanner type name into a {@link SizedType}.
 *
 * <p>For {@code GOOGLE_STANDARD_SQL} the recognized forms are the scalar names ({@code BOOL},
 * {@code INT64}, {@code FLOAT32}, {@code FLOAT64}, {@code UUID}, {@code TIMESTAMP}, {@code DATE},
 * {@code NUMERIC}, {@code JSON}, {@code TOKENLIST}), the sized forms {@code STRING(n|MAX)} and
 * {@code BYTES(n|MAX)}, and the composite forms {@code ARRAY<...>} (optionally carrying a vector
 * length annotation), {@code PROTO<...>}, {@code ENUM<...>} and {@code STRUCT<...>}.
 *
 * <p>For {@code POSTGRESQL} the recognized forms are the scalar names handled below, {@code
 * character varying} with an optional length, and {@code xxx[]} arrays (optionally carrying a
 * vector length annotation).
 *
 * <p>A size of {@code -1} denotes an unbounded ({@code MAX}) length; a {@code null} size is used
 * for types that carry no length.
 *
 * @param spannerType the type name to parse
 * @param dialect the dialect whose type grammar is applied
 * @return the parsed type with its size and, for annotated arrays, the array length
 * @throws IllegalArgumentException if the type name is not recognized or is too short to contain
 *     the delimiters its prefix implies (a non-numeric length surfaces as {@link
 *     NumberFormatException}, which is a subclass)
 */
public static SizedType parseSpannerType(String spannerType, Dialect dialect) {
  switch (dialect) {
    case GOOGLE_STANDARD_SQL:
      {
        if (spannerType.equals("BOOL")) {
          return t(Type.bool(), null);
        }
        if (spannerType.equals("INT64")) {
          return t(Type.int64(), null);
        }
        if (spannerType.equals("FLOAT32")) {
          return t(Type.float32(), null);
        }
        if (spannerType.equals("FLOAT64")) {
          return t(Type.float64(), null);
        }
        if (spannerType.equals("UUID")) {
          return t(Type.uuid(), null);
        }
        if (spannerType.startsWith("STRING")) {
          // 7 == "STRING(".length(): the length sits between the prefix and the last character.
          int size = parseDeclaredLength(spannerType, 7);
          return t(Type.string(), size);
        }
        if (spannerType.startsWith("BYTES")) {
          // 6 == "BYTES(".length().
          int size = parseDeclaredLength(spannerType, 6);
          return t(Type.bytes(), size);
        }
        if (spannerType.equals("TIMESTAMP")) {
          return t(Type.timestamp(), null);
        }
        if (spannerType.equals("DATE")) {
          return t(Type.date(), null);
        }
        if (spannerType.equals("NUMERIC")) {
          return t(Type.numeric(), null);
        }
        if (spannerType.equals("JSON")) {
          return t(Type.json(), null);
        }
        if (spannerType.equals("TOKENLIST")) {
          return t(Type.tokenlist(), null);
        }
        if (spannerType.startsWith("ARRAY<")) {
          // Either "ARRAY<xxx>" or "ARRAY<xxx>(vector_length)".
          // The annotated form must be tried first because it does not end with '>'.
          Matcher m = EMBEDDING_VECTOR_PATTERN.matcher(spannerType);
          if (m.find()) {
            String spannerArrayType = m.group(1);
            Integer arrayLength = Integer.parseInt(m.group(2));
            SizedType itemType = parseSpannerType(spannerArrayType, dialect);
            return t(Type.array(itemType.type), itemType.size, arrayLength);
          }
          // Plain "ARRAY<xxx>": the element type is parsed recursively.
          String spannerArrayType = textAfterPrefix(spannerType, 6);
          SizedType itemType = parseSpannerType(spannerArrayType, dialect);
          return t(Type.array(itemType.type), itemType.size);
        }
        if (spannerType.startsWith("PROTO<")) {
          // "PROTO<xxx>"
          String spannerProtoType = textAfterPrefix(spannerType, 6);
          return t(Type.proto(spannerProtoType), null);
        }
        if (spannerType.startsWith("ENUM<")) {
          // "ENUM<xxx>"
          String spannerEnumType = textAfterPrefix(spannerType, 5);
          return t(Type.protoEnum(spannerEnumType), null);
        }
        if (spannerType.startsWith("STRUCT<")) {
          // "STRUCT<xxx>"
          return parseStructType(spannerType, dialect);
        }
        break;
      }
    case POSTGRESQL:
      {
        // Handle vector_length annotation: "xxx[] vector length yyy".
        Matcher m = PG_EMBEDDING_VECTOR_PATTERN.matcher(spannerType);
        if (m.find()) {
          String spannerArrayType = m.group(1);
          Integer arrayLength = Integer.parseInt(m.group(2));
          SizedType itemType = parseSpannerType(spannerArrayType, dialect);
          return t(Type.pgArray(itemType.type), itemType.size, arrayLength);
        }
        if (spannerType.endsWith("[]")) {
          // "xxx[]": arrays must be checked before scalars so that the element type is what
          // reaches the scalar comparisons below.
          String spannerArrayType = spannerType.substring(0, spannerType.length() - 2);
          SizedType itemType = parseSpannerType(spannerArrayType, dialect);
          return t(Type.pgArray(itemType.type), itemType.size);
        }
        if (spannerType.equals("boolean")) {
          return t(Type.pgBool(), null);
        }
        if (spannerType.equals("bigint")) {
          return t(Type.pgInt8(), null);
        }
        if (spannerType.equals("real")) {
          return t(Type.pgFloat4(), null);
        }
        if (spannerType.equals("double precision")) {
          return t(Type.pgFloat8(), null);
        }
        if (spannerType.equals("text")) {
          return t(Type.pgText(), -1);
        }
        if (spannerType.equals("uuid")) {
          return t(Type.pgUuid(), null);
        }
        if (spannerType.startsWith("character varying")) {
          // "character varying" is 17 characters long, so a declared length (presumably written
          // as "(n)") starts at index 18 and ends before the last character. Without a length
          // the size is reported as unbounded (-1).
          int size = -1;
          if (spannerType.length() > 18) {
            String sizeStr = spannerType.substring(18, spannerType.length() - 1);
            size = Integer.parseInt(sizeStr);
          }
          return t(Type.pgVarchar(), size);
        }
        if (spannerType.equals("bytea")) {
          return t(Type.pgBytea(), -1);
        }
        if (spannerType.equals("timestamp with time zone")) {
          return t(Type.pgTimestamptz(), null);
        }
        if (spannerType.equals("numeric")) {
          return t(Type.pgNumeric(), null);
        }
        if (spannerType.equals("jsonb")) {
          return t(Type.pgJsonb(), null);
        }
        if (spannerType.equals("date")) {
          return t(Type.pgDate(), null);
        }
        if (spannerType.equals("spanner.commit_timestamp")) {
          return t(Type.pgSpannerCommitTimestamp(), null);
        }
        if (spannerType.equals("spanner.tokenlist")) {
          return t(Type.pgSpannerTokenlist(), null);
        }
        break;
      }
    default:
      break;
  }
  throw new IllegalArgumentException("Unknown spanner type " + spannerType);
}

/**
 * Returns the text between the first {@code prefixLength} characters and the final character of
 * {@code spannerType}.
 *
 * <p>Only the length is validated; the delimiter characters themselves are not inspected, so
 * every input that is long enough yields exactly {@code substring(prefixLength, length - 1)}.
 *
 * @throws IllegalArgumentException if {@code spannerType} is too short to hold both the prefix
 *     and a closing character
 */
private static String textAfterPrefix(String spannerType, int prefixLength) {
  int end = spannerType.length() - 1;
  if (end < prefixLength) {
    // substring() would throw StringIndexOutOfBoundsException here; report a parse error instead.
    throw new IllegalArgumentException("Unknown spanner type " + spannerType);
  }
  return spannerType.substring(prefixLength, end);
}

/**
 * Parses the length of a sized GoogleSQL type such as {@code STRING(10)} or {@code BYTES(MAX)}.
 *
 * @param prefixLength number of leading characters to skip (type keyword plus opening delimiter)
 * @return the declared length, or {@code -1} for {@code MAX}
 */
private static int parseDeclaredLength(String spannerType, int prefixLength) {
  String sizeStr = textAfterPrefix(spannerType, prefixLength);
  return sizeStr.equals("MAX") ? -1 : Integer.parseInt(sizeStr);
}

/**
 * Parses a GoogleSQL {@code STRUCT<NAME TYPE, NAME TYPE, ...>} type name.
 *
 * <p>Fields are specified as NAME TYPE and separated with commas. Since TYPE can itself be a
 * struct, the text cannot simply be split on commas; angle brackets are counted instead and any
 * comma nested inside a field type is ignored.
 *
 * @throws IllegalArgumentException if the brackets are unbalanced, a field has no type, or a
 *     field type is not recognized
 */
private static SizedType parseStructType(String spannerType, Dialect dialect) {
  // 7 == "STRUCT<".length().
  String body = textAfterPrefix(spannerType, 7);
  int end = body.length();
  ImmutableList.Builder<Type.StructField> fields = ImmutableList.builder();
  int current = 0;
  while (current < end) {
    int i = current;
    // Skip whitespace.
    while (i < end && isWhitespace(body.charAt(i))) {
      ++i;
    }
    current = i;
    // Read the name.
    while (i < end && !isWhitespace(body.charAt(i))) {
      ++i;
    }
    String fieldName = body.substring(current, i);
    // Skip whitespace.
    while (i < end && isWhitespace(body.charAt(i))) {
      ++i;
    }
    if (i >= end) {
      // Input ended before a field type was found: either only whitespace remained or the field
      // has a name but no type.
      throw new IllegalArgumentException("Unknown spanner type " + spannerType);
    }
    current = i;
    // Find the end of the type: the first comma (or stray '>') outside any nested brackets.
    int bracketCount = 0;
    for (; i < end; ++i) {
      char c = body.charAt(i);
      if (c == '<') {
        ++bracketCount;
      } else if (c == '>') {
        if (--bracketCount < 0) {
          break;
        }
      } else if (c == ',') {
        if (bracketCount == 0) {
          break;
        }
      }
    }
    if (bracketCount != 0) {
      throw new IllegalArgumentException("Unknown spanner type " + spannerType);
    }
    // Read the type. Only the field's type is kept; its size is not part of the struct field.
    SizedType fieldType = parseSpannerType(body.substring(current, i), dialect);
    fields.add(Type.StructField.of(fieldName, fieldType.type));
    // Step over the separating comma.
    current = i + 1;
  }
  return t(Type.struct(fields.build()), null);
}