in odps-data-carrier/meta-processor/src/main/java/com/aliyun/odps/datacarrier/metaprocessor/HiveTypeTransformer.java [79:191]
/**
 * Transforms a Hive type definition into its ODPS counterpart.
 *
 * <p>Primitive types map mostly one-to-one; DECIMAL, TIMESTAMP and STRING are mapped but
 * flagged with an incompatibility {@link Risk}. DATE and CHAR are kept as-is only when the
 * target is ODPS 2.0 with hive.compatible enabled; otherwise they fall back to DATETIME and
 * STRING respectively. ARRAY, MAP and STRUCT are handled recursively on their element,
 * key/value and field types. Unrecognized types produce a null transformed type together
 * with an unsupported-type risk.
 *
 * @param hiveType       the Hive type definition (case-insensitive, surrounding
 *                       whitespace tolerated)
 * @param odpsVersion    target ODPS version, affects DATE and CHAR handling
 * @param hiveCompatible whether hive.compatible is enabled on the ODPS side
 * @return the transform result carrying the original type, the ODPS type (null when
 *         unsupported) and the associated risk
 */
public static TypeTransformResult toOdpsType(String hiveType, ODPS_VERSION odpsVersion,
    boolean hiveCompatible) {
  // Locale.ROOT keeps upper-casing deterministic: with a Turkish default locale,
  // "int".toUpperCase() would yield "İNT" and no regex below would ever match.
  hiveType = hiveType.toUpperCase(java.util.Locale.ROOT).trim();
  String transformedType = null;
  Risk risk = Risk.getNoRisk();
  if (hiveType.matches(TINYINT)) {
    transformedType = "TINYINT";
  } else if (hiveType.matches(SMALLINT)) {
    transformedType = "SMALLINT";
  } else if (hiveType.matches(INT)) {
    transformedType = "INT";
  } else if (hiveType.matches(BIGINT)) {
    transformedType = "BIGINT";
  } else if (hiveType.matches(FLOAT)) {
    transformedType = "FLOAT";
  } else if (hiveType.matches(DOUBLE)) {
    transformedType = "DOUBLE";
  } else if (hiveType.matches(DECIMAL)) {
    transformedType = "DECIMAL";
    risk = Risk.getInCompatibleTypeRisk(hiveType, transformedType,
        DECIMAL_INCOMPATIBILITY_REASON);
  } else if (hiveType.matches(TIMESTAMP)) {
    transformedType = "TIMESTAMP";
    risk = Risk.getInCompatibleTypeRisk(hiveType, transformedType,
        TIMESTAMP_INCOMPATIBILITY_REASON);
  } else if (hiveType.matches(DATE)) {
    // ODPS 2.0 with hive.compatible supports DATE natively; older targets only have DATETIME.
    if (ODPS_VERSION.ODPS_V2.equals(odpsVersion) && hiveCompatible) {
      transformedType = hiveType;
    } else {
      transformedType = "DATETIME";
    }
  } else if (hiveType.matches(STRING)) {
    transformedType = "STRING";
    risk = Risk.getInCompatibleTypeRisk(hiveType, transformedType, STRING_INCOMPATIBILITY_REASON);
  } else if (hiveType.matches(VARCHAR)) {
    // Preserve the length specifier captured by group(1), e.g. VARCHAR(255).
    Matcher matcher = matchOrThrow(VARCHAR, hiveType);
    transformedType = "VARCHAR" + matcher.group(1);
  } else if (hiveType.matches(CHAR)) {
    // ODPS 2.0 with hive.compatible keeps CHAR(n); otherwise it widens to STRING.
    // (The original built an unused Pattern/Matcher here; that dead code is removed.)
    if (ODPS_VERSION.ODPS_V2.equals(odpsVersion) && hiveCompatible) {
      transformedType = hiveType;
    } else {
      transformedType = "STRING";
    }
  } else if (hiveType.matches(BOOLEAN)) {
    transformedType = "BOOLEAN";
  } else if (hiveType.matches(BINARY)) {
    transformedType = "BINARY";
  } else if (hiveType.matches(ARRAY)) {
    // Recurse on the element type captured inside ARRAY<...>.
    Matcher matcher = matchOrThrow(ARRAY, hiveType);
    TypeTransformResult elementTypeTransformResult =
        toOdpsType(matcher.group(1).trim(), odpsVersion, hiveCompatible);
    transformedType = "ARRAY" + "<" + elementTypeTransformResult.getTransformedType() + ">";
  } else if (hiveType.matches(MAP)) {
    Matcher matcher = matchOrThrow(MAP, hiveType);
    // The type of key in a map must be a primitive type, so there is no comma in its type
    // definition. So we can split the type tuple of key and value by the first comma.
    String typeTuple = matcher.group(1);
    int firstCommaIdx = typeTuple.indexOf(',');
    String keyType = typeTuple.substring(0, firstCommaIdx).trim();
    String valueType = typeTuple.substring(firstCommaIdx + 1).trim();
    TypeTransformResult keyTypeTransformResult =
        toOdpsType(keyType, odpsVersion, hiveCompatible);
    TypeTransformResult valueTypeTransformResult =
        toOdpsType(valueType, odpsVersion, hiveCompatible);
    transformedType = "MAP<" + keyTypeTransformResult.getTransformedType() + "," +
        valueTypeTransformResult.getTransformedType() + ">";
  } else if (hiveType.matches(STRUCT)) {
    Matcher matcher = matchOrThrow(STRUCT, hiveType);
    // Since the type definition of a struct can be very complex and may contain any possible
    // character in a type definition, we have to split the type list properly so that we can
    // handle them recursively later.
    List<String> fieldDefinitions = splitStructFields(matcher.group(1));
    List<String> odpsFieldDefinitions = new ArrayList<>();
    for (String fieldDefinition : fieldDefinitions) {
      // Remove comments, not supported
      int commentIdx = fieldDefinition.toUpperCase().indexOf("COMMENT");
      if (commentIdx != -1) {
        fieldDefinition = fieldDefinition.substring(0, commentIdx);
      }
      // The type of a struct field can be another struct, which may contain colons. So we have
      // to split the field definition by the first colon.
      int firstColonIdx = fieldDefinition.indexOf(':');
      String fieldName = fieldDefinition.substring(0, firstColonIdx).trim();
      String fieldType = fieldDefinition.substring(firstColonIdx + 1).trim();
      TypeTransformResult fieldTypeTransformResult =
          toOdpsType(fieldType, odpsVersion, hiveCompatible);
      odpsFieldDefinitions.add(
          fieldName + ":" + fieldTypeTransformResult.getTransformedType());
    }
    transformedType = "STRUCT<" + String.join(",", odpsFieldDefinitions) + ">";
  } else {
    risk = Risk.getUnsupportedTypeRisk(hiveType);
  }
  return new TypeTransformResult(DATASOURCE_TYPE.HIVE, hiveType, transformedType, risk);
}

/**
 * Matches {@code input} against {@code regex} and returns the matcher positioned on the
 * match so callers can safely read capture groups.
 *
 * <p>Every call site has already verified {@code input.matches(regex)}, so the match is
 * expected to succeed; checking the return value here (the original ignored it) turns a
 * broken invariant into an immediate, descriptive failure instead of a bare
 * {@link IllegalStateException} from {@link Matcher#group(int)}.
 *
 * @param regex the pattern the input is expected to match entirely
 * @param input the (already upper-cased, trimmed) type definition
 * @return a matcher whose groups are available
 * @throws IllegalStateException if the input unexpectedly fails to match
 */
private static Matcher matchOrThrow(String regex, String input) {
  Matcher matcher = Pattern.compile(regex).matcher(input);
  if (!matcher.matches()) {
    throw new IllegalStateException(
        "Type definition \"" + input + "\" does not match pattern: " + regex);
  }
  return matcher;
}