in paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java [283:402]
/**
 * Converts a Parquet schema {@link Type} into the corresponding Paimon {@link DataField}.
 *
 * <p>Primitive types are mapped from their physical type, refined by the logical type
 * annotation where one is recognised:
 *
 * <ul>
 *   <li>{@code BINARY}: {@code STRING} when annotated as string, otherwise {@code BYTES}.
 *   <li>{@code INT32}: decimal, {@code TINYINT}/{@code SMALLINT}/{@code INT} (by bit width),
 *       {@code DATE} or {@code TIME} depending on the annotation; plain {@code INT} otherwise.
 *   <li>{@code INT64}: decimal or timestamp depending on the annotation; {@code BIGINT}
 *       otherwise.
 *   <li>{@code INT96}: timestamp with precision 9.
 *   <li>{@code FIXED_LEN_BYTE_ARRAY}: decimal; any other annotation is rejected.
 * </ul>
 *
 * <p>Group types are converted recursively: a list annotation yields an {@link ArrayType}, a
 * map annotation yields a {@link MapType}, and any other group yields a {@link RowType} built
 * from its fields.
 *
 * <p>A type whose repetition is {@code REQUIRED} produces a not-null Paimon type.
 *
 * <p>NOTE(review): the field id is read with {@code parquetType.getId().intValue()} without a
 * null check, so this presumably expects every Parquet type to carry a field id - confirm for
 * files that were not written with ids.
 *
 * @param parquetType the Parquet type to convert
 * @return a field carrying the Parquet field id, the Parquet field name and the converted type
 * @throws UnsupportedOperationException if the primitive type is not handled, or if a {@code
 *     FIXED_LEN_BYTE_ARRAY} is not annotated as a decimal
 */
public static DataField convertToPaimonField(Type parquetType) {
    LogicalTypeAnnotation logicalType = parquetType.getLogicalTypeAnnotation();
    DataType paimonDataType;
    if (parquetType.isPrimitive()) {
        switch (parquetType.asPrimitiveType().getPrimitiveTypeName()) {
            case BINARY:
                if (logicalType instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
                    paimonDataType = DataTypes.STRING();
                } else {
                    paimonDataType = DataTypes.BYTES();
                }
                break;
            case BOOLEAN:
                paimonDataType = DataTypes.BOOLEAN();
                break;
            case FLOAT:
                paimonDataType = DataTypes.FLOAT();
                break;
            case DOUBLE:
                paimonDataType = DataTypes.DOUBLE();
                break;
            case INT32:
                if (logicalType instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
                    LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType =
                            (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalType;
                    paimonDataType =
                            new DecimalType(decimalType.getPrecision(), decimalType.getScale());
                } else if (logicalType
                        instanceof LogicalTypeAnnotation.IntLogicalTypeAnnotation) {
                    LogicalTypeAnnotation.IntLogicalTypeAnnotation intType =
                            (LogicalTypeAnnotation.IntLogicalTypeAnnotation) logicalType;
                    int bitWidth = intType.getBitWidth();
                    if (bitWidth == 8) {
                        paimonDataType = DataTypes.TINYINT();
                    } else if (bitWidth == 16) {
                        paimonDataType = DataTypes.SMALLINT();
                    } else {
                        paimonDataType = DataTypes.INT();
                    }
                } else if (logicalType
                        instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) {
                    paimonDataType = DataTypes.DATE();
                } else if (logicalType
                        instanceof LogicalTypeAnnotation.TimeLogicalTypeAnnotation) {
                    paimonDataType = DataTypes.TIME();
                } else {
                    paimonDataType = DataTypes.INT();
                }
                break;
            case INT64:
                if (logicalType instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
                    LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType =
                            (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalType;
                    paimonDataType =
                            new DecimalType(decimalType.getPrecision(), decimalType.getScale());
                } else if (logicalType
                        instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) {
                    LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampType =
                            (LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) logicalType;
                    // NOTE(review): every unit other than MILLIS (including NANOS) is mapped
                    // to precision 6 - confirm this is intended for nanosecond columns.
                    int precision =
                            timestampType
                                            .getUnit()
                                            .equals(LogicalTypeAnnotation.TimeUnit.MILLIS)
                                    ? 3
                                    : 6;
                    // UTC-adjusted timestamps become local-zoned timestamps.
                    paimonDataType =
                            timestampType.isAdjustedToUTC()
                                    ? new LocalZonedTimestampType(precision)
                                    : new TimestampType(precision);
                } else {
                    paimonDataType = DataTypes.BIGINT();
                }
                break;
            case INT96:
                paimonDataType = new TimestampType(9);
                break;
            case FIXED_LEN_BYTE_ARRAY:
                // Only decimals are understood here. Check the annotation before casting so
                // that an unannotated or differently annotated column is reported with its
                // schema instead of failing with a NullPointerException/ClassCastException.
                if (!(logicalType
                        instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation)) {
                    throw new UnsupportedOperationException("Unsupported type: " + parquetType);
                }
                LogicalTypeAnnotation.DecimalLogicalTypeAnnotation fixedDecimalType =
                        (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalType;
                paimonDataType =
                        new DecimalType(
                                fixedDecimalType.getPrecision(), fixedDecimalType.getScale());
                break;
            default:
                throw new UnsupportedOperationException("Unsupported type: " + parquetType);
        }
    } else {
        GroupType groupType = parquetType.asGroupType();
        if (logicalType instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
            paimonDataType =
                    new ArrayType(
                            convertToPaimonField(parquetListElementType(groupType)).type());
        } else if (logicalType instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) {
            Pair<Type, Type> keyValueType = parquetMapKeyValueType(groupType);
            paimonDataType =
                    new MapType(
                            // Since parquet does not support nullable key, when converting
                            // back to Paimon, set as nullable by default.
                            convertToPaimonField(keyValueType.getLeft()).type().nullable(),
                            convertToPaimonField(keyValueType.getRight()).type());
        } else {
            // Any other group is treated as a row; each child is converted recursively.
            paimonDataType =
                    new RowType(
                            groupType.getFields().stream()
                                    .map(ParquetSchemaConverter::convertToPaimonField)
                                    .collect(Collectors.toList()));
        }
    }

    // Shared tail for primitive and group types: REQUIRED repetition means not-null.
    if (parquetType.getRepetition().equals(Type.Repetition.REQUIRED)) {
        paimonDataType = paimonDataType.notNull();
    }
    return new DataField(parquetType.getId().intValue(), parquetType.getName(), paimonDataType);
}