in paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSchemaConverter.java [78:235]
/**
 * Converts a single {@link DataType} into the Parquet {@link Type} used to store it.
 *
 * <p>Nullable types are emitted as {@code OPTIONAL}, non-nullable ones as {@code REQUIRED}.
 * The type returned by every branch is tagged with {@code fieldId}. The element / key / value
 * types nested inside ARRAY, MAP and MULTISET are tagged with ids obtained from
 * {@link SpecialFields} using {@code fieldId} and {@code depth + 1}.
 *
 * @param name name given to the resulting Parquet type
 * @param type the data type to convert
 * @param fieldId id attached to the resulting Parquet type
 * @param depth nesting depth passed in by the caller; incremented by one for each ARRAY, MAP
 *     or MULTISET level this method recurses into
 * @return the Parquet type for {@code type}, carrying {@code fieldId}
 * @throws UnsupportedOperationException if the type root has no Parquet mapping here
 */
private static Type convertToParquetType(String name, DataType type, int fieldId, int depth) {
    Type.Repetition repetition =
            type.isNullable() ? Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;
    switch (type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            // Character data is stored as BINARY annotated with the string logical type.
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
                    .as(LogicalTypeAnnotation.stringType())
                    .named(name)
                    .withId(fieldId);
        case BOOLEAN:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition)
                    .named(name)
                    .withId(fieldId);
        case BINARY:
        case VARBINARY:
            // Raw bytes: BINARY without any logical type annotation.
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
                    .named(name)
                    .withId(fieldId);
        case DECIMAL:
            int precision = ((DecimalType) type).getPrecision();
            int scale = ((DecimalType) type).getScale();
            // The physical type is chosen from the precision: INT32, then INT64, otherwise a
            // fixed-length byte array sized by computeMinBytesForDecimalPrecision.
            if (is32BitDecimal(precision)) {
                return Types.primitive(INT32, repetition)
                        .as(LogicalTypeAnnotation.decimalType(scale, precision))
                        .named(name)
                        .withId(fieldId);
            } else if (is64BitDecimal(precision)) {
                return Types.primitive(INT64, repetition)
                        .as(LogicalTypeAnnotation.decimalType(scale, precision))
                        .named(name)
                        .withId(fieldId);
            } else {
                return Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
                        .as(LogicalTypeAnnotation.decimalType(scale, precision))
                        .length(computeMinBytesForDecimalPrecision(precision))
                        .named(name)
                        .withId(fieldId);
            }
        case TINYINT:
            // INT32 annotated as a signed 8-bit integer.
            return Types.primitive(INT32, repetition)
                    .as(LogicalTypeAnnotation.intType(8, true))
                    .named(name)
                    .withId(fieldId);
        case SMALLINT:
            // INT32 annotated as a signed 16-bit integer.
            return Types.primitive(INT32, repetition)
                    .as(LogicalTypeAnnotation.intType(16, true))
                    .named(name)
                    .withId(fieldId);
        case INTEGER:
            return Types.primitive(INT32, repetition).named(name).withId(fieldId);
        case BIGINT:
            return Types.primitive(INT64, repetition).named(name).withId(fieldId);
        case FLOAT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition)
                    .named(name)
                    .withId(fieldId);
        case DOUBLE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition)
                    .named(name)
                    .withId(fieldId);
        case DATE:
            return Types.primitive(INT32, repetition)
                    .as(LogicalTypeAnnotation.dateType())
                    .named(name)
                    .withId(fieldId);
        case TIME_WITHOUT_TIME_ZONE:
            // INT32 annotated with the millisecond time logical type.
            return Types.primitive(INT32, repetition)
                    .as(
                            LogicalTypeAnnotation.timeType(
                                    true, LogicalTypeAnnotation.TimeUnit.MILLIS))
                    .named(name)
                    .withId(fieldId);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            TimestampType timestampType = (TimestampType) type;
            // NOTE(review): the trailing boolean differs between the two timestamp branches;
            // presumably it is the "adjusted to UTC" flag -- confirm against the helper.
            return createTimestampWithLogicalType(
                            name, timestampType.getPrecision(), repetition, false)
                    .withId(fieldId);
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) type;
            return createTimestampWithLogicalType(
                            name, localZonedTimestampType.getPrecision(), repetition, true)
                    .withId(fieldId);
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            // The recursive call stamps the element with fieldId; that id is then replaced
            // by the dedicated array-element id for this nesting depth.
            Type elementParquetType =
                    convertToParquetType(
                                    LIST_ELEMENT_NAME,
                                    arrayType.getElementType(),
                                    fieldId,
                                    depth + 1)
                            .withId(SpecialFields.getArrayElementFieldId(fieldId, depth + 1));
            return ConversionPatterns.listOfElements(repetition, name, elementParquetType)
                    .withId(fieldId);
        case MAP:
            MapType mapType = (MapType) type;
            DataType keyType = mapType.getKeyType();
            if (keyType.isNullable()) {
                // key is nullable, but Parquet does not support nullable keys, so we configure
                // it as not nullable
                keyType = keyType.copy(false);
            }
            // Key and value each get their own derived id, overriding the one set by the
            // recursive call.
            Type mapKeyParquetType =
                    convertToParquetType(MAP_KEY_NAME, keyType, fieldId, depth + 1)
                            .withId(SpecialFields.getMapKeyFieldId(fieldId, depth + 1));
            Type mapValueParquetType =
                    convertToParquetType(
                                    MAP_VALUE_NAME, mapType.getValueType(), fieldId, depth + 1)
                            .withId(SpecialFields.getMapValueFieldId(fieldId, depth + 1));
            return ConversionPatterns.mapType(
                            repetition,
                            name,
                            MAP_REPEATED_NAME,
                            mapKeyParquetType,
                            mapValueParquetType)
                    .withId(fieldId);
        case MULTISET:
            MultisetType multisetType = (MultisetType) type;
            DataType elementType = multisetType.getElementType();
            if (elementType.isNullable()) {
                // element type is nullable, but Parquet does not support nullable map keys,
                // so we configure it as not nullable
                elementType = elementType.copy(false);
            }
            // A multiset reuses the Parquet map layout: the element is the key and the value
            // is a non-nullable INT (presumably the element's occurrence count).
            Type multisetKeyParquetType =
                    convertToParquetType(MAP_KEY_NAME, elementType, fieldId, depth + 1)
                            .withId(SpecialFields.getMapKeyFieldId(fieldId, depth + 1));
            Type multisetValueParquetType =
                    convertToParquetType(MAP_VALUE_NAME, new IntType(false), fieldId, depth + 1)
                            .withId(SpecialFields.getMapValueFieldId(fieldId, depth + 1));
            return ConversionPatterns.mapType(
                            repetition,
                            name,
                            MAP_REPEATED_NAME,
                            multisetKeyParquetType,
                            multisetValueParquetType)
                    .withId(fieldId);
        case ROW:
            RowType rowType = (RowType) type;
            // Child fields are converted by convertToParquetTypes.
            return new GroupType(repetition, name, convertToParquetTypes(rowType))
                    .withId(fieldId);
        case VARIANT:
            // A variant is a group of two required BINARY fields (value and metadata). The
            // group carries the field id like every other branch; it was previously omitted.
            return Types.buildGroup(repetition)
                    .addField(
                            Types.primitive(
                                            PrimitiveType.PrimitiveTypeName.BINARY,
                                            Type.Repetition.REQUIRED)
                                    .named(Variant.VALUE))
                    .addField(
                            Types.primitive(
                                            PrimitiveType.PrimitiveTypeName.BINARY,
                                            Type.Repetition.REQUIRED)
                                    .named(Variant.METADATA))
                    .named(name)
                    .withId(fieldId);
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}