in paimon-format/src/main/java/org/apache/paimon/format/parquet/reader/ParquetSplitReaderUtil.java [392:537]
/**
 * Builds the {@link ParquetField} describing how {@code dataField} maps onto the Parquet column
 * tree rooted at {@code columnIO}. Nested types are resolved by recursing into this method.
 *
 * <ul>
 *   <li>{@code RowType}: each child field is looked up by name inside the group column.
 *   <li>{@code VariantType}: the group's {@code Variant.VALUE} and {@code Variant.METADATA}
 *       columns are exposed as two {@code BinaryType} primitive children.
 *   <li>{@code MapType} / {@code MultisetType}: key and value are read from children 0 and 1
 *       of the column returned by {@code getMapKeyValueColumn}; a multiset's value is an
 *       {@code IntType}.
 *   <li>{@code ArrayType}: the element column is located first, then converted as a single
 *       child.
 *   <li>Anything else is treated as a primitive and {@code columnIO} is cast to {@code
 *       PrimitiveColumnIO}.
 * </ul>
 *
 * @param dataField the field whose type drives the conversion; its name is only consulted for
 *     array types
 * @param columnIO the Parquet column (group or primitive) that stores {@code dataField}
 * @return a group field for row, variant, map, multiset and array types, otherwise a primitive
 *     field
 * @throws RuntimeException if {@code dataField} is an array and {@code columnIO} is neither a
 *     {@code GroupColumnIO} nor a {@code PrimitiveColumnIO}
 */
private static ParquetField constructField(DataField dataField, ColumnIO columnIO) {
    boolean required = columnIO.getType().getRepetition() == REQUIRED;
    int repetitionLevel = columnIO.getRepetitionLevel();
    int definitionLevel = columnIO.getDefinitionLevel();
    DataType type = dataField.type();
    String fieldName = dataField.name();

    if (type instanceof RowType) {
        GroupColumnIO rowGroup = (GroupColumnIO) columnIO;
        RowType rowType = (RowType) type;
        ImmutableList.Builder<ParquetField> converted = ImmutableList.builder();
        List<String> childNames = rowType.getFieldNames();
        List<DataField> children = rowType.getFields();
        // Names and fields are walked in lock-step; the name selects the child column.
        int position = 0;
        for (DataField child : children) {
            ColumnIO childColumn = lookupColumnByName(rowGroup, childNames.get(position));
            converted.add(constructField(child, childColumn));
            position++;
        }
        return new ParquetGroupField(
                type,
                repetitionLevel,
                definitionLevel,
                required,
                converted.build(),
                rowGroup.getFieldPath());
    }

    if (type instanceof VariantType) {
        GroupColumnIO variantGroup = (GroupColumnIO) columnIO;
        // Both children reuse the enclosing group's "required" flag rather than their own.
        PrimitiveColumnIO valueColumn =
                (PrimitiveColumnIO) lookupColumnByName(variantGroup, Variant.VALUE);
        ParquetField valuePart =
                new ParquetPrimitiveField(
                        new BinaryType(),
                        required,
                        valueColumn.getColumnDescriptor(),
                        valueColumn.getId(),
                        valueColumn.getFieldPath());
        PrimitiveColumnIO metadataColumn =
                (PrimitiveColumnIO) lookupColumnByName(variantGroup, Variant.METADATA);
        ParquetField metadataPart =
                new ParquetPrimitiveField(
                        new BinaryType(),
                        required,
                        metadataColumn.getColumnDescriptor(),
                        metadataColumn.getId(),
                        metadataColumn.getFieldPath());
        return new ParquetGroupField(
                type,
                repetitionLevel,
                definitionLevel,
                required,
                ImmutableList.of(valuePart, metadataPart),
                variantGroup.getFieldPath());
    }

    if (type instanceof MapType || type instanceof MultisetType) {
        GroupColumnIO mapGroup = (GroupColumnIO) columnIO;
        GroupColumnIO entries = getMapKeyValueColumn(mapGroup);
        // A multiset is read like a map from its element type to an INT value.
        DataType keyType;
        DataType valueType;
        if (type instanceof MapType) {
            MapType mapType = (MapType) type;
            keyType = mapType.getKeyType();
            valueType = mapType.getValueType();
        } else {
            keyType = ((MultisetType) type).getElementType();
            valueType = new IntType();
        }
        ParquetField keyField =
                constructField(new DataField(0, "", keyType), entries.getChild(0));
        ParquetField valueField =
                constructField(new DataField(0, "", valueType), entries.getChild(1));
        return new ParquetGroupField(
                type,
                repetitionLevel,
                definitionLevel,
                required,
                ImmutableList.of(keyField, valueField),
                mapGroup.getFieldPath());
    }

    if (type instanceof ArrayType) {
        DataType elementType = ((ArrayType) type).getElementType();
        ColumnIO elementSource;
        if (columnIO instanceof GroupColumnIO) {
            GroupColumnIO arrayGroup = (GroupColumnIO) columnIO;
            if (StringUtils.isNullOrWhitespaceOnly(fieldName)) {
                // Anonymous array (this method passes "" for nested elements): a row element
                // keeps the group itself, any other element uses the group's first child.
                elementSource =
                        elementType instanceof RowType ? arrayGroup : arrayGroup.getChild(0);
            } else {
                // Named array: follow first children until a group carrying the field's name
                // is reached.
                // NOTE(review): nothing bounds this descent; a name that never matches ends
                // in an exception from the cast or from getChild(0) - confirm that is intended.
                GroupColumnIO cursor = arrayGroup;
                while (!Objects.equals(cursor.getName(), fieldName)) {
                    cursor = (GroupColumnIO) cursor.getChild(0);
                }
                elementSource = cursor;
            }
        } else if (columnIO instanceof PrimitiveColumnIO) {
            elementSource = columnIO;
        } else {
            throw new RuntimeException(String.format("Unknown ColumnIO, %s", columnIO));
        }

        DataField elementField = new DataField(0, "", elementType);
        ParquetField element =
                constructField(elementField, getArrayElementColumn(elementSource));

        // If the element reports the same repetition level as this column, the array itself is
        // reported at the parent's repetition level instead.
        // NOTE(review): presumably this covers layouts where columnIO is already the repeated
        // node - confirm.
        int arrayRepetitionLevel =
                repetitionLevel == element.getRepetitionLevel()
                        ? columnIO.getParent().getRepetitionLevel()
                        : repetitionLevel;
        return new ParquetGroupField(
                type,
                arrayRepetitionLevel,
                definitionLevel,
                required,
                ImmutableList.of(element),
                columnIO.getFieldPath());
    }

    // Every remaining type is stored in a single primitive column.
    PrimitiveColumnIO leaf = (PrimitiveColumnIO) columnIO;
    return new ParquetPrimitiveField(
            type, required, leaf.getColumnDescriptor(), leaf.getId(), leaf.getFieldPath());
}