in amoro-format-mixed/amoro-mixed-spark/v3.5/amoro-mixed-spark-3.5/src/main/java/org/apache/amoro/spark/reader/SparkParquetReaders.java [216:296]
public ParquetValueReader<?> primitive(
    org.apache.iceberg.types.Type.PrimitiveType expected, PrimitiveType primitive) {
  // Selects the value reader for a single Parquet primitive column.
  //
  // Resolution order:
  //   1. If the Parquet type carries an original (logical) type annotation, that annotation
  //      decides the reader.
  //   2. Otherwise the physical primitive type name decides.
  //
  // `expected` is the requested Iceberg type and may be null. It is only consulted to widen
  // int -> long and float -> double, and to read un-annotated binary columns as strings.
  //
  // Throws UnsupportedOperationException for logical types, decimal base types or physical
  // types that have no matching reader.
  ColumnDescriptor desc = type.getColumnDescription(currentPath());

  // Resolve the requested type id once, null-safely, so every branch below compares the same
  // way instead of mixing typeId() lookups with reference-equality checks.
  org.apache.iceberg.types.Type.TypeID expectedId = expected == null ? null : expected.typeId();

  if (primitive.getOriginalType() != null) {
    switch (primitive.getOriginalType()) {
      case ENUM:
      case JSON:
      case UTF8:
        return new StringReader(desc);
      case INT_8:
      case INT_16:
      case INT_32:
        // Widen to long when the requested type is a long.
        if (expectedId == org.apache.iceberg.types.Type.TypeID.LONG) {
          return new ParquetValueReaders.IntAsLongReader(desc);
        }
        return new ParquetValueReaders.UnboxedReader<>(desc);
      case DATE:
      case INT_64:
      case TIMESTAMP_MICROS:
        return new ParquetValueReaders.UnboxedReader<>(desc);
      case TIMESTAMP_MILLIS:
        return new TimestampMillisReader(desc);
      case DECIMAL:
        DecimalMetadata decimal = primitive.getDecimalMetadata();
        // The decimal reader depends on the physical type that backs the logical decimal.
        switch (primitive.getPrimitiveTypeName()) {
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return new BinaryDecimalReader(desc, decimal.getScale());
          case INT64:
            return new LongDecimalReader(desc, decimal.getPrecision(), decimal.getScale());
          case INT32:
            return new IntegerDecimalReader(desc, decimal.getPrecision(), decimal.getScale());
          default:
            throw new UnsupportedOperationException(
                "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
        }
      case BSON:
        return new ParquetValueReaders.ByteArrayReader(desc);
      default:
        throw new UnsupportedOperationException(
            "Unsupported logical type: " + primitive.getOriginalType());
    }
  }

  // No logical type annotation: fall back to the physical type.
  switch (primitive.getPrimitiveTypeName()) {
    case FIXED_LEN_BYTE_ARRAY:
    case BINARY:
      // Change for mixed-format table ⬇
      // Un-annotated binary is read as a string when a string is requested.
      if (expectedId == org.apache.iceberg.types.Type.TypeID.STRING) {
        return new StringReader(desc);
      }
      return new ParquetValueReaders.ByteArrayReader(desc);
      // Change for mixed-format table ⬆
    case INT32:
      if (expectedId == org.apache.iceberg.types.Type.TypeID.LONG) {
        return new ParquetValueReaders.IntAsLongReader(desc);
      }
      return new ParquetValueReaders.UnboxedReader<>(desc);
    case FLOAT:
      // Widen to double when the requested type is a double.
      if (expectedId == org.apache.iceberg.types.Type.TypeID.DOUBLE) {
        return new ParquetValueReaders.FloatAsDoubleReader(desc);
      }
      return new ParquetValueReaders.UnboxedReader<>(desc);
    case BOOLEAN:
    case INT64:
    case DOUBLE:
      return new ParquetValueReaders.UnboxedReader<>(desc);
    case INT96:
      // Impala & Spark used to write timestamps as INT96 without a logical type. For backwards
      // compatibility we try to read INT96 as timestamps.
      return new TimestampInt96Reader(desc);
    default:
      throw new UnsupportedOperationException("Unsupported type: " + primitive);
  }
}