in amoro-format-mixed/amoro-mixed-hive/src/main/java/org/apache/iceberg/data/parquet/AdaptHiveBaseParquetReaders.java [228:335]
/**
 * Builds a {@link ParquetValueReader} for a Parquet primitive column, matched against the
 * expected Iceberg primitive type.
 *
 * <p>Dispatch happens in two stages: first on the Parquet logical (original) type when one is
 * present, then on the raw physical type. The blocks bracketed by "Change for mixed-hive table"
 * markers are deliberate divergences from upstream Iceberg's BaseParquetReaders — keep them
 * intact so this copy stays diffable against upstream.
 *
 * @param expected the Iceberg type projected for this column; {@code null} means the column is
 *     not projected, so no reader is needed
 * @param primitive the Parquet primitive type read from the file schema
 * @return a reader producing values of the expected Iceberg type, or {@code null} if the column
 *     is not projected
 * @throws UnsupportedOperationException for logical/physical type combinations with no reader
 */
public ParquetValueReader<?> primitive(
    org.apache.iceberg.types.Type.PrimitiveType expected, PrimitiveType primitive) {
  // Column not in the projection — skip it.
  if (expected == null) {
    return null;
  }
  // Resolve the column descriptor for the visitor's current position in the schema.
  ColumnDescriptor desc = type.getColumnDescription(currentPath());
  // Stage 1: dispatch on the Parquet logical (original) type when the file declares one.
  if (primitive.getOriginalType() != null) {
    switch (primitive.getOriginalType()) {
      case ENUM:
      case JSON:
      case UTF8:
        return new ParquetValueReaders.StringReader(desc);
      case INT_8:
      case INT_16:
      case INT_32:
        // Parquet stores these as INT32; widen to long only when Iceberg expects LONG.
        if (expected.typeId() == org.apache.iceberg.types.Type.TypeID.LONG) {
          return new ParquetValueReaders.IntAsLongReader(desc);
        } else {
          return new ParquetValueReaders.UnboxedReader<>(desc);
        }
      case INT_64:
        return new ParquetValueReaders.UnboxedReader<>(desc);
      case DATE:
        return new DateReader(desc);
      case TIMESTAMP_MICROS:
        // Iceberg's timestamp type carries the UTC-adjustment flag; pick the matching reader.
        // NOTE(review): unchecked cast assumes the projected type is TimestampType here —
        // mirrors upstream Iceberg behavior.
        Types.TimestampType tsMicrosType = (Types.TimestampType) expected;
        if (tsMicrosType.shouldAdjustToUTC()) {
          return new TimestamptzReader(desc);
        } else {
          return new TimestampReader(desc);
        }
      case TIMESTAMP_MILLIS:
        Types.TimestampType tsMillisType = (Types.TimestampType) expected;
        if (tsMillisType.shouldAdjustToUTC()) {
          return new TimestamptzMillisReader(desc);
        } else {
          return new TimestampMillisReader(desc);
        }
      case TIME_MICROS:
        return new TimeReader(desc);
      case TIME_MILLIS:
        return new TimeMillisReader(desc);
      case DECIMAL:
        // Decimals dispatch a second time on the physical storage type; only the scale is
        // needed because Iceberg tracks precision in the expected type.
        DecimalMetadata decimal = primitive.getDecimalMetadata();
        switch (primitive.getPrimitiveTypeName()) {
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return new ParquetValueReaders.BinaryAsDecimalReader(desc, decimal.getScale());
          case INT64:
            return new ParquetValueReaders.LongAsDecimalReader(desc, decimal.getScale());
          case INT32:
            return new ParquetValueReaders.IntegerAsDecimalReader(desc, decimal.getScale());
          default:
            throw new UnsupportedOperationException(
                "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
        }
      case BSON:
        return new ParquetValueReaders.BytesReader(desc);
      default:
        throw new UnsupportedOperationException(
            "Unsupported logical type: " + primitive.getOriginalType());
    }
  }
  // Stage 2: no logical type — dispatch on the raw Parquet physical type.
  switch (primitive.getPrimitiveTypeName()) {
    case FIXED_LEN_BYTE_ARRAY:
      return new FixedReader(desc);
    case BINARY:
      // Change for mixed-hive table ⬇
      // Hive writes strings as plain BINARY without a UTF8 annotation; honor the projected
      // Iceberg type instead of the missing annotation.
      // NOTE(review): identity comparison relies on Types.StringType.get() being a singleton
      // — confirm against the Iceberg types API.
      if (expected == Types.StringType.get()) {
        return new ParquetValueReaders.StringReader(desc);
      } else {
        return new ParquetValueReaders.BytesReader(desc);
      }
      // Change for mixed-hive table ⬆
    case INT32:
      if (expected.typeId() == org.apache.iceberg.types.Type.TypeID.LONG) {
        return new ParquetValueReaders.IntAsLongReader(desc);
      } else {
        return new ParquetValueReaders.UnboxedReader<>(desc);
      }
    case FLOAT:
      // Widen float to double only when the projected Iceberg type asks for DOUBLE.
      if (expected.typeId() == org.apache.iceberg.types.Type.TypeID.DOUBLE) {
        return new ParquetValueReaders.FloatAsDoubleReader(desc);
      } else {
        return new ParquetValueReaders.UnboxedReader<>(desc);
      }
    case BOOLEAN:
    case INT64:
    case DOUBLE:
      return new ParquetValueReaders.UnboxedReader<>(desc);
    // Change for mixed-hive table ⬇
    case INT96:
      // Impala & Spark used to write timestamps as INT96 without a logical type. For backwards
      // compatibility we try to read INT96 as timestamps.
      // NOTE(review): unchecked cast assumes INT96 columns are only projected as timestamps.
      Types.TimestampType tsMicrosType = (Types.TimestampType) expected;
      if (tsMicrosType.shouldAdjustToUTC()) {
        return new TimestampIntWithTZ96Reader(desc);
      } else {
        return new TimestampIntWithOutTZ96Reader(desc);
      }
      // Change for mixed-hive table ⬆
    default:
      throw new UnsupportedOperationException("Unsupported type: " + primitive);
  }
}