in spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java [236:313]
public ParquetValueReader<?> primitive(
org.apache.iceberg.types.Type.PrimitiveType expected, PrimitiveType primitive) {
ColumnDescriptor desc = type.getColumnDescription(currentPath());
if (primitive.getOriginalType() != null) {
switch (primitive.getOriginalType()) {
case ENUM:
case JSON:
case UTF8:
return new StringReader(desc);
case INT_8:
case INT_16:
case INT_32:
if (expected != null && expected.typeId() == Types.LongType.get().typeId()) {
return new IntAsLongReader(desc);
} else {
return new UnboxedReader<>(desc);
}
case DATE:
case INT_64:
case TIMESTAMP_MICROS:
return new UnboxedReader<>(desc);
case TIMESTAMP_MILLIS:
return new TimestampMillisReader(desc);
case DECIMAL:
DecimalLogicalTypeAnnotation decimal =
(DecimalLogicalTypeAnnotation) primitive.getLogicalTypeAnnotation();
switch (primitive.getPrimitiveTypeName()) {
case BINARY:
case FIXED_LEN_BYTE_ARRAY:
return new BinaryDecimalReader(desc, decimal.getScale());
case INT64:
return new LongDecimalReader(desc, decimal.getPrecision(), decimal.getScale());
case INT32:
return new IntegerDecimalReader(desc, decimal.getPrecision(), decimal.getScale());
default:
throw new UnsupportedOperationException(
"Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
}
case BSON:
return new ParquetValueReaders.ByteArrayReader(desc);
default:
throw new UnsupportedOperationException(
"Unsupported logical type: " + primitive.getOriginalType());
}
}
switch (primitive.getPrimitiveTypeName()) {
case FIXED_LEN_BYTE_ARRAY:
case BINARY:
if (expected != null && expected.typeId() == TypeID.UUID) {
return new UUIDReader(desc);
}
return new ParquetValueReaders.ByteArrayReader(desc);
case INT32:
if (expected != null && expected.typeId() == TypeID.LONG) {
return new IntAsLongReader(desc);
} else {
return new UnboxedReader<>(desc);
}
case FLOAT:
if (expected != null && expected.typeId() == TypeID.DOUBLE) {
return new FloatAsDoubleReader(desc);
} else {
return new UnboxedReader<>(desc);
}
case BOOLEAN:
case INT64:
case DOUBLE:
return new UnboxedReader<>(desc);
case INT96:
// Impala & Spark used to write timestamps as INT96 without a logical type. For backwards
// compatibility we try to read INT96 as timestamps.
return new TimestampInt96Reader(desc);
default:
throw new UnsupportedOperationException("Unsupported type: " + primitive);
}
}