in paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java [148:241]
/**
 * Converts the ORC statistics of a single column into a {@link SimpleColStats} holding the
 * minimum value, the maximum value and the null count of that column.
 *
 * <p>The min/max values are converted to the representation selected by the field's type root.
 * Type roots that are not handled explicitly yield {@code null} min/max values.
 *
 * @param field the field whose type decides how the ORC statistics are interpreted
 * @param stats the ORC statistics collected for this column
 * @param nullCount the number of nulls in this column, passed through unchanged to the result
 * @return the converted statistics; min and max are {@code null} when the column has no
 *     non-null values or when its type is not supported
 */
private SimpleColStats toFieldStats(DataField field, ColumnStatistics stats, long nullCount) {
    // With zero recorded (non-null) values ORC has no meaningful min/max: object-typed getters
    // (decimal, date, timestamp) would be dereferenced as null below and primitive getters
    // would expose placeholder values. Report "unknown" bounds instead.
    if (stats.getNumberOfValues() == 0) {
        return new SimpleColStats(null, null, nullCount);
    }

    switch (field.type().getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            assertStatsClass(field, stats, StringColumnStatistics.class);
            StringColumnStatistics stringStats = (StringColumnStatistics) stats;
            return new SimpleColStats(
                    BinaryString.fromString(stringStats.getMinimum()),
                    BinaryString.fromString(stringStats.getMaximum()),
                    nullCount);
        case BOOLEAN:
            assertStatsClass(field, stats, BooleanColumnStatistics.class);
            BooleanColumnStatistics boolStats = (BooleanColumnStatistics) stats;
            // min is true only when no false value was seen; max is true as soon as any true
            // value was seen.
            return new SimpleColStats(
                    boolStats.getFalseCount() == 0, boolStats.getTrueCount() != 0, nullCount);
        case DECIMAL:
            assertStatsClass(field, stats, DecimalColumnStatistics.class);
            DecimalColumnStatistics decimalStats = (DecimalColumnStatistics) stats;
            DecimalType decimalType = (DecimalType) (field.type());
            int precision = decimalType.getPrecision();
            int scale = decimalType.getScale();
            return new SimpleColStats(
                    Decimal.fromBigDecimal(
                            decimalStats.getMinimum().bigDecimalValue(), precision, scale),
                    Decimal.fromBigDecimal(
                            decimalStats.getMaximum().bigDecimalValue(), precision, scale),
                    nullCount);
        case TINYINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics byteStats = (IntegerColumnStatistics) stats;
            // ORC reports integer bounds as long; narrow to the field's width.
            return new SimpleColStats(
                    (byte) byteStats.getMinimum(), (byte) byteStats.getMaximum(), nullCount);
        case SMALLINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics shortStats = (IntegerColumnStatistics) stats;
            return new SimpleColStats(
                    (short) shortStats.getMinimum(),
                    (short) shortStats.getMaximum(),
                    nullCount);
        case INTEGER:
        case TIME_WITHOUT_TIME_ZONE:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics intStats = (IntegerColumnStatistics) stats;
            return new SimpleColStats(
                    (int) intStats.getMinimum(), (int) intStats.getMaximum(), nullCount);
        case BIGINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics longStats = (IntegerColumnStatistics) stats;
            return new SimpleColStats(
                    longStats.getMinimum(), longStats.getMaximum(), nullCount);
        case FLOAT:
            assertStatsClass(field, stats, DoubleColumnStatistics.class);
            DoubleColumnStatistics floatStats = (DoubleColumnStatistics) stats;
            // ORC reports floating-point bounds as double; narrow to float for FLOAT fields.
            return new SimpleColStats(
                    (float) floatStats.getMinimum(),
                    (float) floatStats.getMaximum(),
                    nullCount);
        case DOUBLE:
            assertStatsClass(field, stats, DoubleColumnStatistics.class);
            DoubleColumnStatistics doubleStats = (DoubleColumnStatistics) stats;
            return new SimpleColStats(
                    doubleStats.getMinimum(), doubleStats.getMaximum(), nullCount);
        case DATE:
            assertStatsClass(field, stats, DateColumnStatistics.class);
            DateColumnStatistics dateStats = (DateColumnStatistics) stats;
            return new SimpleColStats(
                    DateTimeUtils.toInternal(new Date(dateStats.getMinimum().getTime())),
                    DateTimeUtils.toInternal(new Date(dateStats.getMaximum().getTime())),
                    nullCount);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            assertStatsClass(field, stats, TimestampColumnStatistics.class);
            TimestampColumnStatistics timestampStats = (TimestampColumnStatistics) stats;
            return new SimpleColStats(
                    Timestamp.fromSQLTimestamp(timestampStats.getMinimum()),
                    Timestamp.fromSQLTimestamp(timestampStats.getMaximum()),
                    nullCount);
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            assertStatsClass(field, stats, TimestampColumnStatistics.class);
            TimestampColumnStatistics timestampLtzStats = (TimestampColumnStatistics) stats;
            // The legacy flag selects the same conversion as TIMESTAMP_WITHOUT_TIME_ZONE;
            // otherwise the bounds are converted via their Instant.
            return legacyTimestampLtzType
                    ? new SimpleColStats(
                            Timestamp.fromSQLTimestamp(timestampLtzStats.getMinimum()),
                            Timestamp.fromSQLTimestamp(timestampLtzStats.getMaximum()),
                            nullCount)
                    : new SimpleColStats(
                            Timestamp.fromInstant(timestampLtzStats.getMinimum().toInstant()),
                            Timestamp.fromInstant(timestampLtzStats.getMaximum().toInstant()),
                            nullCount);
        default:
            // No min/max extraction for other type roots.
            return new SimpleColStats(null, null, nullCount);
    }
}