in paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcTableStatsExtractor.java [92:221]
/**
 * Converts the ORC column statistics of a single field into a {@link FieldStats} and passes
 * the result through {@code collector.convert}.
 *
 * <p>The null count is derived as {@code rowCount - stats.getNumberOfValues()}. When every row
 * is null, min and max are reported as {@code null}. Otherwise the field's type root selects
 * which ORC statistics class is expected and how its minimum/maximum are mapped. Type roots
 * without a mapping here yield {@code null} min/max and only the null count.
 *
 * @param field the field whose type decides the conversion
 * @param stats the ORC statistics recorded for that field's column
 * @param rowCount the row count the statistics are relative to
 * @param collector post-processes the extracted stats via {@code convert}
 * @return whatever {@code collector.convert} returns for the extracted stats
 * @throws IllegalStateException presumably (via {@code Preconditions.checkState}) when the
 *     derived null count disagrees with {@code stats.hasNull()} -- confirm against the
 *     Preconditions implementation in use
 */
private FieldStats toFieldStats(
        DataField field, ColumnStatistics stats, long rowCount, FieldStatsCollector collector) {
    long nullCount = rowCount - stats.getNumberOfValues();
    if (nullCount == rowCount) {
        // All values are null: there is no min/max to report.
        return collector.convert(new FieldStats(null, null, nullCount));
    }

    // Sanity check: a positive derived null count must agree with ORC's own hasNull flag.
    boolean hasNull = stats.hasNull();
    Preconditions.checkState(
            (nullCount > 0) == hasNull,
            "Bug in OrcTableStatsExtractor: nullCount is "
                    + nullCount
                    + " while stats.hasNull() is "
                    + hasNull
                    + "!");

    FieldStats fieldStats;
    switch (field.type().getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            assertStatsClass(field, stats, StringColumnStatistics.class);
            StringColumnStatistics stringStats = (StringColumnStatistics) stats;
            fieldStats =
                    new FieldStats(
                            BinaryString.fromString(stringStats.getMinimum()),
                            BinaryString.fromString(stringStats.getMaximum()),
                            nullCount);
            break;
        case BOOLEAN:
            assertStatsClass(field, stats, BooleanColumnStatistics.class);
            BooleanColumnStatistics boolStats = (BooleanColumnStatistics) stats;
            // min is true only if no false value exists; max is true if any true value exists.
            fieldStats =
                    new FieldStats(
                            boolStats.getFalseCount() == 0,
                            boolStats.getTrueCount() != 0,
                            nullCount);
            break;
        case DECIMAL:
            assertStatsClass(field, stats, DecimalColumnStatistics.class);
            DecimalColumnStatistics decimalStats = (DecimalColumnStatistics) stats;
            DecimalType decimalType = (DecimalType) field.type();
            int precision = decimalType.getPrecision();
            int scale = decimalType.getScale();
            fieldStats =
                    new FieldStats(
                            Decimal.fromBigDecimal(
                                    decimalStats.getMinimum().bigDecimalValue(),
                                    precision,
                                    scale),
                            Decimal.fromBigDecimal(
                                    decimalStats.getMaximum().bigDecimalValue(),
                                    precision,
                                    scale),
                            nullCount);
            break;
        case TINYINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics byteStats = (IntegerColumnStatistics) stats;
            // Integer statistics are narrowed to the width of the field type.
            fieldStats =
                    new FieldStats(
                            (byte) byteStats.getMinimum(),
                            (byte) byteStats.getMaximum(),
                            nullCount);
            break;
        case SMALLINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics shortStats = (IntegerColumnStatistics) stats;
            fieldStats =
                    new FieldStats(
                            (short) shortStats.getMinimum(),
                            (short) shortStats.getMaximum(),
                            nullCount);
            break;
        case INTEGER:
        case TIME_WITHOUT_TIME_ZONE:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics intStats = (IntegerColumnStatistics) stats;
            // Primitive narrowing; same value as boxing to Long and calling intValue().
            fieldStats =
                    new FieldStats(
                            (int) intStats.getMinimum(),
                            (int) intStats.getMaximum(),
                            nullCount);
            break;
        case BIGINT:
            assertStatsClass(field, stats, IntegerColumnStatistics.class);
            IntegerColumnStatistics longStats = (IntegerColumnStatistics) stats;
            fieldStats =
                    new FieldStats(longStats.getMinimum(), longStats.getMaximum(), nullCount);
            break;
        case FLOAT:
            assertStatsClass(field, stats, DoubleColumnStatistics.class);
            DoubleColumnStatistics floatStats = (DoubleColumnStatistics) stats;
            // FLOAT columns are checked against the double statistics class; narrow to float.
            fieldStats =
                    new FieldStats(
                            (float) floatStats.getMinimum(),
                            (float) floatStats.getMaximum(),
                            nullCount);
            break;
        case DOUBLE:
            assertStatsClass(field, stats, DoubleColumnStatistics.class);
            DoubleColumnStatistics doubleStats = (DoubleColumnStatistics) stats;
            fieldStats =
                    new FieldStats(
                            doubleStats.getMinimum(), doubleStats.getMaximum(), nullCount);
            break;
        case DATE:
            assertStatsClass(field, stats, DateColumnStatistics.class);
            DateColumnStatistics dateStats = (DateColumnStatistics) stats;
            // Rebuild a Date from the epoch millis before handing it to DateTimeUtils.
            fieldStats =
                    new FieldStats(
                            DateTimeUtils.toInternal(
                                    new Date(dateStats.getMinimum().getTime())),
                            DateTimeUtils.toInternal(
                                    new Date(dateStats.getMaximum().getTime())),
                            nullCount);
            break;
        case TIMESTAMP_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            assertStatsClass(field, stats, TimestampColumnStatistics.class);
            TimestampColumnStatistics timestampStats = (TimestampColumnStatistics) stats;
            fieldStats =
                    new FieldStats(
                            Timestamp.fromSQLTimestamp(timestampStats.getMinimum()),
                            Timestamp.fromSQLTimestamp(timestampStats.getMaximum()),
                            nullCount);
            break;
        default:
            // No min/max mapping for this type root: report the null count only.
            fieldStats = new FieldStats(null, null, nullCount);
    }
    return collector.convert(fieldStats);
}