in paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetTableStatsExtractor.java [81:171]
/**
 * Converts the Parquet statistics of one field into a {@link FieldStats}.
 *
 * <p>Three outcomes are possible:
 *
 * <ul>
 *   <li>{@code stats} is {@code null}: a {@link FieldStats} with no min, max or null count is
 *       returned directly, without going through {@code collector}.
 *   <li>{@code stats} holds no non-null value: only the null count is reported.
 *   <li>Otherwise min and max are read according to the type root of {@code field}; type roots
 *       without a dedicated branch report only the null count.
 * </ul>
 *
 * @param field the field whose type decides how min and max are read from {@code stats}
 * @param stats the Parquet statistics of the field, may be {@code null}
 * @param collector applied to the extracted stats before they are returned
 * @return the field stats, passed through {@code collector} unless {@code stats} is {@code null}
 */
private FieldStats toFieldStats(
        DataField field, Statistics<?> stats, FieldStatsCollector collector) {
    if (stats == null) {
        // NOTE(review): this path skips collector.convert, unlike every other return below;
        // confirm that is intended.
        return new FieldStats(null, null, null);
    }
    long nullCount = stats.getNumNulls();
    if (!stats.hasNonNullValue()) {
        // No min/max available, only the null count can be reported.
        return collector.convert(new FieldStats(null, null, nullCount));
    }
    FieldStats fieldStats;
    switch (field.type().getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            assertStatsClass(field, stats, BinaryStatistics.class);
            BinaryStatistics stringStats = (BinaryStatistics) stats;
            fieldStats =
                    new FieldStats(
                            BinaryString.fromString(stringStats.minAsString()),
                            BinaryString.fromString(stringStats.maxAsString()),
                            nullCount);
            break;
        case BOOLEAN:
            assertStatsClass(field, stats, BooleanStatistics.class);
            BooleanStatistics boolStats = (BooleanStatistics) stats;
            fieldStats = new FieldStats(boolStats.getMin(), boolStats.getMax(), nullCount);
            break;
        case DECIMAL:
            // Conversion is delegated to the helper, which receives the Parquet primitive
            // type together with the declared precision and scale.
            PrimitiveType primitive = stats.type();
            DecimalType decimalType = (DecimalType) (field.type());
            int precision = decimalType.getPrecision();
            int scale = decimalType.getScale();
            fieldStats =
                    convertStatsToDecimalFieldStats(
                            primitive, field, stats, precision, scale, nullCount);
            break;
        case TINYINT:
            // Read from int statistics and narrowed to byte.
            assertStatsClass(field, stats, IntStatistics.class);
            IntStatistics byteStats = (IntStatistics) stats;
            fieldStats =
                    new FieldStats(
                            (byte) byteStats.getMin(), (byte) byteStats.getMax(), nullCount);
            break;
        case SMALLINT:
            // Read from int statistics and narrowed to short.
            assertStatsClass(field, stats, IntStatistics.class);
            IntStatistics shortStats = (IntStatistics) stats;
            fieldStats =
                    new FieldStats(
                            (short) shortStats.getMin(),
                            (short) shortStats.getMax(),
                            nullCount);
            break;
        case INTEGER:
        case DATE:
        case TIME_WITHOUT_TIME_ZONE:
            assertStatsClass(field, stats, IntStatistics.class);
            IntStatistics intStats = (IntStatistics) stats;
            // getMin()/getMax() already yield int values; no Long round-trip is needed.
            fieldStats = new FieldStats(intStats.getMin(), intStats.getMax(), nullCount);
            break;
        case BIGINT:
            assertStatsClass(field, stats, LongStatistics.class);
            LongStatistics longStats = (LongStatistics) stats;
            fieldStats = new FieldStats(longStats.getMin(), longStats.getMax(), nullCount);
            break;
        case FLOAT:
            assertStatsClass(field, stats, FloatStatistics.class);
            FloatStatistics floatStats = (FloatStatistics) stats;
            fieldStats = new FieldStats(floatStats.getMin(), floatStats.getMax(), nullCount);
            break;
        case DOUBLE:
            assertStatsClass(field, stats, DoubleStatistics.class);
            DoubleStatistics doubleStats = (DoubleStatistics) stats;
            fieldStats = new FieldStats(doubleStats.getMin(), doubleStats.getMax(), nullCount);
            break;
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            fieldStats = toTimestampStats(stats, ((TimestampType) field.type()).getPrecision());
            break;
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            fieldStats =
                    toTimestampStats(
                            stats, ((LocalZonedTimestampType) field.type()).getPrecision());
            break;
        default:
            // No min/max extraction for the remaining type roots; keep the null count only.
            fieldStats = new FieldStats(null, null, nullCount);
            break;
    }
    return collector.convert(fieldStats);
}