in xtable-core/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java [189:230]
/**
 * Builds the file-level and column-level statistics for a Delta {@code AddFile} from its stats
 * JSON string.
 *
 * <p>When the file carries no stats string, an empty column-stat list with a record count of 0
 * is returned. Otherwise the JSON is parsed into {@code DeltaStats} and one {@code ColumnStat}
 * is produced for every requested field that has an entry in the max-value statistics; fields
 * without a max-value entry are omitted from the result.
 *
 * @param addFile the Delta file action whose {@code stats()} JSON is read
 * @param fields the fields for which column statistics are requested
 * @return the record count and per-column statistics extracted from the file's stats
 * @throws ParseException if the stats JSON cannot be parsed
 */
public FileStats getColumnStatsForFile(AddFile addFile, List<InternalField> fields) {
  if (StringUtils.isEmpty(addFile.stats())) {
    return FileStats.builder().columnStats(Collections.emptyList()).numRecords(0).build();
  }
  // TODO: Additional work needed to track maps & arrays.
  try {
    DeltaStats deltaStats = MAPPER.readValue(addFile.stats(), DeltaStats.class);
    collectUnsupportedStats(deltaStats.getAdditionalStats());

    // Flattened so that each statistic can be looked up directly by the field's path.
    Map<String, Object> fieldPathToMaxValue = flattenStatMap(deltaStats.getMaxValues());
    Map<String, Object> fieldPathToMinValue = flattenStatMap(deltaStats.getMinValues());
    Map<String, Object> fieldPathToNullCount = flattenStatMap(deltaStats.getNullCount());

    List<ColumnStat> columnStats =
        fields.stream()
            // Presence of a max value decides whether a field gets a column stat at all.
            .filter(field -> fieldPathToMaxValue.containsKey(field.getPath()))
            .map(
                field -> {
                  String fieldPath = field.getPath();
                  Object minValue =
                      DeltaValueConverter.convertFromDeltaColumnStatValue(
                          fieldPathToMinValue.get(fieldPath), field.getSchema());
                  Object maxValue =
                      DeltaValueConverter.convertFromDeltaColumnStatValue(
                          fieldPathToMaxValue.get(fieldPath), field.getSchema());
                  // The filter above only guarantees a max value; the null count for the same
                  // path may be absent, so fall back to 0 instead of dereferencing null.
                  Number nullCount = (Number) fieldPathToNullCount.get(fieldPath);
                  long numNulls = nullCount == null ? 0L : nullCount.longValue();
                  Range range = Range.vector(minValue, maxValue);
                  return ColumnStat.builder()
                      .field(field)
                      .numValues(deltaStats.getNumRecords())
                      .numNulls(numNulls)
                      .range(range)
                      .build();
                })
            .collect(CustomCollectors.toList(fields.size()));
    return FileStats.builder()
        .columnStats(columnStats)
        .numRecords(deltaStats.getNumRecords())
        .build();
  } catch (IOException ex) {
    throw new ParseException("Unable to parse stats json", ex);
  }
}