in src/parquet/statistics.cc [198:251]
void TypedRowGroupStatistics<DType>::UpdateSpaced(const T* values,
                                                  const uint8_t* valid_bits,
                                                  int64_t valid_bits_offset,
                                                  int64_t num_not_null,
                                                  int64_t num_null) {
  // Folds a "spaced" batch (null slots are physically present in `values`)
  // into the running statistics.
  //
  //   values            - array of (num_null + num_not_null) slots; slots the
  //                       validity bitmap reports as unset are never trusted.
  //   valid_bits        - validity bitmap consumed through BitmapReader.
  //   valid_bits_offset - offset handed to BitmapReader for the first slot.
  //   num_not_null      - number of valid slots; added to the value count.
  //   num_null          - number of null slots; added to the null count.
  //
  // Counts are always updated. Min/max are only updated when the batch holds
  // at least one valid value that is not NaN.
  DCHECK(num_not_null >= 0);
  DCHECK(num_null >= 0);

  IncrementNullCount(num_null);
  IncrementNumValues(num_not_null);
  // TODO: support distinct count?
  if (num_not_null == 0) return;

  const int64_t length = num_null + num_not_null;
  ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset,
                                                    length);
  StatsHelper<T> helper;

  // PARQUET-1225: Handle NaNs.
  // Advance to the first slot that is both valid and not NaN; it seeds the
  // batch min/max. The reader is deliberately NOT advanced past that slot so
  // that it stays in lock-step with `i` for the second loop.
  int64_t i = 0;
  for (; i < length; i++) {
    if (valid_bits_reader.IsSet() && !helper.IsNaN(values[i])) {
      break;
    }
    valid_bits_reader.Next();
  }

  // No usable value in this batch: every valid slot is NaN.
  // The decision must rest on `i == length` alone. Inspecting values[i - 1]
  // is unsafe because the trailing slot may be a null whose payload is
  // arbitrary; if it does not look like NaN the code would fall through and
  // read values[length], one past the end of the batch.
  if (i == length) {
    // Don't set has_min_max flag since
    // these values must be over-written by valid stats later
    if (!has_min_max_) {
      SetNaN(&min_);
      SetNaN(&max_);
    }
    return;
  }

  // The comparator does not change during the scan, so wrap it once.
  auto less_than = std::ref(*(this->comparator_));

  T batch_min = values[i];
  T batch_max = values[i];
  for (; i < length; i++) {
    if (valid_bits_reader.IsSet()) {
      // batch_min <= batch_max always holds, so a value can lower the minimum
      // or raise the maximum, but never both.
      if (less_than(values[i], batch_min)) {
        batch_min = values[i];
      } else if (less_than(batch_max, values[i])) {
        batch_max = values[i];
      }
    }
    valid_bits_reader.Next();
  }

  SetMinMax(batch_min, batch_max);
}