void TypedRowGroupStatistics::UpdateSpaced()

in src/parquet/statistics.cc [198:251]


void TypedRowGroupStatistics<DType>::UpdateSpaced(const T* values,
                                                  const uint8_t* valid_bits,
                                                  int64_t valid_bits_offset,
                                                  int64_t num_not_null,
                                                  int64_t num_null) {
  DCHECK(num_not_null >= 0);
  DCHECK(num_null >= 0);

  IncrementNullCount(num_null);
  IncrementNumValues(num_not_null);
  // TODO: support distinct count?
  if (num_not_null == 0) return;

  // Find first valid entry and use that for min/max
  // As (num_not_null != 0) there must be one
  int64_t length = num_null + num_not_null;
  int64_t i = 0;
  ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset,
                                                    length);
  StatsHelper<T> helper;
  for (; i < length; i++) {
    // PARQUET-1225: Handle NaNs
    if (valid_bits_reader.IsSet() && !helper.IsNaN(values[i])) {
      break;
    }
    valid_bits_reader.Next();
  }

  // All are NaNs and stats are not set yet
  if ((i == length) && helper.IsNaN(values[i - 1])) {
    // Don't set has_min_max flag since
    // these values must be over-written by valid stats later
    if (!has_min_max_) {
      SetNaN(&min_);
      SetNaN(&max_);
    }
    return;
  }

  T min = values[i];
  T max = values[i];
  for (; i < length; i++) {
    if (valid_bits_reader.IsSet()) {
      if ((std::ref(*(this->comparator_)))(values[i], min)) {
        min = values[i];
      } else if ((std::ref(*(this->comparator_)))(max, values[i])) {
        max = values[i];
      }
    }
    valid_bits_reader.Next();
  }

  SetMinMax(min, max);
}