double tdigest::get_rank()

in tdigest/include/tdigest_impl.hpp [94:148]


double tdigest<T, A>::get_rank(T value) const {
  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
  if (std::isnan(value)) throw std::invalid_argument("operation is undefined for NaN");
  if (value < min_) return 0;
  if (value > max_) return 1;
  // one centroid and value == min_ == max_
  if ((centroids_.size() + buffer_.size()) == 1) return 0.5;

  const_cast<tdigest*>(this)->compress(); // side effect

  // left tail
  const T first_mean = centroids_.front().get_mean();
  if (value < first_mean) {
    if (first_mean - min_ > 0) {
      if (value == min_) return 0.5 / centroids_weight_;
      return (1.0 + (value - min_) / (first_mean - min_) * (centroids_.front().get_weight() / 2.0 - 1.0)); // ?
    }
    return 0; // should never happen
  }

  // right tail
  const T last_mean = centroids_.back().get_mean();
  if (value > last_mean) {
    if (max_ - last_mean > 0) {
      if (value == max_) return 1.0 - 0.5 / centroids_weight_;
        return 1.0 - ((1.0 + (max_ - value) / (max_ - last_mean) * (centroids_.back().get_weight() / 2.0 - 1.0)) / centroids_weight_); // ?
    }
    return 1; // should never happen
  }

  auto lower = std::lower_bound(centroids_.begin(), centroids_.end(), centroid(value, 1), centroid_cmp());
  if (lower == centroids_.end()) throw std::logic_error("lower == end in get_rank()");
  auto upper = std::upper_bound(lower, centroids_.end(), centroid(value, 1), centroid_cmp());
  if (upper == centroids_.begin()) throw std::logic_error("upper == begin in get_rank()");
  if (value < lower->get_mean()) --lower;
  if (upper == centroids_.end() || !((upper - 1)->get_mean() < value)) --upper;
  double weight_below = 0;
  auto it = centroids_.begin();
  while (it != lower) {
    weight_below += it->get_weight();
    ++it;
  }
  weight_below += lower->get_weight() / 2.0;
  double weight_delta = 0;
  while (it != upper) {
    weight_delta += it->get_weight();
    ++it;
  }
  weight_delta -= lower->get_weight() / 2.0;
  weight_delta += upper->get_weight() / 2.0;
  if (upper->get_mean() - lower->get_mean() > 0) {
    return (weight_below + weight_delta * (value - lower->get_mean()) / (upper->get_mean() - lower->get_mean())) / centroids_weight_;
  }
  return (weight_below + weight_delta / 2.0) / centroids_weight_;
}