in tdigest/include/tdigest_impl.hpp [94:148]
double tdigest<T, A>::get_rank(T value) const {
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if (std::isnan(value)) throw std::invalid_argument("operation is undefined for NaN");
if (value < min_) return 0;
if (value > max_) return 1;
// one centroid and value == min_ == max_
if ((centroids_.size() + buffer_.size()) == 1) return 0.5;
const_cast<tdigest*>(this)->compress(); // side effect
// left tail
const T first_mean = centroids_.front().get_mean();
if (value < first_mean) {
if (first_mean - min_ > 0) {
if (value == min_) return 0.5 / centroids_weight_;
return (1.0 + (value - min_) / (first_mean - min_) * (centroids_.front().get_weight() / 2.0 - 1.0)); // ?
}
return 0; // should never happen
}
// right tail
const T last_mean = centroids_.back().get_mean();
if (value > last_mean) {
if (max_ - last_mean > 0) {
if (value == max_) return 1.0 - 0.5 / centroids_weight_;
return 1.0 - ((1.0 + (max_ - value) / (max_ - last_mean) * (centroids_.back().get_weight() / 2.0 - 1.0)) / centroids_weight_); // ?
}
return 1; // should never happen
}
auto lower = std::lower_bound(centroids_.begin(), centroids_.end(), centroid(value, 1), centroid_cmp());
if (lower == centroids_.end()) throw std::logic_error("lower == end in get_rank()");
auto upper = std::upper_bound(lower, centroids_.end(), centroid(value, 1), centroid_cmp());
if (upper == centroids_.begin()) throw std::logic_error("upper == begin in get_rank()");
if (value < lower->get_mean()) --lower;
if (upper == centroids_.end() || !((upper - 1)->get_mean() < value)) --upper;
double weight_below = 0;
auto it = centroids_.begin();
while (it != lower) {
weight_below += it->get_weight();
++it;
}
weight_below += lower->get_weight() / 2.0;
double weight_delta = 0;
while (it != upper) {
weight_delta += it->get_weight();
++it;
}
weight_delta -= lower->get_weight() / 2.0;
weight_delta += upper->get_weight() / 2.0;
if (upper->get_mean() - lower->get_mean() > 0) {
return (weight_below + weight_delta * (value - lower->get_mean()) / (upper->get_mean() - lower->get_mean())) / centroids_weight_;
}
return (weight_below + weight_delta / 2.0) / centroids_weight_;
}