in util/Stats.cpp [175:223]
// Estimates the value at `percentile` (expressed in percent) from the
// bucketed counts in hdata_.
//
// Buckets are walked in increasing order while the cumulative count is
// accumulated; the result is a linear interpolation between the previous
// bucket boundary and the boundary of the bucket where the cumulative
// percentage first reaches the target.
//
// percentile >= 100 returns getMax(); percentile <= 0 returns getRealMin().
// For anything in between, the left edge of the interpolation is raised to
// getRealMin() and the right edge falls back to getMax() when no bucket at or
// below the max reaches the target.
double Histogram::calcPercentile(double percentile) const {
  if (percentile >= 100) {
    return getMax();
  }
  if (percentile <= 0) {
    return getRealMin();
  }
  // Initial value of prev should in theory be offset_
  // but if the data is wrong (smaller than offset - eg 'negative') that
  // leads to strangeness (see one bucket test)
  int64_t prev = 0;
  int64_t total = 0;
  const int64_t ctrTotal = getCount();
  const int64_t ctrMax = getMax();
  const int64_t ctrMin = getRealMin();
  double prevPerc = 0;
  double perc = 0;
  bool found = false;
  int64_t cur = offset_;
  // The last bucket is virtual/special - we use max if we reach it.
  // We also use max if the bucket boundary is past the max, for better
  // accuracy and for the property that a target of 100 always returns max
  // (+/- rounding issues) and a value close to 100 (99.9...) stays close to
  // max when the data is not spread over several buckets.
  for (size_t i = 0; i < kLastIndex; ++i) {
    // Upper boundary of bucket i, in the same unit as min/max.
    cur = static_cast<int64_t>(Histogram::kHistogramBuckets[i]) * divider_ +
          offset_;
    total += hdata_[i];
    // Cumulative percentage of samples up to and including bucket i.
    // With no samples the division would be 0/0 = NaN, which would then leak
    // into prevPerc and the returned value; use 0 so the result stays finite.
    perc = (ctrTotal > 0) ? 100. * static_cast<double>(total) / ctrTotal : 0.;
    if (cur > ctrMax) {
      break;
    }
    if (perc >= percentile) {
      found = true;
      break;
    }
    prevPerc = perc;
    prev = cur;
  }
  if (!found) {
    // Covers both the "boundary > ctrMax" break and running out of buckets:
    // interpolate up to the max, which by definition is the 100% point.
    cur = ctrMax;
    perc = 100.;  // can't be removed: perc may still hold a partial value
  }
  // Fix up the min too, to never return < min and to increase low-percentile
  // accuracy.
  if (prev < ctrMin) {
    prev = ctrMin;
  }
  // Linear interpolation between (prevPerc, prev) and (perc, cur).
  return (prev + (percentile - prevPerc) * (cur - prev) / (perc - prevPerc));
}