in lib/maths/common/CQuantileSketch.cc [256:344]
bool CQuantileSketch::cdf(double x_, double& result, TOptionalInterpolation interpolation) const {
result = 0.0;
if (m_Knots.empty()) {
LOG_ERROR(<< "No values added to quantile sketch");
return false;
}
if (m_Unsorted > 0 || m_Knots.size() > this->fastReduceTargetSize()) {
// It is critical to match the size selected by the call to reduce in
// the add method or the unmerged values can cause issues estimating
// the cdf close to 0 or 1.
const_cast<CQuantileSketch*>(this)->reduce(this->fastReduceTargetSize());
}
CFloatStorage x = x_;
std::ptrdiff_t n = m_Knots.size();
if (n == 1) {
result = x < m_Knots[0].first ? 0.0 : (x > m_Knots[0].first ? 1.0 : 0.5);
return true;
}
std::ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), x,
COrderings::SFirstLess()) -
m_Knots.begin();
LOG_TRACE(<< "k = " << k);
// This must make the same assumptions as quantile regarding the distribution
// of values for each histogram bucket. See that function for more details.
switch (interpolation.value_or(this->cdfAndQuantileInterpolation())) {
case E_Linear: {
if (k == 0) {
double xl = m_Knots[0].first;
double xr = m_Knots[1].first;
double f = m_Knots[0].second / m_Count;
LOG_TRACE(<< "xl = " << xl << ", xr = " << xr << ", f = " << f);
result = f * std::max(x - 1.5 * xl + 0.5 * xr, 0.0) / (xr - xl);
} else if (k == n) {
double xl = m_Knots[n - 2].first;
double xr = m_Knots[n - 1].first;
double f = m_Knots[n - 1].second / m_Count;
LOG_TRACE(<< "xl = " << xl << ", xr = " << xr << ", f = " << f);
result = 1.0 - f * std::max(1.5 * xr - 0.5 * xl - x, 0.0) / (xr - xl);
} else {
double xl = m_Knots[k - 1].first;
double xr = m_Knots[k].first;
bool left = (2 * k < n);
bool loc = (2.0 * x < xl + xr);
double partial = 0.0;
for (std::ptrdiff_t i = left ? 0 : (loc ? k : k + 1),
m = left ? (loc ? k - 1 : k) : n;
i < m; ++i) {
partial += m_Knots[i].second;
}
partial = (left ? (partial + (loc ? 1.0 * m_Knots[k - 1].second : 0.0))
: (partial + (loc ? 0.0 : 1.0 * m_Knots[k].second))) /
m_Count;
double dn{0.5 * m_Knots[loc ? k - 1 : k].second / m_Count};
LOG_TRACE(<< "left = " << left << ", loc = " << loc << ", partial = " << partial
<< ", xl = " << xl << ", xr = " << xr << ", dn = " << dn);
result = left ? partial + dn * (2.0 * x - xl - xr) / (xr - xl)
: 1.0 - partial - dn * (xl + xr - 2.0 * x) / (xr - xl);
LOG_TRACE(<< "result = " << result << " "
<< dn * (2.0 * x - xl - xr) / (xr - xl));
}
return true;
}
case E_PiecewiseConstant: {
if (k == 0) {
double f = m_Knots[0].second / m_Count;
result = x < m_Knots[0].first ? 0.0 : 0.5 * f;
} else if (k == n) {
double f = m_Knots[n - 1].second / m_Count;
result = x > m_Knots[0].first ? 1.0 : 1.0 - 0.5 * f;
} else {
bool left = (2 * k < n);
double partial = x < m_Knots[0].first ? 0.0 : 0.5 * m_Knots[0].second;
for (std::ptrdiff_t i = left ? 0 : k + 1, m = left ? k : n; i < m; ++i) {
partial += m_Knots[i].second;
}
partial /= m_Count;
LOG_TRACE(<< "left = " << left << ", partial = " << partial);
result = left ? partial : 1.0 - partial;
}
return true;
}
}
return true;
}