bool CQuantileSketch::cdf()

in lib/maths/common/CQuantileSketch.cc [256:344]


bool CQuantileSketch::cdf(double x_, double& result, TOptionalInterpolation interpolation) const {
    result = 0.0;
    if (m_Knots.empty()) {
        LOG_ERROR(<< "No values added to quantile sketch");
        return false;
    }

    if (m_Unsorted > 0 || m_Knots.size() > this->fastReduceTargetSize()) {
        // It is critical to match the size selected by the call to reduce in
        // the add method or the unmerged values can cause issues estimating
        // the cdf close to 0 or 1.
        const_cast<CQuantileSketch*>(this)->reduce(this->fastReduceTargetSize());
    }

    CFloatStorage x = x_;
    std::ptrdiff_t n = m_Knots.size();
    if (n == 1) {
        result = x < m_Knots[0].first ? 0.0 : (x > m_Knots[0].first ? 1.0 : 0.5);
        return true;
    }

    std::ptrdiff_t k = std::lower_bound(m_Knots.begin(), m_Knots.end(), x,
                                        COrderings::SFirstLess()) -
                       m_Knots.begin();
    LOG_TRACE(<< "k = " << k);

    // This must make the same assumptions as quantile regarding the distribution
    // of values for each histogram bucket. See that function for more details.

    switch (interpolation.value_or(this->cdfAndQuantileInterpolation())) {
    case E_Linear: {
        if (k == 0) {
            double xl = m_Knots[0].first;
            double xr = m_Knots[1].first;
            double f = m_Knots[0].second / m_Count;
            LOG_TRACE(<< "xl = " << xl << ", xr = " << xr << ", f = " << f);
            result = f * std::max(x - 1.5 * xl + 0.5 * xr, 0.0) / (xr - xl);
        } else if (k == n) {
            double xl = m_Knots[n - 2].first;
            double xr = m_Knots[n - 1].first;
            double f = m_Knots[n - 1].second / m_Count;
            LOG_TRACE(<< "xl = " << xl << ", xr = " << xr << ", f = " << f);
            result = 1.0 - f * std::max(1.5 * xr - 0.5 * xl - x, 0.0) / (xr - xl);
        } else {
            double xl = m_Knots[k - 1].first;
            double xr = m_Knots[k].first;
            bool left = (2 * k < n);
            bool loc = (2.0 * x < xl + xr);
            double partial = 0.0;
            for (std::ptrdiff_t i = left ? 0 : (loc ? k : k + 1),
                                m = left ? (loc ? k - 1 : k) : n;
                 i < m; ++i) {
                partial += m_Knots[i].second;
            }
            partial = (left ? (partial + (loc ? 1.0 * m_Knots[k - 1].second : 0.0))
                            : (partial + (loc ? 0.0 : 1.0 * m_Knots[k].second))) /
                      m_Count;
            double dn{0.5 * m_Knots[loc ? k - 1 : k].second / m_Count};
            LOG_TRACE(<< "left = " << left << ", loc = " << loc << ", partial = " << partial
                      << ", xl = " << xl << ", xr = " << xr << ", dn = " << dn);
            result = left ? partial + dn * (2.0 * x - xl - xr) / (xr - xl)
                          : 1.0 - partial - dn * (xl + xr - 2.0 * x) / (xr - xl);
            LOG_TRACE(<< "result = " << result << " "
                      << dn * (2.0 * x - xl - xr) / (xr - xl));
        }
        return true;
    }
    case E_PiecewiseConstant: {
        if (k == 0) {
            double f = m_Knots[0].second / m_Count;
            result = x < m_Knots[0].first ? 0.0 : 0.5 * f;
        } else if (k == n) {
            double f = m_Knots[n - 1].second / m_Count;
            result = x > m_Knots[0].first ? 1.0 : 1.0 - 0.5 * f;
        } else {
            bool left = (2 * k < n);
            double partial = x < m_Knots[0].first ? 0.0 : 0.5 * m_Knots[0].second;
            for (std::ptrdiff_t i = left ? 0 : k + 1, m = left ? k : n; i < m; ++i) {
                partial += m_Knots[i].second;
            }
            partial /= m_Count;
            LOG_TRACE(<< "left = " << left << ", partial = " << partial);
            result = left ? partial : 1.0 - partial;
        }
        return true;
    }
    }
    return true;
}