bool CMultimodalPrior::probabilityOfLessLikelySamples()

in lib/maths/common/CMultimodalPrior.cc [931:1136]


bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
                                                      const TDouble1Vec& samples,
                                                      const TDoubleWeightsAry1Vec& weights,
                                                      double& lowerBound,
                                                      double& upperBound,
                                                      maths_t::ETail& tail) const {
    lowerBound = 0.0;
    upperBound = 1.0;
    tail = maths_t::E_UndeterminedTail;

    if (samples.empty()) {
        LOG_ERROR(<< "Can't compute distribution for empty sample set");
        return false;
    }
    if (this->isNonInformative()) {
        return true;
    }

    if (m_Modes.size() == 1) {
        return m_Modes[0].s_Prior->probabilityOfLessLikelySamples(
            calculation, samples, weights, lowerBound, upperBound, tail);
    }

    // Ideally we'd find the probability of the set of samples whose
    // total likelihood is less than or equal to that of the specified
    // samples, i.e. the probability of the set
    //   R = { y | L(y) < L(x) }
    //
    // where,
    //   x = {x(1), x(2), ..., x(n)} is the sample vector.
    //   y is understood to be a vector quantity.
    //
    // This is not *trivially* related to the probability that the
    // probabilities of the sets
    //   R(i) = { y | L(y) < L(x(i)) }
    //
    // since the joint conditional likelihood must be integrated over
    // priors for the parameters. However, we'll approximate this as
    // the joint probability (of a collection of standard normal R.Vs.)
    // having probabilities {P(R(i))}. This becomes increasingly accurate
    // as the prior distribution narrows.
    //
    // For the two sided calculation, we use the fact that the likelihood
    // function decreases monotonically away from the interval [a, b]
    // whose end points are the leftmost and rightmost modes' modes
    // since all component likelihoods decrease away from this interval.
    //
    // To evaluate the probability in the interval [a, b] we relax
    // the hard constraint that regions where f > f(x) contribute
    // zero probability. In particular, we note that we can write
    // the probability as:
    //   P = Integral{ I(f(s) < f(x)) * f(s) }ds
    //
    // and that:
    //   I(f(s) < f(x)) = lim_{k->inf}{ exp(-k * (f(s)/f(x) - 1))
    //                                  / (1 + exp(-k * (f(s)/f(x) - 1))) }
    //
    // We evaluate a smoother integral, i.e. smaller p, initially
    // to find out which regions contribute the most to P and then
    // re-evaluate those regions we need with higher resolution
    // using the fact that the maximum error in the approximation
    // of I(f(s) < f(x)) is 0.5.

    switch (calculation) {
    case maths_t::E_OneSidedBelow:
        if (this->minusLogJointCdf(samples, weights, upperBound, lowerBound) == false) {
            return false;
        }
        lowerBound = std::exp(-lowerBound);
        upperBound = std::exp(-upperBound);
        tail = maths_t::E_LeftTail;
        break;

    case maths_t::E_TwoSided: {
        static const double EPS{1000.0 * std::numeric_limits<double>::epsilon()};
        static const std::size_t MAX_ITERATIONS{20};

        CJointProbabilityOfLessLikelySamples lowerBoundCalculator;
        CJointProbabilityOfLessLikelySamples upperBoundCalculator;

        TDoubleDoublePr support{this->marginalLikelihoodSupport()};
        support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first;
        support.second = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.second;
        bool hasSeasonalScale{maths_t::hasSeasonalVarianceScale(weights)};
        double mean{hasSeasonalScale ? this->marginalLikelihoodMean() : 0.0};

        double a{std::numeric_limits<double>::max()};
        double b{-std::numeric_limits<double>::max()};
        double Z{0.0};
        for (const auto& mode : m_Modes) {
            double m{mode.s_Prior->marginalLikelihoodMode()};
            a = std::min(a, m);
            b = std::max(b, m);
            Z += mode.weight();
        }
        a = CTools::truncate(a, support.first, support.second);
        b = CTools::truncate(b, support.first, support.second);
        LOG_TRACE(<< "a = " << a << ", b = " << b << ", Z = " << Z);

        // Declared outside the loop to minimize the number of times
        // it is created.
        TDoubleWeightsAry1Vec weight(1);

        int tail_{0};
        for (std::size_t i = 0; i < samples.size(); ++i) {
            double x{samples[i]};
            weight[0] = weights[i];
            if (CMathsFuncs::isNan(x) || CMathsFuncs::isNan(weight[0])) {
                continue;
            }

            if (hasSeasonalScale) {
                x = mean + (x - mean) /
                               std::sqrt(maths_t::seasonalVarianceScale(weight[0]));
                maths_t::setSeasonalVarianceScale(1.0, weight[0]);
            }

            double fx;
            maths_t::EFloatingPointErrorStatus status =
                this->jointLogMarginalLikelihood({x}, weight, fx);
            if ((status & maths_t::E_FpFailed) != 0) {
                LOG_ERROR(<< "Unable to compute likelihood for " << x);
                return false;
            }
            if ((status & maths_t::E_FpOverflowed) != 0) {
                lowerBound = upperBound = 0.0;
                return true;
            }
            LOG_TRACE(<< "x = " << x << ", f(x) = " << fx);

            CPrior::CLogMarginalLikelihood logLikelihood{*this, weight};

            CTools::CMixtureProbabilityOfLessLikelySample calculator{m_Modes.size(),
                                                                     x, fx, a, b};
            for (const auto& mode : m_Modes) {
                double w{mode.weight() / Z};
                double centre{mode.s_Prior->marginalLikelihoodMode(weight[0])};
                double spread{
                    std::sqrt(mode.s_Prior->marginalLikelihoodVariance(weight[0]))};
                calculator.addMode(w, centre, spread);
                tail_ = tail_ | (x < centre ? maths_t::E_LeftTail : maths_t::E_RightTail);
            }

            double sampleLowerBound{0.0};
            double sampleUpperBound{0.0};

            double lb;
            double ub;
            double xl;
            CEqualWithTolerance<double> lequal{CToleranceTypes::E_AbsoluteTolerance,
                                               EPS * a};
            if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, xl)) {
                this->minusLogJointCdf({xl}, weight, lb, ub);
                sampleLowerBound += std::exp(std::min(-lb, -ub));
                sampleUpperBound += std::exp(std::max(-lb, -ub));
            } else {
                this->minusLogJointCdf({xl}, weight, lb, ub);
                sampleUpperBound += std::exp(std::max(-lb, -ub));
            }

            double xr;
            CEqualWithTolerance<double> requal{CToleranceTypes::E_AbsoluteTolerance,
                                               EPS * b};
            if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, xr)) {
                this->minusLogJointCdfComplement({xr}, weight, lb, ub);
                sampleLowerBound += std::exp(std::min(-lb, -ub));
                sampleUpperBound += std::exp(std::max(-lb, -ub));
            } else {
                this->minusLogJointCdfComplement({xr}, weight, lb, ub);
                sampleUpperBound += std::exp(std::max(-lb, -ub));
            }

            double p{0.0};
            if (a < b) {
                p = calculator.calculate(logLikelihood, sampleLowerBound);
            }

            LOG_TRACE(<< "sampleLowerBound = " << sampleLowerBound
                      << ", sampleUpperBound = " << sampleUpperBound << " p = " << p);

            lowerBoundCalculator.add(CTools::truncate(sampleLowerBound + p, 0.0, 1.0));
            upperBoundCalculator.add(CTools::truncate(sampleUpperBound + p, 0.0, 1.0));
        }

        if (lowerBoundCalculator.calculate(lowerBound) == false ||
            upperBoundCalculator.calculate(upperBound) == false) {
            LOG_ERROR(<< "Couldn't compute probability of less likely samples:"
                      << " " << lowerBoundCalculator << " " << upperBoundCalculator
                      << " (samples = " << samples << ", weights = " << weights << ")");
            return false;
        }
        tail = static_cast<maths_t::ETail>(tail_);
    } break;

    case maths_t::E_OneSidedAbove:
        if (this->minusLogJointCdfComplement(samples, weights, upperBound, lowerBound) == false) {
            return false;
        }
        lowerBound = std::exp(-lowerBound);
        upperBound = std::exp(-upperBound);
        tail = maths_t::E_RightTail;
        break;
    }

    return true;
}