in lib/maths/common/CMultimodalPrior.cc [931:1136]
bool CMultimodalPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound,
maths_t::ETail& tail) const {
lowerBound = 0.0;
upperBound = 1.0;
tail = maths_t::E_UndeterminedTail;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute distribution for empty sample set");
return false;
}
if (this->isNonInformative()) {
return true;
}
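// Short circuit the calculation when there is a single mode: we
// can delegate directly to that mode's prior.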
if (m_Modes.size() == 1) {
return m_Modes[0].s_Prior->probabilityOfLessLikelySamples(
calculation, samples, weights, lowerBound, upperBound, tail);
}
// Ideally we'd find the probability of the set of samples whose
// total likelihood is less than or equal to that of the specified
// samples, i.e. the probability of the set
// R = { y | L(y) < L(x) }
//
// where,
// x = {x(1), x(2), ..., x(n)} is the sample vector.
// y is understood to be a vector quantity.
//
// This is not *trivially* related to the probabilities of the
// sets
// R(i) = { y | L(y) < L(x(i)) }
//
// since the joint conditional likelihood must be integrated over
// the priors for the parameters. However, we approximate it as
// the joint probability of a collection of independent standard
// normal random variables having probabilities {P(R(i))}. This
// becomes increasingly accurate as the prior distribution narrows.
//
// For the two sided calculation, we use the fact that the likelihood
// function decreases monotonically away from the interval [a, b],
// whose end points are the locations of the leftmost and rightmost
// modes, since all the component likelihoods decrease away from this
// interval.
//
// To evaluate the probability in the interval [a, b] we relax
// the hard constraint that regions where f > f(x) contribute
// zero probability. In particular, we note that we can write
// the probability as:
// P = Integral{ I(f(s) < f(x)) * f(s) }ds
//
// and that:
// I(f(s) < f(x)) = lim_{k->inf}{ exp(-k * (f(s)/f(x) - 1))
// / (1 + exp(-k * (f(s)/f(x) - 1))) }
//
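// For example, with k = 3 and f(s) = 1.5 * f(x) the smoothed
// indicator is exp(-1.5) / (1 + exp(-1.5)) ~= 0.18, whereas the
// exact indicator is 0; increasing k sharpens the approximation.
//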
// We evaluate a smoother integral, i.e. smaller k, initially
// to find out which regions contribute the most to P and then
// re-evaluate the regions we need at higher resolution, using
// the fact that the maximum error in the approximation of
// I(f(s) < f(x)) is 0.5.
switch (calculation) {
case maths_t::E_OneSidedBelow:
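// Note that minusLogJointCdf computes bounds on -log(F(x)), so its
// lower bound maps to our upper bound on F(x) and vice versa, hence
// the swapped out arguments.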
if (this->minusLogJointCdf(samples, weights, upperBound, lowerBound) == false) {
return false;
}
lowerBound = std::exp(-lowerBound);
upperBound = std::exp(-upperBound);
tail = maths_t::E_LeftTail;
break;
case maths_t::E_TwoSided: {
static const double EPS{1000.0 * std::numeric_limits<double>::epsilon()};
static const std::size_t MAX_ITERATIONS{20};
CJointProbabilityOfLessLikelySamples lowerBoundCalculator;
CJointProbabilityOfLessLikelySamples upperBoundCalculator;
TDoubleDoublePr support{this->marginalLikelihoodSupport()};
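// Nudge the support end points just inside the closed interval so
// that the truncated mode end points below lie strictly inside the
// support.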
support.first = (1.0 + (support.first > 0.0 ? EPS : -EPS)) * support.first;
support.second = (1.0 - (support.second > 0.0 ? EPS : -EPS)) * support.second;
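// If a sample carries a seasonal variance scale we standardize it
// about the marginal likelihood mean to remove the scale before
// computing its probability.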
bool hasSeasonalScale{maths_t::hasSeasonalVarianceScale(weights)};
double mean{hasSeasonalScale ? this->marginalLikelihoodMean() : 0.0};
double a{std::numeric_limits<double>::max()};
double b{-std::numeric_limits<double>::max()};
double Z{0.0};
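// Compute the interval [a, b] spanned by the mode locations and the
// total mode weight Z used for normalization.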
for (const auto& mode : m_Modes) {
double m{mode.s_Prior->marginalLikelihoodMode()};
a = std::min(a, m);
b = std::max(b, m);
Z += mode.weight();
}
a = CTools::truncate(a, support.first, support.second);
b = CTools::truncate(b, support.first, support.second);
LOG_TRACE(<< "a = " << a << ", b = " << b << ", Z = " << Z);
// Declared outside the loop to avoid reconstructing it on each
// iteration.
TDoubleWeightsAry1Vec weight(1);
int tail_{0};
for (std::size_t i = 0; i < samples.size(); ++i) {
double x{samples[i]};
weight[0] = weights[i];
if (CMathsFuncs::isNan(x) || CMathsFuncs::isNan(weight[0])) {
continue;
}
if (hasSeasonalScale) {
x = mean + (x - mean) /
std::sqrt(maths_t::seasonalVarianceScale(weight[0]));
maths_t::setSeasonalVarianceScale(1.0, weight[0]);
}
double fx;
maths_t::EFloatingPointErrorStatus status =
this->jointLogMarginalLikelihood({x}, weight, fx);
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Unable to compute likelihood for " << x);
return false;
}
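// The likelihood underflowed, so the sample is far out in the tails
// and its probability is effectively zero.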
if ((status & maths_t::E_FpOverflowed) != 0) {
lowerBound = upperBound = 0.0;
return true;
}
LOG_TRACE(<< "x = " << x << ", f(x) = " << fx);
CPrior::CLogMarginalLikelihood logLikelihood{*this, weight};
CTools::CMixtureProbabilityOfLessLikelySample calculator{m_Modes.size(),
x, fx, a, b};
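// Approximate each mode by its normalized weight, location and
// spread, and record the tail(s) in which the sample falls relative
// to the mode locations.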
for (const auto& mode : m_Modes) {
double w{mode.weight() / Z};
double centre{mode.s_Prior->marginalLikelihoodMode(weight[0])};
double spread{
std::sqrt(mode.s_Prior->marginalLikelihoodVariance(weight[0]))};
calculator.addMode(w, centre, spread);
tail_ = tail_ | (x < centre ? maths_t::E_LeftTail : maths_t::E_RightTail);
}
double sampleLowerBound{0.0};
double sampleUpperBound{0.0};
double lb;
double ub;
double xl;
CEqualWithTolerance<double> lequal{CToleranceTypes::E_AbsoluteTolerance,
EPS * a};
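// Find the point xl in the left tail whose likelihood equals f(x)
// and add in the probability to its left. If the search didn't
// converge, xl is the best estimate found and we only credit the
// upper bound.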
if (calculator.leftTail(logLikelihood, MAX_ITERATIONS, lequal, xl)) {
this->minusLogJointCdf({xl}, weight, lb, ub);
sampleLowerBound += std::exp(std::min(-lb, -ub));
sampleUpperBound += std::exp(std::max(-lb, -ub));
} else {
this->minusLogJointCdf({xl}, weight, lb, ub);
sampleUpperBound += std::exp(std::max(-lb, -ub));
}
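// Likewise for the right tail, using the cdf complement.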
double xr;
CEqualWithTolerance<double> requal{CToleranceTypes::E_AbsoluteTolerance,
EPS * b};
if (calculator.rightTail(logLikelihood, MAX_ITERATIONS, requal, xr)) {
this->minusLogJointCdfComplement({xr}, weight, lb, ub);
sampleLowerBound += std::exp(std::min(-lb, -ub));
sampleUpperBound += std::exp(std::max(-lb, -ub));
} else {
this->minusLogJointCdfComplement({xr}, weight, lb, ub);
sampleUpperBound += std::exp(std::max(-lb, -ub));
}
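// Add the contribution from the interval [a, b], i.e. the
// probability of the set of points between the extreme modes whose
// likelihood is less than f(x). If a == b the tail probabilities
// already account for everything.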
double p{0.0};
if (a < b) {
p = calculator.calculate(logLikelihood, sampleLowerBound);
}
LOG_TRACE(<< "sampleLowerBound = " << sampleLowerBound
<< ", sampleUpperBound = " << sampleUpperBound << ", p = " << p);
lowerBoundCalculator.add(CTools::truncate(sampleLowerBound + p, 0.0, 1.0));
upperBoundCalculator.add(CTools::truncate(sampleUpperBound + p, 0.0, 1.0));
}
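// Aggregate the per-sample bounds into bounds on the joint
// probability of seeing a less likely collection of samples.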
if (lowerBoundCalculator.calculate(lowerBound) == false ||
upperBoundCalculator.calculate(upperBound) == false) {
LOG_ERROR(<< "Couldn't compute probability of less likely samples:"
<< " " << lowerBoundCalculator << " " << upperBoundCalculator
<< " (samples = " << samples << ", weights = " << weights << ")");
return false;
}
tail = static_cast<maths_t::ETail>(tail_);
} break;
case maths_t::E_OneSidedAbove:
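// As for the one sided below case, but using the cdf complement so
// the bounds are on the probability of seeing a larger sample.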
if (this->minusLogJointCdfComplement(samples, weights, upperBound, lowerBound) == false) {
return false;
}
lowerBound = std::exp(-lowerBound);
upperBound = std::exp(-upperBound);
tail = maths_t::E_RightTail;
break;
}
return true;
}