/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <maths/common/CGammaRateConjugate.h>
#include <core/CLogger.h>
#include <core/CNonCopyable.h>
#include <core/CStatePersistInserter.h>
#include <core/CStateRestoreTraverser.h>
#include <core/RestoreMacros.h>
#include <maths/common/CBasicStatistics.h>
#include <maths/common/CBasicStatisticsPersist.h>
#include <maths/common/CChecksum.h>
#include <maths/common/CIntegration.h>
#include <maths/common/CMathsFuncs.h>
#include <maths/common/CMathsFuncsForMatrixAndVectorTypes.h>
#include <maths/common/CRestoreParams.h>
#include <maths/common/CSolvers.h>
#include <maths/common/CTools.h>
#include <maths/common/ProbabilityAggregators.h>
#include <boost/math/distributions/beta.hpp>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/digamma.hpp>
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
namespace ml {
namespace maths {
namespace common {
namespace {
namespace detail {
using TDoubleDoublePr = std::pair<double, double>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
using TMeanAccumulator = CBasicStatistics::SSampleMean<CDoublePrecisionStorage>::TAccumulator;
using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<CDoublePrecisionStorage>::TAccumulator;
const double NON_INFORMATIVE_COUNT = 3.5;
//! Compute the minimum permitted coefficient of variation for the sample moments.
double minimumCoefficientOfVariation(bool isInteger, double mean) {
return std::max(MINIMUM_COEFFICIENT_OF_VARIATION,
isInteger ? std::sqrt(1.0 / 12.0) / mean : 0.0);
}
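// For example (illustrative values): integer data with mean 10.0 yield a lower
// bound of sqrt(1.0 / 12.0) / 10.0 ~= 0.029, i.e. the standard deviation of the
// latent uniform offset on [0, 1] relative to the mean, which typically
// dominates MINIMUM_COEFFICIENT_OF_VARIATION.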
//! Apply the minimum coefficient of variation constraint to the sample
//! variance and log sample mean.
//!
//! \param[in] isInteger True if the data are integer and false otherwise.
//! \param[in,out] logMean The mean of the log of the data.
//! \param[in,out] moments The mean and variance of the data.
void truncateVariance(bool isInteger, TMeanAccumulator& logMean, TMeanVarAccumulator& moments) {
if (CBasicStatistics::count(moments) > 1.5) {
// The idea is to model the impact of a small coefficient of variation
// on the variance and the mean log of the samples. Note that:
// mean(log(x'(i)) = 1/n * Sum_i{ log(m + (x'(i) - m)) }
// = 1/n * Sum_i{ log(m) + log(1 + d(i) / m) }
// ~ 1/n * Sum_i{ log(m) + d(i) / m - (d(i)/m)^2 / 2 }
// (1)
//
// where x(i) are our true samples, which are all very nearly m
// (since their variation is tiny by assumption) and x'(i) are our
// samples assuming minimum coefficient of variation. Finally, we
// note that:
// E[d(i)] = 0
// E[(d(i)/m)^2] = "coefficient variation"^2 (2)
//
// Because we age the data we can't rely on the fact that the estimated
// coefficient of variation is accurately reflected in the mean of the
// logs. The calculation of the likelihood shape is poorly conditioned
// if mean(log(x(i))) is too close to log(mean(x(i))). A variation of
// Jensen's inequality gives that:
// mean(log(x(i))) <= log(mean(x(i))) (3)
//
// (i.e. use the fact that log is concave and so is less than its tangent
// at every point:
// log(x0 + x - x0) <= log(x0) + 1/x0 * (x - x0)
//
// set x0 in above to mean(x(i)) and note that:
// Sum_i( x(i) - mean(x(i)) ) = 0.
//
// From which (3) follows.) Together with (1) and (2) this means that
// we should bound the mean of log(x(i)) by:
// log(mean(x(i))) - 1/2 * "minimum coefficient variation"^2
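// For example (illustrative values), if the minimum coefficient of variation
// is 0.05 then the cap is log(mean(x(i))) - 0.00125, i.e. the mean of the
// logs is pulled at most slightly below the log of the mean.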
double sampleDeviation = std::sqrt(CBasicStatistics::variance(moments));
double sampleMean = std::max(std::fabs(CBasicStatistics::mean(moments)), 1e-8);
double cov = sampleDeviation / sampleMean;
double covMin = minimumCoefficientOfVariation(isInteger, sampleMean);
if (cov < covMin) {
double extraDeviation = sampleMean * (covMin - cov);
moments.s_Moments[1] += extraDeviation * extraDeviation;
}
double maxLogMean = std::log(moments.s_Moments[0]) - covMin * covMin / 2.0;
logMean.s_Moments[0] = std::min(double(logMean.s_Moments[0]), double(maxLogMean));
}
}
//! Computes the derivative w.r.t. the shape of the marginal likelihood
//! function for gamma distributed data with known prior for the rate.
class CLikelihoodDerivativeFunction {
public:
CLikelihoodDerivativeFunction(double numberSamples, double target)
: m_NumberSamples(numberSamples), m_Target(target) {}
double operator()(double x) const {
return boost::math::digamma(m_NumberSamples * x) - boost::math::digamma(x) - m_Target;
}
private:
double m_NumberSamples;
double m_Target;
};
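// Note that for a fixed number of samples (>= 1) this function is monotone
// decreasing in x; the root bracketing loop in maximumLikelihoodShape below
// relies on this to decide which bracket endpoint to move.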
//! Compute the maximum likelihood posterior shape if possible, otherwise
//! estimate the shape using the method of moments.
//!
//! \param[in] oldShape The value of the shape used to seed the root bracketing
//! loop to find the new maximum likelihood shape (this should correspond to
//! the maximum likelihood shape implied by \p oldLogMean and \p oldMoments).
//! \param[in] oldLogMean The mean of the logs of all previous samples.
//! \param[in] newLogMean The mean of the logs of all previous samples plus
//! new samples to be incorporated into the estimate.
//! \param[in] oldMoments The mean and variance of all previous samples.
//! \param[in] newMoments The mean and variance of all previous samples
//! plus new samples to be incorporated into the estimate.
double maximumLikelihoodShape(double oldShape,
const TMeanAccumulator& oldLogMean,
const TMeanAccumulator& newLogMean,
const TMeanVarAccumulator& oldMoments,
const TMeanVarAccumulator& newMoments) {
if (CBasicStatistics::count(newMoments) < NON_INFORMATIVE_COUNT) {
return oldShape;
}
static const double EPS = 1e-3;
// Use large maximum growth factors for root bracketing because
// overshooting is less costly than extra iterations to bracket
// the root since we get cubic convergence in the solving loop.
// The derivative of the digamma function is monotone decreasing
// so we use a higher maximum growth factor on the upside.
static const double MIN_DOWN_FACTOR = 0.25;
static const double MAX_UP_FACTOR = 8.0;
std::size_t maxIterations = 20;
double oldNumber = CBasicStatistics::count(oldMoments);
double oldMean = CBasicStatistics::mean(oldMoments);
double oldTarget = 0.0;
if (oldNumber * oldMean > 0.0) {
oldTarget = std::log(oldNumber * oldMean) - CBasicStatistics::mean(oldLogMean);
}
double newNumber = CBasicStatistics::count(newMoments);
double newMean = CBasicStatistics::mean(newMoments);
if (newNumber * newMean == 0.0) {
return 0.0;
}
double target = std::log(newNumber * newMean) - CBasicStatistics::mean(newLogMean);
// Fall back to method of moments if maximum-likelihood fails.
double bestGuess = 1.0;
if (CBasicStatistics::variance(newMoments) > 0.0) {
bestGuess = newMean * newMean / CBasicStatistics::variance(newMoments);
}
// If we've estimated the shape before, the old shape will typically
// be a very good initial estimate. Otherwise, use the best guess.
double x0 = bestGuess;
if (oldNumber > NON_INFORMATIVE_COUNT) {
x0 = oldShape;
}
TDoubleDoublePr bracket(x0, x0);
double downFactor = 0.8;
double upFactor = 1.4;
if (oldNumber > NON_INFORMATIVE_COUNT) {
// Compute, very approximately, minus the gradient of the function
// at the old shape. We just use the chord from the origin to the
// target value and truncate its value so the bracketing loop is
// well behaved.
double gradient = 1.0;
if (oldShape > 0.0) {
gradient = CTools::truncate(oldTarget / oldShape, EPS, 1.0);
}
// Choose the growth factors so we will typically bracket the root
// in one iteration and not overshoot too much. Again we truncate
// the values so that the bracketing loop is well behaved.
double dTarget = std::fabs(target - oldTarget);
downFactor = CTools::truncate(1.0 - 2.0 * dTarget / gradient,
MIN_DOWN_FACTOR, 1.0 - EPS);
upFactor = CTools::truncate(1.0 + 2.0 * dTarget / gradient, 1.0 + EPS, MAX_UP_FACTOR);
}
CLikelihoodDerivativeFunction derivative(newNumber, target);
double f0 = 0.0;
TDoubleDoublePr fBracket(f0, f0);
try {
fBracket.first = fBracket.second = f0 = derivative(x0);
if (f0 == 0.0) {
// We're done.
return x0;
}
// The target function is monotone decreasing. The rate at which we
// change the down and up factors in this loop has been determined
// empirically to give a good expected total number of evaluations
// of the likelihood derivative function across a range of different
// process gamma shapes and rates. In particular, the mean total
// number of evaluations used by this function is around five.
for (/**/; maxIterations > 0; --maxIterations) {
if (fBracket.first < 0.0) {
bracket.second = bracket.first;
fBracket.second = fBracket.first;
bracket.first *= downFactor;
fBracket.first = derivative(bracket.first);
downFactor = std::max(0.8 * downFactor, MIN_DOWN_FACTOR);
} else if (fBracket.second > 0.0) {
bracket.first = bracket.second;
fBracket.first = fBracket.second;
bracket.second *= upFactor;
fBracket.second = derivative(bracket.second);
upFactor = std::min(1.4 * upFactor, MAX_UP_FACTOR);
} else {
break;
}
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to bracket root: " << e.what() << ", newNumber = " << newNumber
<< ", newMean = " << newMean << ", newLogMean = " << newLogMean
<< ", x0 = " << x0 << ", f(x0) = " << f0 << ", bracket = " << bracket
<< ", f(bracket) = " << fBracket << ", bestGuess = " << bestGuess);
return bestGuess;
}
if (maxIterations == 0) {
LOG_TRACE(<< "Failed to bracket root:"
<< " newNumber = " << newNumber << ", newMean = " << newMean
<< ", newLogMean = " << newLogMean << ", x0 = " << x0
<< ", f(x0) = " << f0 << ", bracket = " << bracket
<< ", f(bracket) = " << fBracket << ", bestGuess = " << bestGuess);
return bestGuess;
}
LOG_TRACE(<< "newNumber = " << newNumber << ", newMean = " << newMean
<< ", newLogMean = " << newLogMean << ", oldTarget = " << oldTarget
<< ", target = " << target << ", upFactor = " << upFactor
<< ", downFactor = " << downFactor << ", x0 = " << x0 << ", f(x0) = " << f0
<< ", bracket = " << bracket << ", f(bracket) = " << fBracket);
try {
CEqualWithTolerance<double> tolerance(CToleranceTypes::E_AbsoluteTolerance, EPS * x0);
CSolvers::solve(bracket.first, bracket.second, fBracket.first, fBracket.second,
derivative, maxIterations, tolerance, bestGuess);
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to solve: " << e.what() << ", newNumber = " << newNumber
<< ", x0 = " << x0 << ", f(x0) = " << f0 << ", bracket = " << bracket
<< ", f(bracket) = " << fBracket << ", bestGuess = " << bestGuess);
return bestGuess;
}
LOG_TRACE(<< "bracket = " << bracket);
return (bracket.first + bracket.second) / 2.0;
}
//! Adds "weight" x "right operand" to the "left operand".
struct SPlusWeight {
double operator()(double lhs, double rhs, double weight = 1.0) const {
return lhs + weight * rhs;
}
};
//! Evaluate \p func on the joint predictive distribution for \p samples
//! (integrating over the prior for the gamma rate) and aggregate the
//! results using \p aggregate.
//!
//! \param[in] samples The weighted samples.
//! \param[in] func The function to evaluate.
//! \param[in] aggregate The function to aggregate the results of \p func.
//! \param[in] isNonInformative True if the prior is non-informative.
//! \param[in] offset The constant offset of the data, in particular it
//! is assumed that \p samples are distributed as Y - "offset", where Y
//! is a gamma distributed R.V.
//! \param[in] likelihoodShape The shape of the likelihood for \p samples.
//! \param[in] priorShape The shape of the gamma prior of the rate parameter
//! of the likelihood for \p samples.
//! \param[in] priorRate The rate of the gamma prior of the rate parameter
//! of the likelihood for \p samples.
//! \param[out] result Filled in with the aggregation of results of \p func.
template<typename FUNC, typename AGGREGATOR, typename RESULT>
bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
FUNC func,
AGGREGATOR aggregate,
bool isNonInformative,
double offset,
double likelihoodShape,
double priorShape,
double priorRate,
RESULT& result) {
result = RESULT();
if (samples.empty()) {
LOG_ERROR(<< "Can't compute distribution for empty sample set");
return false;
}
// Computing the true joint marginal distribution of all the samples
// by integrating the joint likelihood over the prior distribution
// for the gamma rate is not tractable. We will approximate the joint
// p.d.f. as follows:
// Integral{ Product_i{ L(x(i), a | b) } * f(b) }db
// ~= Product_i{ Integral{ L(x(i), a | b) * f(b) }db }.
//
// where,
// L(., a | b) is the likelihood function and
// f(b) is the prior for the gamma rate.
//
// This becomes increasingly accurate as the prior distribution narrows.
static const double MINIMUM_GAMMA_SHAPE = 100.0;
LOG_TRACE(<< "likelihoodShape = " << likelihoodShape
<< ", priorShape = " << priorShape << ", priorRate = " << priorRate);
bool success = false;
try {
if (isNonInformative) {
// The non-informative prior is improper and effectively zero
// everywhere. (It is acceptable to approximate all finite samples
// as at the median of this distribution.)
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
double n = maths_t::count(weights[i]);
double x = samples[i] + offset;
result = aggregate(result, func(CTools::SImproperDistribution(), x), n);
}
} else if (priorShape > 2 && priorShape > likelihoodShape * MINIMUM_GAMMA_SHAPE) {
// The marginal likelihood is well approximated by a moment matched
// gamma distribution. By considering:
// E[ E[X | a, b] ] = E[ a' / B ]
// E[ E[(X - E[X | a, b])^2 | a, b] ] = E[ a' / B^2 ]
//
// where,
// a' is the likelihood shape.
// B is the likelihood rate and B ~ Gamma(a, b).
// a and b are the prior shape and rate parameters, respectively.
// The outer expectation E[.] is w.r.t. the prior on B.
//
// It is possible to show that the moment matched gamma distribution
// has:
// shape = (a - 2) / (a - 1) * a'
// rate = (a - 2) / b.
//
// We use this approximation to avoid calculating the incomplete beta
// function, which can be very slow particularly for large alpha and
// beta.
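// (To see this, note that E[1 / B] = b / (a - 1) and E[1 / B^2] =
// b^2 / (a - 1) / (a - 2), so matching shape / rate to a' * b / (a - 1) and
// shape / rate^2 to a' * b^2 / (a - 1) / (a - 2) and solving for the shape
// and rate gives the expressions below.)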
double shape = (priorShape - 2.0) / (priorShape - 1.0) * likelihoodShape;
double rate = (priorShape - 2.0) / priorRate;
LOG_TRACE(<< "shape = " << shape << ", rate = " << rate);
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
// We assume the data are described by X = Y - u where, Y is
// gamma distributed and u is a constant offset. This means
// that {x(i) + u} are gamma distributed.
double n = maths_t::count(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
double x = samples[i] + offset;
LOG_TRACE(<< "x = " << x);
double scaledShape = shape / varianceScale;
double scaledRate = rate / varianceScale;
boost::math::gamma_distribution<> gamma(scaledShape, 1.0 / scaledRate);
result = aggregate(result, func(gamma, x), n);
}
} else {
// We use the fact that the random variable Z = X / (b + X) is
// beta distributed with parameters alpha equal to likelihoodShape
// and beta equal to priorShape. Therefore, we can compute the
// likelihoods by transforming the data as follows:
// x -> x / (b + x)
//
// and then using the beta distribution.
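// (This is the standard compound gamma result: if X | B is gamma with shape
// a' and rate B, and B is gamma with shape a and rate b, then X / b is beta
// prime distributed with parameters a' and a, or equivalently X / (b + X) is
// beta distributed with parameters a' and a.)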
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
// We assume the data are described by X = Y - u where, Y is
// gamma distributed and u is a constant offset. This means
// that {x(i) + u} are gamma distributed.
double n = maths_t::count(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
double x = samples[i] + offset;
double scaledLikelihoodShape = likelihoodShape / varianceScale;
double scaledPriorRate = varianceScale * priorRate;
boost::math::beta_distribution<> beta(scaledLikelihoodShape, priorShape);
double z = CTools::sign(x) * std::fabs(x / (scaledPriorRate + x));
LOG_TRACE(<< "x = " << x << ", z = " << z);
result = aggregate(result, func(beta, z), n);
}
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Error: " << e.what() << " (offset = " << offset
<< ", likelihoodShape = " << likelihoodShape << ", priorShape = " << priorShape
<< ", priorRate = " << priorRate << ")");
return false;
}
LOG_TRACE(<< "result = " << result);
return success;
}
//! Evaluates a specified function object, which must be default constructible,
//! on the joint distribution of a set of samples at a specified offset.
//!
//! This is a thin wrapper around evaluateFunctionOnJointDistribution so that
//! it can be integrated over the hidden variable representing the actual value
//! of a discrete datum, which we assume lies in the interval [n, n+1].
template<typename F>
class CEvaluateOnSamples : core::CNonCopyable {
public:
CEvaluateOnSamples(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
bool isNonInformative,
double offset,
double likelihoodShape,
double priorShape,
double priorRate)
: m_Samples(samples), m_Weights(weights), m_IsNonInformative(isNonInformative),
m_Offset(offset), m_LikelihoodShape(likelihoodShape),
m_PriorShape(priorShape), m_PriorRate(priorRate) {}
bool operator()(double x, double& result) const {
return evaluateFunctionOnJointDistribution(
m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative,
m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, result);
}
private:
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
bool m_IsNonInformative;
double m_Offset;
double m_LikelihoodShape;
double m_PriorShape;
double m_PriorRate;
};
//! Computes the probability of seeing less likely samples at a specified offset.
//!
//! This is a thin wrapper around evaluateFunctionOnJointDistribution so that
//! it can be integrated over the hidden variable representing the actual value
//! of a discrete datum, which we assume lies in the interval [n, n+1].
class CProbabilityOfLessLikelySamples : core::CNonCopyable {
public:
CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
bool isNonInformative,
double offset,
double likelihoodShape,
double priorShape,
double priorRate)
: m_Calculation(calculation), m_Samples(samples), m_Weights(weights),
m_IsNonInformative(isNonInformative), m_Offset(offset),
m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape),
m_PriorRate(priorRate), m_Tail(0) {}
bool operator()(double x, double& result) const {
CJointProbabilityOfLessLikelySamples probability;
maths_t::ETail tail = maths_t::E_UndeterminedTail;
CTools::CProbabilityOfLessLikelySample sampleProbability{m_Calculation};
if (!evaluateFunctionOnJointDistribution(
m_Samples, m_Weights,
[&](const auto& distribution, double x_) {
return sampleProbability(distribution, x_, tail);
},
CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative,
m_Offset + x, m_LikelihoodShape, m_PriorShape, m_PriorRate, probability) ||
!probability.calculate(result)) {
LOG_ERROR(<< "Failed to compute probability of less likely samples (samples ="
<< m_Samples << ", weights = " << m_Weights
<< ", offset = " << m_Offset + x << ")");
return false;
}
m_Tail = m_Tail | tail;
return true;
}
maths_t::ETail tail() const { return static_cast<maths_t::ETail>(m_Tail); }
private:
maths_t::EProbabilityCalculation m_Calculation;
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
bool m_IsNonInformative;
double m_Offset;
double m_LikelihoodShape;
double m_PriorShape;
double m_PriorRate;
mutable int m_Tail;
};
//! Compute the joint marginal log likelihood function of a collection
//! of independent samples from the gamma process. This is obtained by
//! integrating over the prior distribution for the rate. In particular,
//! it can be shown that:
//! log( L(x, a' | a, b) ) =
//! log( Product_i{ x(i) }^(a' - 1)
//! / Gamma(a') ^ n
//! * b ^ a
//! / Gamma(a)
//! * Gamma(n * a' + a)
//! / (b + Sum_i( x(i) ))^(n * a' + a) ).
//!
//! Here,
//! x = {y(i) + u} and {y(i)} is the sample vector and u the constant offset.
//! n = |x| the number of elements in the sample vector.
//! a' is the (maximum) likelihood shape of the gamma process.
//! a and b are the prior gamma shape and rate, respectively.
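//!
//! This follows by integrating the product of the sample gamma likelihoods
//! and the gamma prior over the likelihood rate b', recognising the integrand
//! as an unnormalised gamma density in b' with shape n * a' + a and rate
//! b + Sum_i( x(i) ), whose integral is Gamma(n * a' + a) divided by
//! (b + Sum_i( x(i) ))^(n * a' + a).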
class CLogMarginalLikelihood : core::CNonCopyable {
public:
CLogMarginalLikelihood(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double offset,
double likelihoodShape,
double priorShape,
double priorRate)
: m_Samples(samples), m_Weights(weights), m_Offset(offset),
m_LikelihoodShape(likelihoodShape), m_PriorShape(priorShape),
m_PriorRate(priorRate), m_NumberSamples(0.0), m_ImpliedShape(0.0),
m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) {
this->precompute();
}
//! Evaluate the log marginal likelihood at the offset \p x.
bool operator()(double x, double& result) const {
if ((m_ErrorStatus & maths_t::E_FpFailed) != 0) {
return false;
}
double logSamplesSum = 0.0;
double sampleSum = 0.0;
double logSeasonalScaleSum = 0.0;
bool success = false;
try {
for (std::size_t i = 0; i < m_Samples.size(); ++i) {
if (CMathsFuncs::isNan(m_Samples[i]) || CMathsFuncs::isNan(m_Weights[i])) {
continue;
}
success = true;
double n = maths_t::countForUpdate(m_Weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) *
maths_t::countVarianceScale(m_Weights[i]);
double sample = m_Samples[i] + x + m_Offset;
if (sample <= 0.0) {
// Technically, the marginal likelihood is zero here
// so the log would be infinite. We use minus max
// double because log(0) = HUGE_VALUE, which causes
// problems for Windows. Calling code is notified
// when the calculation overflows and should avoid
// taking the exponential since this will underflow
// and pollute the floating point environment. This
// may cause issues for some library function
// implementations (see fe*exceptflag for more details).
result = std::numeric_limits<double>::lowest();
this->addErrorStatus(maths_t::E_FpOverflowed);
return false;
}
logSamplesSum += n * (m_LikelihoodShape / varianceScale - 1.0) *
std::log(sample);
sampleSum += n / varianceScale * sample;
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate likelihood: " << e.what());
this->addErrorStatus(maths_t::E_FpFailed);
return false;
}
if (!success) {
this->addErrorStatus(maths_t::E_FpFailed);
return false;
}
result = m_Constant + logSamplesSum -
m_ImpliedShape * std::log(m_PriorRate + sampleSum) - logSeasonalScaleSum;
return true;
}
//! Retrieve the error status for the integration.
maths_t::EFloatingPointErrorStatus errorStatus() const {
return m_ErrorStatus;
}
private:
//! Compute all the constants in the integrand.
void precompute() {
m_NumberSamples = 0.0;
double logVarianceScaleSum = 0.0;
double nResidual = 0.0;
double logGammaScaledLikelihoodShape = 0.0;
double scaledImpliedShape = 0.0;
for (std::size_t i = 0; i < m_Weights.size(); ++i) {
double n = maths_t::countForUpdate(m_Weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) *
maths_t::countVarianceScale(m_Weights[i]);
m_NumberSamples += n;
if (varianceScale != 1.0) {
logVarianceScaleSum -= m_LikelihoodShape / varianceScale *
std::log(varianceScale);
logGammaScaledLikelihoodShape +=
n * std::lgamma(m_LikelihoodShape / varianceScale);
scaledImpliedShape += n * m_LikelihoodShape / varianceScale;
} else {
nResidual += n;
}
}
m_ImpliedShape = scaledImpliedShape + nResidual * m_LikelihoodShape + m_PriorShape;
LOG_TRACE(<< "numberSamples = " << m_NumberSamples);
m_Constant = m_PriorShape * std::log(m_PriorRate) - std::lgamma(m_PriorShape) +
logVarianceScaleSum - logGammaScaledLikelihoodShape -
nResidual * std::lgamma(m_LikelihoodShape) +
std::lgamma(m_ImpliedShape);
if (CMathsFuncs::isNan(m_ImpliedShape) || CMathsFuncs::isNan(m_Constant)) {
this->addErrorStatus(maths_t::E_FpFailed);
} else if (CMathsFuncs::isInf(m_ImpliedShape) || CMathsFuncs::isInf(m_Constant)) {
this->addErrorStatus(maths_t::E_FpOverflowed);
}
}
//! Update the error status.
void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const {
m_ErrorStatus = static_cast<maths_t::EFloatingPointErrorStatus>(m_ErrorStatus | status);
}
private:
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
double m_Offset;
double m_LikelihoodShape;
double m_PriorShape;
double m_PriorRate;
double m_NumberSamples;
double m_ImpliedShape;
double m_Constant;
mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
};
} // detail::
const core::TPersistenceTag OFFSET_TAG("a", "offset");
const core::TPersistenceTag LIKELIHOOD_SHAPE_TAG("b", "likelihood_shape");
const core::TPersistenceTag LOG_SAMPLES_MEAN_TAG("c", "log_samples_mean");
const core::TPersistenceTag SAMPLE_MOMENTS_TAG("d", "sample_moments");
const core::TPersistenceTag PRIOR_SHAPE_TAG("e", "prior_shape");
const core::TPersistenceTag PRIOR_RATE_TAG("f", "prior_rate");
const core::TPersistenceTag NUMBER_SAMPLES_TAG("g", "number_samples");
const core::TPersistenceTag DECAY_RATE_TAG("j", "decay_rate");
const std::string MEAN_TAG("mean");
const std::string STANDARD_DEVIATION_TAG("standard_deviation");
const std::string EMPTY_STRING;
const std::string UNKNOWN_VALUE_STRING("<unknown>");
}
CGammaRateConjugate::CGammaRateConjugate(maths_t::EDataType dataType,
double offset,
double shape,
double rate,
double decayRate,
double offsetMargin)
: CPrior(dataType, decayRate), m_Offset(offset), m_OffsetMargin(offsetMargin),
m_LikelihoodShape(1.0), m_PriorShape(shape), m_PriorRate(rate) {
}
CGammaRateConjugate::CGammaRateConjugate(const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser,
double offsetMargin)
: CPrior(params.s_DataType, 0.0), m_Offset(0.0), m_OffsetMargin(offsetMargin),
m_LikelihoodShape(1.0), m_PriorShape(0.0), m_PriorRate(0.0) {
if (traverser.traverseSubLevel([this](auto& traverser_) {
return this->acceptRestoreTraverser(traverser_);
}) == false) {
traverser.setBadState();
}
}
bool CGammaRateConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
do {
const std::string& name = traverser.name();
RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate,
core::CStringUtils::stringToType(traverser.value(), decayRate),
this->decayRate(decayRate))
RESTORE(OFFSET_TAG, m_Offset.fromString(traverser.value()))
RESTORE_BUILT_IN(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape)
RESTORE(LOG_SAMPLES_MEAN_TAG, m_LogSamplesMean.fromDelimited(traverser.value()))
RESTORE(SAMPLE_MOMENTS_TAG, m_SampleMoments.fromDelimited(traverser.value()))
RESTORE(PRIOR_SHAPE_TAG, m_PriorShape.fromString(traverser.value()))
RESTORE(PRIOR_RATE_TAG, m_PriorRate.fromString(traverser.value()))
RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples,
core::CStringUtils::stringToType(traverser.value(), numberSamples),
this->numberSamples(numberSamples))
} while (traverser.next());
return true;
}
CGammaRateConjugate CGammaRateConjugate::nonInformativePrior(maths_t::EDataType dataType,
double offset,
double decayRate,
double offsetMargin) {
return CGammaRateConjugate(dataType, offset + offsetMargin, NON_INFORMATIVE_SHAPE,
NON_INFORMATIVE_RATE, decayRate, offsetMargin);
}
CGammaRateConjugate::EPrior CGammaRateConjugate::type() const {
return E_Gamma;
}
CGammaRateConjugate* CGammaRateConjugate::clone() const {
return new CGammaRateConjugate(*this);
}
void CGammaRateConjugate::setToNonInformative(double offset, double decayRate) {
*this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(),
decayRate, this->offsetMargin());
}
double CGammaRateConjugate::offsetMargin() const {
return m_OffsetMargin;
}
bool CGammaRateConjugate::needsOffset() const {
return true;
}
double CGammaRateConjugate::adjustOffset(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
COffsetCost cost(*this);
CApplyOffset apply(*this);
return this->adjustOffsetWithCost(samples, weights, cost, apply);
}
double CGammaRateConjugate::offset() const {
return m_Offset;
}
void CGammaRateConjugate::addSamples(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
if (samples.empty()) {
return;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return;
}
this->adjustOffset(samples, weights);
this->CPrior::addSamples(samples, weights);
// We assume the data are described by X = Y - u where, Y is gamma
// distributed and u is a constant offset.
//
// If y = {y(i)} denotes the sample vector, then x = {y(i) + u} are
// gamma distributed with shape a' and rate b', and the likelihood
// function is:
// likelihood(x, a' | b') ~
// Product_i{ b' ^ a' * x(i) ^ (a' - 1) * exp(-b' * x(i)) }.
//
// Note that we treat the likelihood as a function of the free parameter
// a' for which we do not have prior distribution. Instead we estimate
// this by maximizing the posterior marginal likelihood function for the
// data. It can be shown that this is equivalent to solving:
// f(n * a' + 1) - f(a')
// = log( Sum_i(x(i)) ) - Sum_i( log(x(i)) ) / n (1)
//
// where f(.) is the digamma function, i.e. the derivative of the log of
// the gamma function. This means that sufficient statistics for estimating
// a' are Sum_i( x(i) ), Sum_i( log(x(i)) ) and n the number of samples.
// We maintain these statistics and compute a' by solving (1) after each
// update.
//
// The conjugate prior for b' is gamma and the update of the posterior
// with n independent samples comes from:
// likelihood(x, a' | b') * prior(b' | a, b) (2)
//
// where,
// prior(b' | a, b) ~ b'^(a - 1) * exp(-b * b')
//
// Equation (2) implies that the parameters of the prior distribution
// update as follows:
// a -> a + n * a'
// b -> b + Sum_i( x(i) )
//
// Note that the weight of the sample x(i) is interpreted as its count,
// i.e. n(i), so for example updating with {(x, 2)} is equivalent to
// updating with {x, x}.
//
// Since these values can be computed on the fly from the sample mean and
// number of samples we do not maintain separate variables for them.
//
// If the data are discrete then we approximate the discrete distribution
// by saying it is uniform on the intervals [n,n+1] for each integral n.
// This is like saying that the data are samples from:
// X' = X + Z
//
// where,
// Z is uniform in the interval [0,1].
//
// We care about the limiting behaviour of the filter, i.e. as the number
// of samples n->inf. In this case, the uniform law of large numbers gives
// that:
// Sum_i( f(x(i) + z(i)) ) -> E[ Sum_i( f(x(i) + Z) ) ]
//
// We use this to evaluate:
// mean(log(x(i) + z(i)))
// -> 1/n * Sum_i( (x(i) + 1) * log(x(i) + 1) - x(i) * log(x(i)) - 1 )
//
// mean(x(i) + z(i))
// -> 1/n * Sum_i( x(i) ) + 1/2
//
// and
// var(x(i) + z(i))
// = Sum_i( (x(i) + z(i) - 1/n * Sum_j( x(j) + z(j) ))^2 )
// -> Sum_i( (x(i) - 1/n * Sum_j( x(j) ))^2 ) + n / 12
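// (The first of these limits follows from E[log(x + Z)] =
// Integral_[0,1]{ log(x + z) }dz = (x + 1) * log(x + 1) - x * log(x) - 1;
// the other two follow from E[Z] = 1/2 and var(Z) = 1/12.)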
TMeanAccumulator logSamplesMean = m_LogSamplesMean;
TMeanVarAccumulator sampleMoments = m_SampleMoments;
try {
double shift = boost::math::digamma(m_LikelihoodShape);
for (std::size_t i = 0; i < samples.size(); ++i) {
double x = samples[i] + m_Offset;
if (x <= 0.0 || !CMathsFuncs::isFinite(x) ||
!CMathsFuncs::isFinite(weights[i])) {
LOG_ERROR(<< "Discarding sample = " << x << ", weights = " << weights[i]);
continue;
}
double n = maths_t::countForUpdate(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
double shift_ = -shift + boost::math::digamma(m_LikelihoodShape / varianceScale) +
std::log(varianceScale);
if (this->isInteger()) {
double logxInvPlus1 = std::log(1.0 / x + 1.0);
double logxPlus1 = std::log(x + 1.0);
m_LogSamplesMean.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift_,
n / varianceScale);
m_SampleMoments.add(x + 0.5, n / varianceScale);
} else {
m_LogSamplesMean.add(std::log(x) - shift_, n / varianceScale);
m_SampleMoments.add(x, n / varianceScale);
}
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to update likelihood: " << e.what());
return;
}
// If the coefficient of variation of the data is too small we run
// into numerical problems. We truncate the variation by modeling
// the impact of an actual variation (standard deviation divided by
// mean) in the data of size MINIMUM_COEFFICIENT_OF_VARIATION on the
// sample log mean and moments. Note that if the data are integer
// then we limit the coefficient of variation to be:
// "s.t.d. + (1 / 12)^(1/2)" / mean
//
// This is equivalent to adding on the variance of the latent
// variable.
detail::truncateVariance(this->isInteger(), logSamplesMean, sampleMoments);
detail::truncateVariance(this->isInteger(), m_LogSamplesMean, m_SampleMoments);
m_LikelihoodShape = detail::maximumLikelihoodShape(
m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments);
LOG_TRACE(<< "m_Offset = " << m_Offset << ", m_LikelihoodShape = " << m_LikelihoodShape
<< ", m_LogSamplesMean = " << m_LogSamplesMean
<< ", m_SampleMoments = " << m_SampleMoments << ", m_PriorShape = " << m_PriorShape
<< ", m_PriorRate = " << m_PriorRate);
if (this->isBad()) {
LOG_ERROR(<< "Update failed (" << this->debug() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
this->setToNonInformative(this->offsetMargin(), this->decayRate());
}
}
void CGammaRateConjugate::propagateForwardsByTime(double time) {
if (!CMathsFuncs::isFinite(time) || time < 0.0) {
LOG_ERROR(<< "Bad propagation time " << time);
return;
}
if (this->isNonInformative()) {
// Nothing to be done.
return;
}
// We want to increase the variance of the gamma distribution while
// holding its mean constant s.t. in the limit t -> inf var -> inf.
// The mean and variance are a / b and a / b^2, respectively, for
// shape a and rate b so choose a factor f in the range [0, 1] and
// update as follows:
// a -> f * a
// b -> f * b
//
// Thus the mean is unchanged and variance is increased by 1 / f.
//
// This amounts to the following transformations of the sample moments:
// n -> f * n
// mean( x(i) ) -> mean( x(i) )
// mean( log(x(i)) ) -> mean( log(x(i)) )
// var( x(i) ) -> f / (f * n - 1) * var( x(i) )
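// (Explicitly: the mean a / b -> (f * a) / (f * b) = a / b is unchanged while
// the variance a / b^2 -> (f * a) / (f * b)^2 = (1 / f) * a / b^2 grows by a
// factor of 1 / f.)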
TMeanAccumulator logSamplesMean = m_LogSamplesMean;
TMeanVarAccumulator sampleMoments = m_SampleMoments;
double count = CBasicStatistics::count(m_LogSamplesMean);
double alpha = std::exp(-this->decayRate() * time);
alpha = count > detail::NON_INFORMATIVE_COUNT
? (alpha * count + (1.0 - alpha) * detail::NON_INFORMATIVE_COUNT) / count
: 1.0;
if (alpha < 1.0) {
m_LogSamplesMean.age(alpha);
m_SampleMoments.age(alpha);
m_LikelihoodShape = detail::maximumLikelihoodShape(
m_LikelihoodShape, logSamplesMean, m_LogSamplesMean, sampleMoments, m_SampleMoments);
}
this->numberSamples(this->numberSamples() * alpha);
LOG_TRACE(<< "m_LikelihoodShape = " << m_LikelihoodShape << ", m_LogSamplesMean = "
<< m_LogSamplesMean << ", m_SampleMoments = " << m_SampleMoments
<< ", numberSamples = " << this->numberSamples());
}
CGammaRateConjugate::TDoubleDoublePr CGammaRateConjugate::marginalLikelihoodSupport() const {
return {-m_Offset, std::numeric_limits<double>::max()};
}
double CGammaRateConjugate::marginalLikelihoodMean() const {
return this->isInteger() ? this->mean() - 0.5 : this->mean();
}
double CGammaRateConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const {
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
if (!this->isNonInformative()) {
// We use the fact that the marginal likelihood is the distribution
// of the R.V. defined as:
// X = b * Z / (1 - Z) - u (1)
//
// where,
// u is a constant offset.
// b is the prior rate.
// Z is beta distributed with alpha equal to m_LikelihoodShape
// and beta equal to m_PriorShape.
//
// So the mode occurs at the r.h.s. of (1) evaluated at the mode of Z.
double scaledLikelihoodShape = m_LikelihoodShape / varianceScale;
if (scaledLikelihoodShape > 1.0 && this->priorShape() > 1.0) {
try {
double scaledPriorRate = varianceScale * this->priorRate();
boost::math::beta_distribution<> beta(scaledLikelihoodShape,
this->priorShape());
double mode = boost::math::mode(beta);
return scaledPriorRate * mode / (1.0 - mode) - m_Offset;
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what()
<< ", likelihood shape = " << m_LikelihoodShape
<< ", prior shape = " << this->priorShape());
}
}
}
// We use the fact that for a gamma distribution:
// mean = a/b,
// mode = (a-1)/b and
// variance = a/b^2
//
// So provided mean isn't zero the mode is:
// (mean^2 - variance) / mean
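// (Equivalently, variance / mean = 1 / b so mean - variance / mean = (a - 1) / b,
// which is the mode.)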
double mean = CBasicStatistics::mean(m_SampleMoments);
double variance = varianceScale * CBasicStatistics::variance(m_SampleMoments);
return std::max(mean == 0.0 ? 0.0 : mean - variance / mean, 0.0) - m_Offset;
}
double CGammaRateConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return std::numeric_limits<double>::max();
}
// This is just E_{B}[Var(X | B)] where B is the rate prior. There is
// a complication due to the fact that variance is a function of both
// X and the mean, which is a random variable. We can write Var(X | B)
// as
// E[ (X - M)^2 + (M - m)^2 | B ]
//
// and use the fact that X conditioned on B is gamma with shape equal
// to the maximum likelihood shape a' and rate equal to B so M = a' / B.
// The first term evaluates to a' / B^2 and the expectation of 1 / B^2
// w.r.t. the prior is b^2 / (a-1) / (a-2). Similarly, it is possible
// to show that Var(a' / B) = a'^2 * E[ 1.0 / B^2 - (b / (a - 1))^2]
// whence...
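// Var(X) = a' * E[1/B^2] + a'^2 * (E[1/B^2] - E[1/B]^2)
//        = a' * b^2 / (a - 1) / (a - 2) * (1 + a' / (a - 1))
// using E[1/B] = b / (a - 1) and E[1/B^2] = b^2 / (a - 1) / (a - 2). This is
// the expression returned below, multiplied by the variance scale.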
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
double a = this->priorShape();
if (a <= 2.0) {
return varianceScale * CBasicStatistics::variance(m_SampleMoments);
}
double b = this->priorRate();
return varianceScale * (1.0 + m_LikelihoodShape / (a - 1.0)) *
m_LikelihoodShape * b * b / (a - 1.0) / (a - 2.0);
}
CGammaRateConjugate::TDoubleDoublePr
CGammaRateConjugate::marginalLikelihoodConfidenceInterval(double percentage,
const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return this->marginalLikelihoodSupport();
}
percentage /= 100.0;
percentage = CTools::truncate(percentage, 0.0, 1.0);
// We use the fact that the marginal likelihood is the distribution
// of the R.V. defined as:
// X = b * Z / (1 - Z) - u
//
// where,
// u is a constant offset.
// b is the prior rate.
// Z is beta distributed with alpha equal to m_LikelihoodShape
// and beta equal to m_PriorShape.
try {
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
double scaledLikelihoodShape = m_LikelihoodShape / varianceScale;
double scaledPriorRate = varianceScale * this->priorRate();
boost::math::beta_distribution<> beta(scaledLikelihoodShape, this->priorShape());
double x1 = boost::math::quantile(beta, (1.0 - percentage) / 2.0);
x1 = scaledPriorRate * x1 / (1.0 - x1) - m_Offset - (this->isInteger() ? 0.5 : 0.0);
double x2 = x1;
if (percentage > 0.0) {
x2 = boost::math::quantile(beta, (1.0 + percentage) / 2.0);
x2 = scaledPriorRate * x2 / (1.0 - x2) - m_Offset -
(this->isInteger() ? 0.5 : 0.0);
}
LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
return {x1, x2};
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
}
return this->marginalLikelihoodSupport();
}
maths_t::EFloatingPointErrorStatus
CGammaRateConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& result) const {
result = 0.0;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute likelihood for empty sample set");
return maths_t::E_FpFailed;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return maths_t::E_FpFailed;
}
if (this->isNonInformative()) {
// The non-informative likelihood is improper and effectively
// zero everywhere. We use minus max double because
// log(0) = HUGE_VALUE, which causes problems for Windows.
// Calling code is notified when the calculation overflows
// and should avoid taking the exponential since this will
// underflow and pollute the floating point environment. This
// may cause issues for some library function implementations
// (see fe*exceptflag for more details).
result = std::numeric_limits<double>::lowest();
return maths_t::E_FpOverflowed;
}
maths_t::EFloatingPointErrorStatus status = maths_t::E_FpFailed;
try {
detail::CLogMarginalLikelihood logMarginalLikelihood(
samples, weights, m_Offset, m_LikelihoodShape, this->priorShape(),
this->priorRate());
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. to the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
CIntegration::logGaussLegendre<CIntegration::OrderThree>(
logMarginalLikelihood, 0.0, 1.0, result);
} else {
logMarginalLikelihood(0.0, result);
}
status = static_cast<maths_t::EFloatingPointErrorStatus>(
logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
} else if ((status & maths_t::E_FpOverflowed) != 0) {
LOG_TRACE(<< "Log likelihood overflowed for (" << this->debug() << ")");
LOG_TRACE(<< "samples = " << samples);
LOG_TRACE(<< "weights = " << weights);
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute likelihood: " << e.what());
}
return status;
}
void CGammaRateConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
TDouble1Vec& samples) const {
samples.clear();
if (numberSamples == 0 || this->numberSamples() == 0.0) {
return;
}
if (this->isNonInformative()) {
// We can't sample the marginal likelihood directly in this case so we
// match the moments of the samples we generate to the sample moments.
numberSamples = std::min(
numberSamples, static_cast<std::size_t>(this->numberSamples() + 0.5));
double mean = CBasicStatistics::mean(m_SampleMoments) - m_Offset;
double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments));
double root_two = boost::math::double_constants::root_two;
switch (numberSamples) {
case 1:
samples.push_back(mean);
break;
case 2:
samples.push_back(mean - deviation / root_two);
samples.push_back(mean + deviation / root_two);
break;
default:
samples.push_back(mean - deviation);
samples.push_back(mean);
samples.push_back(mean + deviation);
break;
}
return;
}
// The sampling strategy is to split the marginal likelihood up into
// equal quantiles and then compute the expectation on each quantile
// and sample that point, i.e. effectively sample the points:
// { n * E[ X * I{[x_q_n(i), x_q_n((i+1))]} ] }
//
// where,
// X is a R.V. whose distribution is the marginal likelihood.
// I{.} is the indicator function.
// x_q_n(.) is the n'th quantile function.
// i ranges over 0 to n-1.
// n is the number of samples.
//
// This sampling strategy has various nice properties:
// 1) The sample quantiles match as many quantiles of the distribution
// as possible.
// 2) The sample mean always matches the distribution mean:
// Sum_i( E[X * I{[q_n(i), q_n((i+1))]}] ) = E[X * 1] = E[X]
// by linearity of the expectation operator and since the indicators
// range over the entire support for X.
// 3) As the number of samples increase each sample moment tends
// asymptotically to each corresponding distribution moment.
//
// We use the fact that X / (b + X) = Y where Y is beta distributed
// with alpha = a' and beta = a to derive the relationship:
//   E[ X * I{[x1,x2]} ] = b*a'/(a-1)
//       * [ F(x2/(b+x2) | a'+1, a-1) - F(x1/(b+x1) | a'+1, a-1) ]
//
// where,
// a and b are the prior gamma shape and rate, respectively.
// a' is the likelihood shape.
// F(. | a'+1, a-1) is the c.d.f. of a beta random variable with
// alpha = a'+1 and beta = a-1.
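// The identity follows by writing the expectation as an integral over
// y = x / (b + x) with Y ~ Beta(a', a): the integrand b * y / (1 - y) times
// the Beta(a', a) density is proportional to the Beta(a'+1, a-1) density,
// with constant of proportionality b * B(a'+1, a-1) / B(a', a) = b * a' / (a - 1).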
samples.reserve(numberSamples);
double mean = m_LikelihoodShape * this->priorRate() / (this->priorShape() - 1.0);
try {
boost::math::beta_distribution<> beta1(m_LikelihoodShape, this->priorShape());
boost::math::beta_distribution<> beta2(m_LikelihoodShape + 1.0,
this->priorShape() - 1.0);
LOG_TRACE(<< "mean = " << mean << ", numberSamples = " << numberSamples);
TDoubleDoublePr support = this->marginalLikelihoodSupport();
double lastPartialExpectation = 0.0;
for (std::size_t i = 1; i < numberSamples; ++i) {
double q = static_cast<double>(i) / static_cast<double>(numberSamples);
double xq = boost::math::quantile(beta1, q);
double partialExpectation = mean * CTools::safeCdf(beta2, xq);
double sample = static_cast<double>(numberSamples) *
(partialExpectation - lastPartialExpectation) -
m_Offset;
LOG_TRACE(<< "sample = " << sample);
// Sanity check the sample: should be in the distribution support.
if (sample >= support.first && sample <= support.second) {
samples.push_back(sample);
} else {
LOG_ERROR(<< "Sample out of bounds: sample = " << sample
<< ", likelihoodShape = " << m_LikelihoodShape
<< ", priorShape = " << this->priorShape() << ", q = " << q
<< ", x(q) = " << xq << ", mean = " << mean);
}
lastPartialExpectation = partialExpectation;
}
double sample =
static_cast<double>(numberSamples) * (mean - lastPartialExpectation) - m_Offset;
LOG_TRACE(<< "sample = " << sample);
// Sanity check the sample: should be in the distribution support.
if (sample >= support.first && sample <= support.second) {
samples.push_back(sample);
} else {
LOG_ERROR(<< "Sample out of bounds: sample = " << sample
<< ", likelihoodShape = " << m_LikelihoodShape
<< ", priorShape = " << this->priorShape() << ", mean = " << mean);
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to sample: " << e.what() << ", likelihoodShape = " << m_LikelihoodShape
<< ", priorShape = " << this->priorShape() << ", mean = " << mean);
}
}
bool CGammaRateConjugate::minusLogJointCdf(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;
lowerBound = upperBound = 0.0;
TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset,
m_LikelihoodShape, this->priorShape(), this->priorRate());
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. to the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
minusLogCdf, 0.0, 1.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
double value;
if (!minusLogCdf(0.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
bool CGammaRateConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;
lowerBound = upperBound = 0.0;
TMinusLogCdfComplement minusLogCdfComplement(
samples, weights, this->isNonInformative(), m_Offset, m_LikelihoodShape,
this->priorShape(), this->priorRate());
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. to the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
minusLogCdfComplement, 0.0, 1.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. complement (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
double value;
if (!minusLogCdfComplement(0.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. complement (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
bool CGammaRateConjugate::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound,
maths_t::ETail& tail) const {
lowerBound = 0.0;
upperBound = 1.0;
tail = maths_t::E_UndeterminedTail;
detail::CProbabilityOfLessLikelySamples probability(
calculation, samples, weights, this->isNonInformative(), m_Offset,
m_LikelihoodShape, this->priorShape(), this->priorRate());
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. to the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0,
1.0, value)) {
return false;
}
lowerBound = upperBound = value;
tail = probability.tail();
return true;
}
double value;
if (!probability(0.0, value)) {
return false;
}
lowerBound = upperBound = value;
tail = probability.tail();
return true;
}
bool CGammaRateConjugate::isNonInformative() const {
return CBasicStatistics::count(m_SampleMoments) < detail::NON_INFORMATIVE_COUNT ||
this->priorRate() == NON_INFORMATIVE_RATE;
}
void CGammaRateConjugate::print(const std::string& indent, std::string& result) const {
result += core_t::LINE_ENDING + indent + "gamma ";
if (this->isNonInformative()) {
result +=
"non-informative: # sample count = " +
core::CStringUtils::typeToStringPretty(CBasicStatistics::count(m_SampleMoments)) +
". priorRate = " + core::CStringUtils::typeToStringPretty(this->priorRate());
return;
}
std::string meanStr;
std::string sdStr;
std::tie(meanStr, sdStr) = this->printMarginalLikelihoodStatistics();
result += "mean = " + meanStr + " sd = " + sdStr;
}
CPrior::TStrStrPr CGammaRateConjugate::doPrintMarginalLikelihoodStatistics() const {
std::string meanStr;
std::string sdStr;
try {
if (this->priorShape() > 2.0) {
double shape = (this->priorShape() - 2.0) /
(this->priorShape() - 1.0) * m_LikelihoodShape;
double rate = this->priorRate() / (this->priorShape() - 2.0);
boost::math::gamma_distribution<> gamma(shape, rate);
double mean = boost::math::mean(gamma);
double deviation = boost::math::standard_deviation(gamma);
meanStr = core::CStringUtils::typeToStringPretty(mean - m_Offset);
sdStr = core::CStringUtils::typeToStringPretty(deviation);
return TStrStrPr{meanStr, sdStr};
}
} catch (const std::exception&) {}
double mean = CBasicStatistics::mean(m_SampleMoments);
double deviation = std::sqrt(CBasicStatistics::variance(m_SampleMoments));
meanStr = core::CStringUtils::typeToStringPretty(mean - m_Offset);
sdStr = core::CStringUtils::typeToStringPretty(deviation);
return TStrStrPr{meanStr, sdStr};
}
std::string CGammaRateConjugate::printJointDensityFunction() const {
if (this->isNonInformative()) {
// The non-informative likelihood is improper and effectively zero everywhere.
return EMPTY_STRING;
}
// We'll plot the prior over a range where most of the mass is.
static const double RANGE = 0.99;
static const unsigned int POINTS = 51;
boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate());
double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0);
double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0);
double xIncrement = (xEnd - xStart) / (POINTS - 1.0);
double x = xStart;
std::ostringstream xCoordinates;
std::ostringstream yCoordinates;
xCoordinates << "x = [";
for (unsigned int i = 0; i < POINTS; ++i, x += xIncrement) {
xCoordinates << x << " ";
}
xCoordinates << "];" << core_t::LINE_ENDING;
std::ostringstream pdf;
pdf << "pdf = [";
x = xStart;
for (unsigned int i = 0; i < POINTS; ++i, x += xIncrement) {
pdf << CTools::safePdf(gamma, x) << " ";
}
pdf << "];" << core_t::LINE_ENDING << "plot(x, pdf);";
return xCoordinates.str() + yCoordinates.str() + pdf.str();
}
std::uint64_t CGammaRateConjugate::checksum(std::uint64_t seed) const {
seed = this->CPrior::checksum(seed);
seed = CChecksum::calculate(seed, m_Offset);
seed = CChecksum::calculate(seed, m_LikelihoodShape);
seed = CChecksum::calculate(seed, m_LogSamplesMean);
seed = CChecksum::calculate(seed, m_SampleMoments);
seed = CChecksum::calculate(seed, m_PriorShape);
return CChecksum::calculate(seed, m_PriorRate);
}
void CGammaRateConjugate::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
mem->setName("CGammaRateConjugate");
}
std::size_t CGammaRateConjugate::memoryUsage() const {
return 0;
}
std::size_t CGammaRateConjugate::staticSize() const {
return sizeof(*this);
}
void CGammaRateConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
inserter.insertValue(OFFSET_TAG, m_Offset.toString());
inserter.insertValue(LIKELIHOOD_SHAPE_TAG, m_LikelihoodShape,
core::CIEEE754::E_DoublePrecision);
inserter.insertValue(LOG_SAMPLES_MEAN_TAG, m_LogSamplesMean.toDelimited());
inserter.insertValue(SAMPLE_MOMENTS_TAG, m_SampleMoments.toDelimited());
inserter.insertValue(PRIOR_SHAPE_TAG, m_PriorShape.toString());
inserter.insertValue(PRIOR_RATE_TAG, m_PriorRate.toString());
inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
core::CIEEE754::E_SinglePrecision);
if (inserter.readableTags() == true) {
std::string mean;
std::string sd;
std::tie(mean, sd) = this->printMarginalLikelihoodStatistics();
inserter.insertValue(MEAN_TAG, mean);
inserter.insertValue(STANDARD_DEVIATION_TAG, sd);
}
}
double CGammaRateConjugate::likelihoodShape() const {
return m_LikelihoodShape;
}
double CGammaRateConjugate::likelihoodRate() const {
if (this->isNonInformative()) {
return 0.0;
}
try {
boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate());
return boost::math::mean(gamma);
} catch (std::exception& e) {
LOG_ERROR(<< "Failed to compute likelihood rate: " << e.what() << ", prior shape = "
<< this->priorShape() << ", prior rate = " << this->priorRate());
}
return 0.0;
}
CGammaRateConjugate::TDoubleDoublePr
CGammaRateConjugate::confidenceIntervalRate(double percentage) const {
if (this->isNonInformative()) {
return {std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max()};
}
percentage /= 100.0;
double lowerPercentile = 0.5 * (1.0 - percentage);
double upperPercentile = 0.5 * (1.0 + percentage);
try {
// The prior distribution for the rate is gamma.
boost::math::gamma_distribution<> gamma(this->priorShape(), 1.0 / this->priorRate());
return {boost::math::quantile(gamma, lowerPercentile),
boost::math::quantile(gamma, upperPercentile)};
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute confidence interval: " << e.what()
<< ", prior shape = " << this->priorShape()
<< ", prior rate = " << this->priorRate());
}
return {std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max()};
}
bool CGammaRateConjugate::equalTolerance(const CGammaRateConjugate& rhs,
const TEqualWithTolerance& equal) const {
LOG_DEBUG(<< m_LikelihoodShape << " " << rhs.m_LikelihoodShape << ", "
<< this->priorShape() << " " << rhs.priorShape() << ", "
<< this->priorRate() << " " << rhs.priorRate());
return equal(m_LikelihoodShape, rhs.m_LikelihoodShape) &&
equal(this->priorShape(), rhs.priorShape()) &&
equal(this->priorRate(), rhs.priorRate());
}
double CGammaRateConjugate::mean() const {
if (this->isNonInformative()) {
return CBasicStatistics::mean(m_SampleMoments);
}
// This is just E_{B}[E[X | B]] - u where B is the rate prior
// and u is the offset. Note that X conditioned on B is gamma
// with shape equal to the maximum likelihood shape a' and rate
// equal to B. Its expectation is therefore a' / B and the
// expectation of 1 / B w.r.t. the prior is b / (a - 1).
double a = this->priorShape();
if (a <= 1.0) {
return CBasicStatistics::mean(m_SampleMoments) - m_Offset;
}
double b = this->priorRate();
return m_LikelihoodShape * b / (a - 1.0) - m_Offset;
}
double CGammaRateConjugate::priorShape() const {
return m_PriorShape + RATE_VARIANCE_SCALE *
CBasicStatistics::count(m_SampleMoments) * m_LikelihoodShape;
}
double CGammaRateConjugate::priorRate() const {
return m_PriorRate + RATE_VARIANCE_SCALE * CBasicStatistics::count(m_SampleMoments) *
CBasicStatistics::mean(m_SampleMoments);
}
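// Note that priorShape() and priorRate() compute, on the fly, the conjugate
// updates described in addSamples, i.e. "a + n * a'" and "b + Sum_i( x(i) )",
// from the maintained sample moments, with the data's contribution scaled by
// the tuning constant RATE_VARIANCE_SCALE defined at the end of this file.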
bool CGammaRateConjugate::isBad() const {
return !CMathsFuncs::isFinite(m_Offset) ||
!CMathsFuncs::isFinite(m_LikelihoodShape) ||
!CMathsFuncs::isFinite(CBasicStatistics::count(m_LogSamplesMean)) ||
!CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_LogSamplesMean)) ||
!CMathsFuncs::isFinite(CBasicStatistics::count(m_SampleMoments)) ||
!CMathsFuncs::isFinite(CBasicStatistics::moment<0>(m_SampleMoments)) ||
!CMathsFuncs::isFinite(CBasicStatistics::moment<1>(m_SampleMoments)) ||
!CMathsFuncs::isFinite(m_PriorShape) || !CMathsFuncs::isFinite(m_PriorRate);
}
std::string CGammaRateConjugate::debug() const {
std::ostringstream result;
result << std::scientific << std::setprecision(15) << m_Offset << " "
<< m_LikelihoodShape << " " << m_LogSamplesMean << " "
<< m_SampleMoments << " " << m_PriorShape << " " << m_PriorRate;
return result.str();
}
const double CGammaRateConjugate::NON_INFORMATIVE_SHAPE = 1.0;
const double CGammaRateConjugate::NON_INFORMATIVE_RATE = 0.0;
const double CGammaRateConjugate::RATE_VARIANCE_SCALE = 0.23;
}
}
}