/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <maths/common/CLogNormalMeanPrecConjugate.h>
#include <core/CLogger.h>
#include <core/CNonCopyable.h>
#include <core/CStatePersistInserter.h>
#include <core/CStateRestoreTraverser.h>
#include <core/RestoreMacros.h>
#include <maths/common/CBasicStatistics.h>
#include <maths/common/CChecksum.h>
#include <maths/common/CIntegration.h>
#include <maths/common/CLinearAlgebraTools.h>
#include <maths/common/CLogTDistribution.h>
#include <maths/common/CMathsFuncs.h>
#include <maths/common/CMathsFuncsForMatrixAndVectorTypes.h>
#include <maths/common/CRestoreParams.h>
#include <maths/common/CTools.h>
#include <maths/common/ProbabilityAggregators.h>
#include <boost/math/constants/constants.hpp>
#include <boost/math/distributions/gamma.hpp>
#include <boost/math/distributions/lognormal.hpp>
#include <boost/math/distributions/normal.hpp>
#include <boost/math/distributions/students_t.hpp>
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>
namespace ml {
namespace maths {
namespace common {
namespace {
using TSizeVec = std::vector<std::size_t>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
const double MINIMUM_LOGNORMAL_SHAPE = 100.0;
namespace detail {
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
//! \brief Adds "weight" x "right operand" to the "left operand".
struct SPlusWeight {
double operator()(double lhs, double rhs, double weight = 1.0) const {
return lhs + weight * rhs;
}
};
//! Get the effective location and scale of the sample distribution.
//!
//! \param[in] vs The count variance scale.
//! \param[in] r The gamma prior rate divided by its shape, i.e. rate / shape.
//! \param[in] s The precomputed value exp(-r).
//! \param[in] mean The normal prior mean.
//! \param[in] precision The normal prior precision.
//! \param[in] rate The gamma prior rate.
//! \param[in] shape The gamma prior shape.
//! \param[out] location The effective location of the sample distribution.
//! \param[out] scale The effective scale of the sample distribution.
inline void locationAndScale(double vs,
double r,
double s,
double mean,
double precision,
double rate,
double shape,
double& location,
double& scale) {
double t = vs == 1.0 ? r : r + std::log(s + vs * (1.0 - s));
double scaledPrecision = t == r ? precision : t / r * precision;
double scaledRate = t == r ? rate : t / r * rate;
location = mean + (r - t) / 2.0;
scale = std::sqrt((scaledPrecision + 1.0) / scaledPrecision * scaledRate / shape);
}
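// For intuition, a small worked instance of the mapping above (the numbers
// are purely illustrative): with r = rate / shape = 0.5, s = exp(-0.5) ~= 0.61
// and a variance scale vs = 2.0,
//   t        = 0.5 + log(0.61 + 2.0 * 0.39) ~= 0.83
//   location = mean + (0.5 - 0.83) / 2      ~= mean - 0.17
// and the precision and rate are both scaled by t / r ~= 1.66 before being
// used to compute the scale. When vs = 1.0 we have t = r, the location is
// just the prior mean and the scale reduces to
// ((precision + 1) / precision * rate / shape) ^ (1/2).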
//! Evaluate \p func on the joint predictive distribution for \p samples
//! (integrating over the prior for the exponentiated normal mean and
//! precision) and aggregate the results using \p aggregate.
//!
//! \param samples The weighted samples.
//! \param weights The weights of each sample in \p samples.
//! \param func The function to evaluate.
//! \param aggregate The function to aggregate the results of \p func.
//! \param isNonInformative True if the prior is non-informative.
//! \param offset The constant offset of the data; in particular, it is
//! assumed that \p samples are distributed as exp(Y) - "offset", where
//! Y is a normally distributed R.V.
//! \param shape The shape of the marginal precision prior.
//! \param rate The rate of the marginal precision prior.
//! \param mean The mean of the conditional mean prior.
//! \param precision The precision of the conditional mean prior.
//! \param result Filled in with the aggregation of results of \p func.
template<typename FUNC, typename AGGREGATOR, typename RESULT>
bool evaluateFunctionOnJointDistribution(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
FUNC func,
AGGREGATOR aggregate,
bool isNonInformative,
double offset,
double shape,
double rate,
double mean,
double precision,
RESULT& result) {
result = RESULT();
if (samples.empty()) {
LOG_ERROR(<< "Can't compute for empty sample set");
return false;
}
// Computing the true joint marginal distribution of all the samples
// by integrating the joint likelihood over the prior distribution
// for the exponentiated Gaussian mean and precision is not tractable.
// We will approximate the joint p.d.f. as follows:
// Integral{ Product_i{ L(x(i) | m,p) } * f(m,p) }dm*dp
// ~= Product_i{ Integral{ L(x(i) | m,p) * f(m,p) }dm*dp }.
//
// where,
// L(. | m,p) is the likelihood function and
// f(m,p) is the prior for the exponentiated Gaussian mean and precision.
//
// This becomes increasingly accurate as the prior distribution narrows.
bool success = false;
try {
if (isNonInformative) {
// The non-informative prior is improper and effectively 0 everywhere.
// (It is acceptable to approximate all finite samples as at the median
// of this distribution.)
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
double n = maths_t::count(weights[i]);
result = aggregate(
result, func(CTools::SImproperDistribution(), samples[i] + offset), n);
}
} else if (shape > MINIMUM_LOGNORMAL_SHAPE) {
// For large shape the marginal likelihood is very well approximated
// by a log-normal distribution. In particular, the true distribution
// is log t with 2 * a degrees of freedom, location m and scale
// s = ((p+1)/p * b/a) ^ (1/2). This implies that the p.d.f is
// proportional to:
// f(x) ~ 1 / x * (1 + 1/(2*a) * (log(x) - m)^2 / s^2) ^ -((2*a + 1) / 2).
//
// To compute the log-normal distribution we use the identity:
// lim n->inf { (1 + 1 / (2*n) * p * x^2)^-n } = exp(-p * x^2 / 2).
//
// This gives that in the limit of large a the p.d.f. tends to:
// f(x) ~ 1 / x * exp(-(log(x) - m) ^ 2 / s ^ 2 / 2).
//
// This is log-normal with:
// mean = m and
// scale = ((p+1)/p * b/a) ^ (1/2).
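//
// For completeness, a sketch of the step from the identity to the result:
// writing z = (log(x) - m) / s the log t kernel is proportional to
//   1 / x * (1 + 1/(2*a) * z^2) ^ -a * (1 + 1/(2*a) * z^2) ^ -(1/2)
// and, applying the identity with n = a and p = 1, the first factor tends
// to exp(-z^2 / 2) while the second tends to 1, giving
//   1 / x * exp(-(log(x) - m)^2 / s^2 / 2).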
double r = rate / shape;
double s = std::exp(-r);
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
double n = maths_t::count(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
double location;
double scale;
locationAndScale(varianceScale, r, s, mean, precision, rate,
shape, location, scale);
boost::math::lognormal lognormal(location, scale);
result = aggregate(result, func(lognormal, samples[i] + offset), n);
}
} else {
// The marginal likelihood is log t with 2 * a degrees of freedom,
// location m and scale s = ((p + 1) / p * b / a) ^ (1/2).
double r = rate / shape;
double s = std::exp(-r);
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isNan(samples[i]) || CMathsFuncs::isNan(weights[i])) {
continue;
}
success = true;
double n = maths_t::count(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
double location;
double scale;
locationAndScale(varianceScale, r, s, mean, precision, rate,
shape, location, scale);
CLogTDistribution logt(2.0 * shape, location, scale);
result = aggregate(result, func(logt, samples[i] + offset), n);
}
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Error: " << e.what());
return false;
}
LOG_TRACE(<< "result = " << result);
return success;
}
//! \brief Evaluates a specified function object, which must be default constructible,
//! on the joint distribution of a set of samples at a specified offset.
//!
//! This is a thin wrapper around the evaluateFunctionOnJointDistribution
//! function which allows it to be integrated over the hidden variable
//! representing the actual value of a discrete datum, which we assume is in
//! the interval [n, n+1].
template<typename F>
class CEvaluateOnSamples : core::CNonCopyable {
public:
CEvaluateOnSamples(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
bool isNonInformative,
double offset,
double mean,
double precision,
double shape,
double rate)
: m_Samples(samples), m_Weights(weights),
m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean),
m_Precision(precision), m_Shape(shape), m_Rate(rate) {}
bool operator()(double x, double& result) const {
return evaluateFunctionOnJointDistribution(
m_Samples, m_Weights, F(), SPlusWeight(), m_IsNonInformative,
m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, result);
}
private:
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
bool m_IsNonInformative;
double m_Offset;
double m_Mean;
double m_Precision;
double m_Shape;
double m_Rate;
};
//! \brief Kernel for computing the marginal likelihood's mean.
//!
//! This is used to evaluate the integral of the likelihood mean w.r.t. the
//! prior on the likelihood precision. Note that the integral over the prior
//! on the mean can be performed analytically so the kernel is:
//! <pre class="fragment">
//! \f$\displaystyle e^{m+\frac{1}{2p}\left(\frac{1}{t} + 1\right)} f(p)\f$
//! </pre>
//! Here, \f$m\f$ is the expected mean, \f$t\f$ is the prior precision of the
//! mean and the prior on the precision \f$p\f$ is gamma distributed.
class CMeanKernel {
public:
using TValue = CVectorNx1<double, 2>;
public:
CMeanKernel(double m, double p, double a, double b)
: m_M(m), m_P(p), m_A(a), m_B(b) {}
bool operator()(double x, TValue& result) const {
try {
boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B);
double fx = boost::math::pdf(gamma, x);
result(0) = std::exp(m_M + 0.5 / x * (1.0 / m_P + 1.0)) * fx;
result(1) = fx;
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what()
<< ", m = " << m_M << ", p = " << m_P << ", a = " << m_A
<< ", b = " << m_B << ", x = " << x);
return false;
}
return true;
}
private:
double m_M, m_P, m_A, m_B;
};
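// For illustration only, a sketch of how this kernel is consumed (it mirrors
// CLogNormalMeanPrecConjugate::mean() below): integrate both components over
// a central interval of the gamma prior and normalise,
//   detail::CMeanKernel f(m, p, a, b);
//   boost::math::gamma_distribution<> gamma(a, 1.0 / b);
//   double lo = boost::math::quantile(gamma, 0.1);
//   double hi = boost::math::quantile(gamma, 0.9);
//   detail::CMeanKernel::TValue moments;
//   if (CIntegration::gaussLegendre<CIntegration::OrderSeven>(f, lo, hi, moments)) {
//       double marginalMean = moments(0) / moments(1);
//   }
// i.e. result(1) accumulates the truncated prior mass so that dividing by it
// renormalises the expectation over the truncated range.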
//! \brief Kernel for computing the marginal likelihood's variance.
//!
//! This is used to evaluate the integral of the likelihood variance w.r.t.
//! the prior on the likelihood precision. Note that the integral over the
//! prior on the mean can be performed analytically.
class CVarianceKernel {
public:
using TValue = CVectorNx1<double, 2>;
public:
CVarianceKernel(double mean, double m, double p, double a, double b)
: m_Mean(mean), m_M(m), m_P(p), m_A(a), m_B(b) {}
bool operator()(const TValue& x, TValue& result) const {
try {
boost::math::gamma_distribution<> gamma(m_A, 1.0 / m_B);
boost::math::normal normal(m_M, std::sqrt(1.0 / x(0) / m_P));
double fx = boost::math::pdf(normal, x(1)) * boost::math::pdf(gamma, x(0));
double m = std::exp(x(1) + 0.5 / x(0));
result(0) = (m * m * (std::exp(1.0 / x(0)) - 1.0) + CTools::pow2(m - m_Mean)) * fx;
result(1) = fx;
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate mean kernel: " << e.what()
<< ", m = " << m_M << ", p = " << m_P << ", a = " << m_A
<< ", b = " << m_B << ", x = " << x);
return false;
}
return true;
}
private:
double m_Mean, m_M, m_P, m_A, m_B;
};
//! \brief Computes the probability of seeing less likely samples at a specified
//! offset.
//!
//! This is a thin wrapper around the evaluateFunctionOnJointDistribution
//! function which allows it to be integrated over the hidden variable
//! representing the actual value of a discrete datum, which we assume is in
//! the interval [n, n+1].
class CProbabilityOfLessLikelySamples : core::CNonCopyable {
public:
CProbabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
bool isNonInformative,
double offset,
double mean,
double precision,
double shape,
double rate)
: m_Calculation(calculation), m_Samples(samples), m_Weights(weights),
m_IsNonInformative(isNonInformative), m_Offset(offset), m_Mean(mean),
m_Precision(precision), m_Shape(shape), m_Rate(rate), m_Tail(0) {}
bool operator()(double x, double& result) const {
CJointProbabilityOfLessLikelySamples probability;
maths_t::ETail tail = maths_t::E_UndeterminedTail;
CTools::CProbabilityOfLessLikelySample sampleProbability{m_Calculation};
if (!evaluateFunctionOnJointDistribution(
m_Samples, m_Weights,
[&](const auto& distribution, double x_) {
return sampleProbability(distribution, x_, tail);
},
CJointProbabilityOfLessLikelySamples::SAddProbability(), m_IsNonInformative,
m_Offset + x, m_Shape, m_Rate, m_Mean, m_Precision, probability) ||
!probability.calculate(result)) {
LOG_ERROR(<< "Failed to compute probability of less likely samples (samples ="
<< m_Samples << ", weights = " << m_Weights
<< ", offset = " << m_Offset + x << ")");
return false;
}
m_Tail = m_Tail | tail;
return true;
}
maths_t::ETail tail() const { return static_cast<maths_t::ETail>(m_Tail); }
private:
maths_t::EProbabilityCalculation m_Calculation;
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
bool m_IsNonInformative;
double m_Offset;
double m_Mean;
double m_Precision;
double m_Shape;
double m_Rate;
mutable int m_Tail;
};
//! \brief Wraps up the log marginal likelihood function so that it can be
//! integrated over the hidden variable representing the actual value of a
//! discrete datum, which we assume is in the interval [n, n+1].
//!
//! We assume the data are described by X = exp(Y) - u where Y is normally
//! distributed and u is a constant offset. The log marginal likelihood
//! function of the samples is the likelihood function for the data integrated
//! over the prior distribution for the mean and scale. It can be shown that:
//! log( L(x | m, p, a, b) ) =
//! log( 1 / (2 * pi) ^ (n/2)
//! * (p / (p + n)) ^ (1/2)
//! * b ^ a
//! * Gamma(a + n/2)
//! / Gamma(a)
//! / (b + 1/2 * (n * var(log(x))
//! + p * n * (mean(log(x)) - m)^2 / (p + n))) ^ (a + n/2)
//! / Product_i{ x(i) } ).
//!
//! Here,
//! x = {y(i) + u} and {y(i)} is the sample vector and u the constant offset.
//! n = |x| the number of elements in the sample vector.
//! mean(.) is the sample mean function.
//! var(.) is the sample variance function.
//! m and p are the prior Gaussian mean and precision, respectively.
//! a and b are the prior Gamma shape and rate, respectively.
class CLogMarginalLikelihood : core::CNonCopyable {
public:
CLogMarginalLikelihood(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double offset,
double mean,
double precision,
double shape,
double rate)
: m_Samples(samples), m_Weights(weights), m_Offset(offset), m_Mean(mean),
m_Precision(precision), m_Shape(shape), m_Rate(rate), m_NumberSamples(0.0),
m_Scales(), m_Constant(0.0), m_ErrorStatus(maths_t::E_FpNoErrors) {
this->precompute();
}
//! Evaluate the log marginal likelihood at the offset \p x.
bool operator()(double x, double& result) const {
if ((m_ErrorStatus & maths_t::E_FpFailed) != 0) {
return false;
}
double logSamplesSum = 0.0;
TMeanVarAccumulator logSampleMoments;
bool success = false;
try {
for (std::size_t i = 0; i < m_Samples.size(); ++i) {
if (CMathsFuncs::isNan(m_Samples[i]) || CMathsFuncs::isNan(m_Weights[i])) {
continue;
}
success = true;
double n = maths_t::countForUpdate(m_Weights[i]);
double sample = m_Samples[i] + m_Offset + x;
if (sample <= 0.0) {
// Technically, the marginal likelihood is zero here
// so the log would be infinite. We use minus max
// double because log(0) = HUGE_VALUE, which causes
// problems for Windows. Calling code is notified
// when the calculation overflows and should avoid
// taking the exponential since this will underflow
// and pollute the floating point environment. This
// may cause issues for some library function
// implementations (see fe*exceptflag for more details).
result = std::numeric_limits<double>::lowest();
this->addErrorStatus(maths_t::E_FpOverflowed);
return false;
}
double logSample = std::log(sample);
double w = m_Scales.empty() ? 1.0 : 1.0 / m_Scales[i].first;
double shift = m_Scales.empty() ? 0.0 : m_Scales[i].second;
logSamplesSum += n * logSample;
logSampleMoments.add(logSample - shift, n * w);
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate likelihood: " << e.what());
this->addErrorStatus(maths_t::E_FpFailed);
return false;
}
if (!success) {
this->addErrorStatus(maths_t::E_FpFailed);
return false;
}
double weightedNumberSamples = CBasicStatistics::count(logSampleMoments);
double logSamplesMean = CBasicStatistics::mean(logSampleMoments);
double logSamplesSquareDeviation = (weightedNumberSamples - 1.0) *
CBasicStatistics::variance(logSampleMoments);
double impliedShape = m_Shape + 0.5 * m_NumberSamples;
double impliedRate = m_Rate + 0.5 * (logSamplesSquareDeviation +
m_Precision * weightedNumberSamples *
CTools::pow2(logSamplesMean - m_Mean) /
(m_Precision + weightedNumberSamples));
result = m_Constant - impliedShape * std::log(impliedRate) - logSamplesSum;
return success;
}
//! Retrieve the error status for the integration.
maths_t::EFloatingPointErrorStatus errorStatus() const {
return m_ErrorStatus;
}
private:
static const double LOG_2_PI;
private:
//! Compute all the constants in the integrand.
void precompute() {
double logVarianceScaleSum = 0.0;
if (maths_t::hasSeasonalVarianceScale(m_Weights) ||
maths_t::hasCountVarianceScale(m_Weights)) {
m_Scales.reserve(m_Weights.size());
double r = m_Rate / m_Shape;
double s = std::exp(-r);
for (std::size_t i = 0; i < m_Weights.size(); ++i) {
double varianceScale = maths_t::seasonalVarianceScale(m_Weights[i]) *
maths_t::countVarianceScale(m_Weights[i]);
// Get the scale and shift of the exponentiated Gaussian.
if (varianceScale == 1.0) {
m_Scales.emplace_back(1.0, 0.0);
} else {
double t = r + std::log(s + varianceScale * (1.0 - s));
m_Scales.emplace_back(t / r, 0.5 * (r - t));
logVarianceScaleSum += std::log(t / r);
}
}
}
m_NumberSamples = 0.0;
double weightedNumberSamples = 0.0;
for (std::size_t i = 0; i < m_Weights.size(); ++i) {
double n = maths_t::countForUpdate(m_Weights[i]);
m_NumberSamples += n;
weightedNumberSamples += n / (m_Scales.empty() ? 1.0 : m_Scales[i].first);
}
double impliedShape = m_Shape + 0.5 * m_NumberSamples;
double impliedPrecision = m_Precision + weightedNumberSamples;
m_Constant = 0.5 * (std::log(m_Precision) - std::log(impliedPrecision)) -
0.5 * m_NumberSamples * LOG_2_PI -
0.5 * logVarianceScaleSum + std::lgamma(impliedShape) -
std::lgamma(m_Shape) + m_Shape * std::log(m_Rate);
if (CMathsFuncs::isNan(m_Constant)) {
this->addErrorStatus(maths_t::E_FpFailed);
} else if (CMathsFuncs::isInf(m_Constant)) {
this->addErrorStatus(maths_t::E_FpOverflowed);
}
}
//! Update the error status.
void addErrorStatus(maths_t::EFloatingPointErrorStatus status) const {
m_ErrorStatus = static_cast<maths_t::EFloatingPointErrorStatus>(m_ErrorStatus | status);
}
private:
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
double m_Offset;
double m_Mean;
double m_Precision;
double m_Shape;
double m_Rate;
double m_NumberSamples;
TDoubleDoublePrVec m_Scales;
double m_Constant;
mutable maths_t::EFloatingPointErrorStatus m_ErrorStatus;
};
const double CLogMarginalLikelihood::LOG_2_PI =
std::log(boost::math::double_constants::two_pi);
//! \brief Wraps up the sample total square deviation of the logs of a
//! collection of samples, i.e.
//! <pre class="fragment">
//! \f$\sum_i{(\log(x_i) - \frac{1}{n}\sum_j{\log(x_j)})^2}\f$
//! </pre>
//!
//! so that it can be integrated over the hidden variable representing the
//! actual value of a discrete datum which we assume is in the interval
//! [n, n+1].
class CLogSampleSquareDeviation : core::CNonCopyable {
public:
CLogSampleSquareDeviation(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double mean)
: m_Samples(samples), m_Weights(weights), m_Mean(mean) {}
bool operator()(double x, double& result) const {
result = 0.0;
for (std::size_t i = 0; i < m_Samples.size(); ++i) {
double residual = m_Samples[i] + x;
if (residual <= 0.0 || !CMathsFuncs::isFinite(residual) ||
!CMathsFuncs::isFinite(m_Weights[i])) {
continue;
}
double n = maths_t::countForUpdate(m_Weights[i]);
residual = std::log(residual) - m_Mean;
result += n * CTools::pow2(residual);
}
return true;
}
private:
const TDouble1Vec& m_Samples;
const TDoubleWeightsAry1Vec& m_Weights;
double m_Mean;
};
} // detail::
const core::TPersistenceTag OFFSET_TAG("a", "offset");
const core::TPersistenceTag GAUSSIAN_MEAN_TAG("b", "gaussian_mean");
const core::TPersistenceTag GAUSSIAN_PRECISION_TAG("c", "gaussian_precision");
const core::TPersistenceTag GAMMA_SHAPE_TAG("d", "gamma_shape");
const core::TPersistenceTag GAMMA_RATE_TAG("e", "gamma_rate");
const core::TPersistenceTag NUMBER_SAMPLES_TAG("f", "number_samples");
const core::TPersistenceTag DECAY_RATE_TAG("i", "decay_rate");
const std::string MEAN_TAG("mean");
const std::string STANDARD_DEVIATION_TAG("standard_deviation");
const std::string EMPTY_STRING;
}
CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(maths_t::EDataType dataType,
double offset,
double gaussianMean,
double gaussianPrecision,
double gammaShape,
double gammaRate,
double decayRate,
double offsetMargin)
: CPrior(dataType, decayRate), m_Offset(offset), m_OffsetMargin(offsetMargin),
m_GaussianMean(gaussianMean), m_GaussianPrecision(gaussianPrecision),
m_GammaShape(gammaShape), m_GammaRate(gammaRate) {
}
CLogNormalMeanPrecConjugate::CLogNormalMeanPrecConjugate(const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser,
double offsetMargin)
: CPrior(params.s_DataType, params.s_DecayRate), m_Offset(0.0),
m_OffsetMargin(offsetMargin), m_GaussianMean(0.0),
m_GaussianPrecision(0.0), m_GammaShape(0.0), m_GammaRate(0.0) {
if (traverser.traverseSubLevel([this](auto& traverser_) {
return this->acceptRestoreTraverser(traverser_);
}) == false) {
traverser.setBadState();
}
}
bool CLogNormalMeanPrecConjugate::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
do {
const std::string& name = traverser.name();
RESTORE_SETUP_TEARDOWN(DECAY_RATE_TAG, double decayRate,
core::CStringUtils::stringToType(traverser.value(), decayRate),
this->decayRate(decayRate))
RESTORE(OFFSET_TAG, m_Offset.fromString(traverser.value()))
RESTORE(GAUSSIAN_MEAN_TAG, m_GaussianMean.fromString(traverser.value()))
RESTORE(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.fromString(traverser.value()))
RESTORE(GAMMA_SHAPE_TAG, m_GammaShape.fromString(traverser.value()))
RESTORE_BUILT_IN(GAMMA_RATE_TAG, m_GammaRate)
RESTORE_SETUP_TEARDOWN(NUMBER_SAMPLES_TAG, double numberSamples,
core::CStringUtils::stringToType(traverser.value(), numberSamples),
this->numberSamples(numberSamples))
} while (traverser.next());
return true;
}
CLogNormalMeanPrecConjugate
CLogNormalMeanPrecConjugate::nonInformativePrior(maths_t::EDataType dataType,
double offset,
double decayRate,
double offsetMargin) {
return CLogNormalMeanPrecConjugate(
dataType, offset + offsetMargin, NON_INFORMATIVE_MEAN, NON_INFORMATIVE_PRECISION,
NON_INFORMATIVE_SHAPE, NON_INFORMATIVE_RATE, decayRate, offsetMargin);
}
CLogNormalMeanPrecConjugate::EPrior CLogNormalMeanPrecConjugate::type() const {
return E_LogNormal;
}
CLogNormalMeanPrecConjugate* CLogNormalMeanPrecConjugate::clone() const {
return new CLogNormalMeanPrecConjugate(*this);
}
void CLogNormalMeanPrecConjugate::setToNonInformative(double offset, double decayRate) {
*this = nonInformativePrior(this->dataType(), offset + this->offsetMargin(),
decayRate, this->offsetMargin());
}
double CLogNormalMeanPrecConjugate::offsetMargin() const {
return m_OffsetMargin;
}
bool CLogNormalMeanPrecConjugate::needsOffset() const {
return true;
}
double CLogNormalMeanPrecConjugate::adjustOffset(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
COffsetCost cost(*this);
CApplyOffset apply(*this);
return this->adjustOffsetWithCost(samples, weights, cost, apply);
}
double CLogNormalMeanPrecConjugate::offset() const {
return m_Offset;
}
void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
if (samples.empty()) {
return;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return;
}
this->adjustOffset(samples, weights);
this->CPrior::addSamples(samples, weights);
// We assume the data are described by X = exp(Y) - u where Y is normally
// distributed and u is a constant offset.
//
// If y = {y(i)} denotes the sample vector, then x = {y(i) + u} are
// log-normally distributed with mean m' and inverse scale s', and the
// likelihood function is:
// likelihood(x | m', s') ~
// Product_i{ 1 / x(i) * exp(-s' * (log(x(i)) - m')^2 / 2) }.
//
// The conjugate joint prior for m' and s' is gamma-normal and the update
// of the posterior with n independent samples comes from:
// likelihood(x | m', s') * prior(m', s' | m, p, a, b) (1)
//
// where,
// prior(m', s' | m, p, a, b) ~
// (s' * p)^(1/2) * exp(-s' * p * (m' - m)^2 / 2)
// * s'^(a - 1) * exp(-b * s')
//
// i.e. the conditional distribution of the mean is Gaussian with mean m
// and precision s' * p and the marginal distribution of s' is gamma with
// shape a and rate b. Equation (1) implies that the parameters of the prior
// distribution update as follows:
// m -> (p * m + n * mean(log(x))) / (p + n)
// p -> p + n
// a -> a + n/2
// b -> b + 1/2 * (n * var(log(x)) + p * n * (mean(log(x)) - m)^2 / (p + n))
//
// where,
// mean(.) is the sample mean function.
// var(.) is the sample variance function.
//
// Note that the weight of the sample x(i) is interpreted as its count,
// i.e. n(i), so for example updating with {(x, 2)} is equivalent to
// updating with {x, x}.
//
// If the data are discrete then we approximate the discrete distribution
// by saying it is uniform on the intervals [n,n+1] for each integral n.
// This is like saying that the data are samples from:
// X' = X + Z
//
// where,
// Z is uniform in the interval [0,1].
//
// We care about the limiting behaviour of the filter, i.e. as the number
// of samples n->inf. In this case, the uniform law of large numbers gives
// that:
// Sum_i( f(x(i) + z(i)) ) -> E[ Sum_i( f(x(i) + Z) ) ]
//
// We use this to evaluate:
// mean(log(x(i) + z(i)))
// -> 1/n * Sum_i( (x(i) + 1) * log(x(i) + 1) - x(i) * log(x(i)) - 1 )
//
// To evaluate var(log(x(i) + z(i))) we use numerical integration for
// simplicity because naive calculation of the exact integral suffers from
// cancellation errors.
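//
// As a concrete, purely illustrative check of the limit used for the mean,
// take a single integer sample x = 3:
//   E[log(3 + Z)] = Integral_0^1{ log(3 + z) }dz
//                 = 4 * log(4) - 3 * log(3) - 1
//                 ~= 5.545 - 3.296 - 1 = 1.249
// which is close to, but slightly below, log(3.5) ~= 1.253, as expected
// from Jensen's inequality.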
double numberSamples = 0.0;
double scaledNumberSamples = 0.0;
double logSamplesMean = 0.0;
double logSamplesSquareDeviation = 0.0;
double r = m_GammaRate / m_GammaShape;
double s = std::exp(-r);
if (this->isInteger()) {
// Filled in with samples rescaled to have approximately unit
// variance scale.
TDouble1Vec scaledSamples;
scaledSamples.resize(samples.size(), 1.0);
TMeanAccumulator logSamplesMean_;
for (std::size_t i = 0; i < samples.size(); ++i) {
double x = samples[i] + m_Offset;
if (x <= 0.0 || !CMathsFuncs::isFinite(x) ||
!CMathsFuncs::isFinite(weights[i])) {
LOG_ERROR(<< "Discarding sample = " << x << ", weights = " << weights[i]);
continue;
}
double n = maths_t::countForUpdate(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
numberSamples += n;
double t = varianceScale == 1.0
? r
: r + std::log(s + varianceScale * (1.0 - s));
double shift = (r - t) / 2.0;
double scale = r == t ? 1.0 : t / r;
scaledSamples[i] = scale;
double logxInvPlus1 = std::log(1.0 / x + 1.0);
double logxPlus1 = std::log(x + 1.0);
logSamplesMean_.add(x * logxInvPlus1 + logxPlus1 - 1.0 - shift, n / scale);
}
scaledNumberSamples = CBasicStatistics::count(logSamplesMean_);
logSamplesMean = CBasicStatistics::mean(logSamplesMean_);
double mean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) /
(m_GaussianPrecision + scaledNumberSamples);
for (std::size_t i = 0; i < scaledSamples.size(); ++i) {
double scale = scaledSamples[i];
scaledSamples[i] =
scale == 1.0 ? samples[i] + m_Offset
: std::exp(mean + (std::log(samples[i] + m_Offset) - mean) /
std::sqrt(scale));
}
detail::CLogSampleSquareDeviation deviationFunction(scaledSamples, weights,
logSamplesMean);
CIntegration::gaussLegendre<CIntegration::OrderFive>(
deviationFunction, 0.0, 1.0, logSamplesSquareDeviation);
} else {
TMeanVarAccumulator logSamplesMoments;
for (std::size_t i = 0; i < samples.size(); ++i) {
double x = samples[i] + m_Offset;
if (x <= 0.0 || !CMathsFuncs::isFinite(x) ||
!CMathsFuncs::isFinite(weights[i])) {
LOG_ERROR(<< "Discarding sample = " << x << ", weights = " << weights[i]);
continue;
}
double n = maths_t::countForUpdate(weights[i]);
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
maths_t::countVarianceScale(weights[i]);
numberSamples += n;
double t = varianceScale == 1.0
? r
: r + std::log(s + varianceScale * (1.0 - s));
double scale = r == t ? 1.0 : t / r;
double shift = (r - t) / 2.0;
logSamplesMoments.add(std::log(x) - shift, n / scale);
}
scaledNumberSamples = CBasicStatistics::count(logSamplesMoments);
logSamplesMean = CBasicStatistics::mean(logSamplesMoments);
logSamplesSquareDeviation = (scaledNumberSamples - 1.0) *
CBasicStatistics::variance(logSamplesMoments);
}
m_GammaShape += 0.5 * numberSamples;
m_GammaRate += 0.5 * (logSamplesSquareDeviation +
m_GaussianPrecision * scaledNumberSamples *
CTools::pow2(logSamplesMean - m_GaussianMean) /
(m_GaussianPrecision + scaledNumberSamples));
m_GaussianMean = (m_GaussianPrecision * m_GaussianMean + scaledNumberSamples * logSamplesMean) /
(m_GaussianPrecision + scaledNumberSamples);
m_GaussianPrecision += scaledNumberSamples;
// If the coefficient of variation of the data is too small we run
// into numerical problems. We truncate the variation by modeling
// the impact of an actual coefficient of variation (standard deviation
// divided by mean) in the data of size MINIMUM_COEFFICIENT_OF_VARIATION
// on the prior parameters.
if (m_GaussianPrecision > 1.5) {
// The idea is to model the impact of a coefficient of variation
// equal to MINIMUM_COEFFICIENT_OF_VARIATION on the parameters
// of the prior this will affect. In particular, this enters in
// to the gamma rate and shape by its impact on the mean of the
// log of the samples. Note that:
// mean(log(x'(i)) = 1/n * Sum_i{ log(m + (x'(i) - m)) }
// = 1/n * Sum_i{ log(m) + log(1 + d(i) / m) }
// ~ 1/n * Sum_i{ log(m) + d(i) / m - (d(i)/m)^2 / 2 }
//
// where x(i) are our true samples, which are all very nearly m
// (since their variation is tiny by assumption) and x'(i) are
// our samples assuming minimum coefficient of variation. Finally,
// we note that:
// E[d(i)] = 0
// E[(d(i)/m)^2] = "coefficient of variation"^2
//
// From which we derive the results below.
double minimumRate = (2.0 * m_GammaShape - 1.0) *
CTools::pow2(MINIMUM_COEFFICIENT_OF_VARIATION);
if (m_GammaRate < minimumRate) {
double extraVariation = (minimumRate - m_GammaRate) /
(m_GaussianPrecision - 1.0);
m_GammaRate = minimumRate;
m_GaussianMean -= 0.5 * extraVariation;
}
}
LOG_TRACE(<< "logSamplesMean = " << logSamplesMean << ", logSamplesSquareDeviation = "
<< logSamplesSquareDeviation << ", numberSamples = " << numberSamples
<< ", scaledNumberSamples = " << scaledNumberSamples);
LOG_TRACE(<< "m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate
<< ", m_GaussianMean = " << m_GaussianMean << ", m_GaussianPrecision = "
<< m_GaussianPrecision << ", m_Offset = " << m_Offset);
if (this->isBad()) {
LOG_ERROR(<< "Update failed (" << this->debug() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
this->setToNonInformative(this->offsetMargin(), this->decayRate());
}
}
void CLogNormalMeanPrecConjugate::propagateForwardsByTime(double time) {
if (!CMathsFuncs::isFinite(time) || time < 0.0) {
LOG_ERROR(<< "Bad propagation time " << time);
return;
}
if (this->isNonInformative()) {
// Nothing to be done.
return;
}
double alpha = std::exp(-this->decayRate() * time);
double beta = 1.0 - alpha;
m_GaussianPrecision = alpha * m_GaussianPrecision + beta * NON_INFORMATIVE_PRECISION;
// We want to increase the variance of the gamma distribution while
// holding its mean constant s.t. in the limit t -> inf var -> inf.
// The mean and variance are a / b and a / b^2, respectively, for
// shape a and rate b so choose a factor f in the range [0, 1] and
// update as follows:
// a -> f * a
// b -> f * b
//
// Thus the mean is unchanged and variance is increased by 1 / f.
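//
// A one-line check of that claim: if a -> f * a and b -> f * b then the
// mean a / b is unchanged while the variance becomes
//   (f * a) / (f * b)^2 = (1 / f) * a / b^2,
// i.e. the variance is scaled by 1 / f >= 1 for f in [0, 1].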
double factor = std::min(
(alpha * m_GammaShape + beta * NON_INFORMATIVE_SHAPE) / m_GammaShape, 1.0);
m_GammaShape *= factor;
m_GammaRate *= factor;
this->numberSamples(this->numberSamples() * alpha);
LOG_TRACE(<< "time = " << time << ", alpha = " << alpha
<< ", m_GaussianPrecision = " << m_GaussianPrecision
<< ", m_GammaShape = " << m_GammaShape << ", m_GammaRate = " << m_GammaRate
<< ", numberSamples = " << this->numberSamples());
}
CLogNormalMeanPrecConjugate::TDoubleDoublePr
CLogNormalMeanPrecConjugate::marginalLikelihoodSupport() const {
return {-m_Offset, std::numeric_limits<double>::max()};
}
double CLogNormalMeanPrecConjugate::marginalLikelihoodMean() const {
return this->isInteger() ? this->mean() - 0.5 : this->mean();
}
double CLogNormalMeanPrecConjugate::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return std::exp(m_GaussianMean) - m_Offset;
}
// We use the fact that for a large gamma shape the marginal likelihood
// is very nearly log-normally distributed and otherwise it is log-t.
// See evaluateFunctionOnJointDistribution for more discussion.
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
try {
double r = m_GammaRate / m_GammaShape;
double s = std::exp(-r);
double location;
double scale;
detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision,
m_GammaRate, m_GammaShape, location, scale);
LOG_TRACE(<< "location = " << location << ", scale = " << scale);
if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) {
boost::math::lognormal logNormal(location, scale);
return boost::math::mode(logNormal) - m_Offset;
}
CLogTDistribution logt(2.0 * m_GammaShape, location, scale);
double result = mode(logt) - m_Offset - (this->isInteger() ? 0.5 : 0.0);
return result;
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute marginal likelihood mode: " << e.what()
<< ", gaussian mean = " << m_GaussianMean << ", gaussian precision = "
<< m_GaussianPrecision << ", gamma rate = " << m_GammaRate
<< ", gamma shape = " << m_GammaShape);
}
// Fall back to using the exponentiated Gaussian's mean and precision.
double normalMean = this->normalMean();
double normalPrecision = this->normalPrecision() / varianceScale;
return (normalPrecision == 0.0 ? 0.0 : std::exp(normalMean - 1.0 / normalPrecision)) - m_Offset;
}
double CLogNormalMeanPrecConjugate::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return std::numeric_limits<double>::max();
}
// This is just
// E_{M, P}[Var(X | M, P) + (E[X | M,P] - E[X])^2] (1)
//
// where M, P likelihood mean and precision, respectively. Note that X
// conditioned on M and P is log normal and
// Var(X | M, P) = exp(2*M + 1/P) * (exp(1/P) - 1) (2)
//
// Pulling these together we have
// E_{M, P}[ exp(2*M + 1/P) * (exp(1/P) - 1) + (exp(M + 1/(2*P)) - E[X])^2 ]
//
// Note that although the integrand factorizes and we can perform the
// integration over M in closed form, this version suffers from numerical
// stability issues which can lead the variance to be negative. When the
// prior is narrow we use the approximation:
// E_P[exp(2*m + 1/P * (2/t + 1)) * (exp(1/P) - 1)]
// ~= exp(2*m + 1/E[P] * (2/t + 1)) * (exp(1/E[P]) - 1)
// ~= exp(2*m + b/a * (2/t + 1)) * (exp(b/a) - 1)
//
// Note that b / a > 0 so this is necessarily non-negative.
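//
// As a purely illustrative check of (2): if log(X) ~ N(M, 1/P) with M = 0
// and P = 4 then
//   E[X | M, P]   = exp(M + 1/(2*P))                ~= exp(0.125) ~= 1.13
//   Var(X | M, P) = exp(2*M + 1/P) * (exp(1/P) - 1) ~= 1.28 * 0.28 ~= 0.36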
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
double vh = std::exp(2.0 * m_GaussianMean + m_GammaRate / m_GammaShape *
(2.0 / m_GaussianPrecision + 1.0)) *
(std::exp(m_GammaRate / m_GammaShape) - 1.0);
if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) {
try {
detail::CVarianceKernel f(this->marginalLikelihoodMean(), m_GaussianMean,
m_GaussianPrecision, m_GammaShape, m_GammaRate);
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
TDoubleVec a(2);
TDoubleVec b(2);
a[0] = boost::math::quantile(gamma, 0.03);
b[0] = boost::math::quantile(gamma, 0.97);
boost::math::normal normal(m_GaussianMean, 1.0 / a[0] / m_GaussianPrecision);
a[1] = boost::math::quantile(normal, 0.03);
b[1] = boost::math::quantile(normal, 0.97);
detail::CVarianceKernel::TValue variance;
if (CIntegration::sparseGaussLegendre<CIntegration::OrderThree, CIntegration::TwoDimensions>(
f, a, b, variance)) {
double vl = variance(0) / variance(1);
double alpha = std::min(
2.0 * (1.0 - m_GammaShape / MINIMUM_LOGNORMAL_SHAPE), 1.0);
return varianceScale * alpha * vl + (1.0 - alpha) * vh;
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate variance: " << e.what());
}
}
return varianceScale * vh;
}
CLogNormalMeanPrecConjugate::TDoubleDoublePr
CLogNormalMeanPrecConjugate::marginalLikelihoodConfidenceInterval(double percentage,
const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return this->marginalLikelihoodSupport();
}
percentage /= 100.0;
percentage = CTools::truncate(percentage, 0.0, 1.0);
// We use the fact that the marginal likelihood is a log-t distribution.
try {
double varianceScale = maths_t::seasonalVarianceScale(weights) *
maths_t::countVarianceScale(weights);
double r = m_GammaRate / m_GammaShape;
double s = std::exp(-r);
double location;
double scale;
detail::locationAndScale(varianceScale, r, s, m_GaussianMean, m_GaussianPrecision,
m_GammaRate, m_GammaShape, location, scale);
LOG_TRACE(<< "location = " << location << ", scale = " << scale);
if (m_GammaShape > MINIMUM_LOGNORMAL_SHAPE) {
boost::math::lognormal logNormal(location, scale);
double x1 = boost::math::quantile(logNormal, (1.0 - percentage) / 2.0) -
m_Offset - (this->isInteger() ? 0.5 : 0.0);
double x2 = percentage > 0.0
? boost::math::quantile(logNormal, (1.0 + percentage) / 2.0) -
m_Offset - (this->isInteger() ? 0.5 : 0.0)
: x1;
LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
return {x1, x2};
}
CLogTDistribution logt(2.0 * m_GammaShape, location, scale);
double x1 = quantile(logt, (1.0 - percentage) / 2.0) - m_Offset -
(this->isInteger() ? 0.5 : 0.0);
double x2 = percentage > 0.0 ? quantile(logt, (1.0 + percentage) / 2.0) -
m_Offset - (this->isInteger() ? 0.5 : 0.0)
: x1;
LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
return {x1, x2};
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to compute confidence interval: " << e.what());
}
return this->marginalLikelihoodSupport();
}
maths_t::EFloatingPointErrorStatus
CLogNormalMeanPrecConjugate::jointLogMarginalLikelihood(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& result) const {
result = 0.0;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute likelihood for empty sample set");
return maths_t::E_FpFailed;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return maths_t::E_FpFailed;
}
if (this->isNonInformative()) {
// The non-informative likelihood is improper and effectively
// zero everywhere. We use minus max double because
// log(0) = HUGE_VALUE, which causes problems for Windows.
// Calling code is notified when the calculation overflows
// and should avoid taking the exponential since this will
// underflow and pollute the floating point environment. This
// may cause issues for some library function implementations
// (see fe*exceptflag for more details).
result = std::numeric_limits<double>::lowest();
return maths_t::E_FpOverflowed;
}
detail::CLogMarginalLikelihood logMarginalLikelihood(
samples, weights, m_Offset, m_GaussianMean, m_GaussianPrecision,
m_GammaShape, m_GammaRate);
if (this->isInteger()) {
CIntegration::logGaussLegendre<CIntegration::OrderThree>(
logMarginalLikelihood, 0.0, 1.0, result);
} else {
logMarginalLikelihood(0.0, result);
}
auto status = static_cast<maths_t::EFloatingPointErrorStatus>(
logMarginalLikelihood.errorStatus() | CMathsFuncs::fpStatus(result));
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Failed to compute log likelihood (" << this->debug() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
} else if ((status & maths_t::E_FpOverflowed) != 0) {
LOG_TRACE(<< "Log likelihood overflowed for (" << this->debug() << ")");
LOG_TRACE(<< "samples = " << samples);
LOG_TRACE(<< "weights = " << weights);
}
return status;
}
void CLogNormalMeanPrecConjugate::sampleMarginalLikelihood(std::size_t numberSamples,
TDouble1Vec& samples) const {
samples.clear();
if (numberSamples == 0 || this->numberSamples() == 0.0) {
return;
}
if (this->isNonInformative()) {
// We can't sample the marginal likelihood directly. This should
// only happen if we've had one sample so just return that sample.
samples.push_back(std::exp(m_GaussianMean) - m_Offset);
return;
}
// The sampling strategy is to split the marginal likelihood up into
// equal quantiles and then compute the expectation on each quantile
// and sample that point, i.e. effectively sample the points:
// { n * E[ X * I{[q_n(i), q_n((i+1))]} ] }
//
// where,
// X is a R.V. whose distribution is the marginal likelihood.
// I{.} is the indicator function.
// q_n(.) is the n'th quantile function.
// i ranges over 0 to n-1.
// n is the number of samples.
//
// This sampling strategy has various nice properties:
// 1) The sample quantiles match as many quantiles of the distribution
// as possible.
// 2) The sample mean always matches the distribution mean:
// Sum_i( E[X * I{[q_n(i), q_n((i+1))]}] ) = E[X * 1] = E[X]
// by linearity of the expectation operator and since the indicators
// range over the entire support for X.
// 3) As the number of samples increase each sample moment tends
// asymptotically to each corresponding distribution moment.
//
// X is log t-distributed. However, the expectation diverges for the
// first and last quantile: it is equivalent to the moment generating
// function of a t-distribution. So, instead we sample the approximate
// log-normal distribution and use the relationship:
// E[ X * I{[x1,x2]} ] =
// [ 1/2
// * exp(m + (p+1)/p/2*b/a) x2
// * erf((p/(p+1)*a/b * (x - m) - 1) / (2 * p/(p+1)*a/b) ^ (1/2) ]
// x1
// m and p are the prior Gaussian mean and precision, respectively.
// a and b are the prior Gamma shape and rate, respectively.
// erf(.) is the error function.
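//
// For reference, the identity implemented in the loop below is the standard
// log-normal partial expectation: if log(X) ~ N(m, s^2) with
// s^2 = (p+1)/p * b/a then
//   E[ X * I{X <= q} ] = exp(m + s^2/2) * Phi((log(q) - m - s^2) / s)
//                      = mean * (1 + erf(z)) / 2,
//   z = (log(q) - m - s^2) / (s * 2^(1/2))
// which is exactly the z computed from each quantile q below.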
samples.reserve(numberSamples);
double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision *
m_GammaRate / m_GammaShape);
try {
boost::math::lognormal lognormal(m_GaussianMean, scale);
double mean = boost::math::mean(lognormal);
LOG_TRACE(<< "mean = " << mean << ", scale = " << scale
<< ", numberSamples = " << numberSamples);
TDoubleDoublePr support = this->marginalLikelihoodSupport();
double lastPartialExpectation = 0.0;
for (std::size_t i = 1; i < numberSamples; ++i) {
double q = static_cast<double>(i) / static_cast<double>(numberSamples);
double xq = std::log(boost::math::quantile(lognormal, q));
double z = (xq - m_GaussianMean - scale * scale) / scale /
boost::math::double_constants::root_two;
double partialExpectation = mean * (1.0 + std::erf(z)) / 2.0;
double sample = static_cast<double>(numberSamples) *
(partialExpectation - lastPartialExpectation) -
m_Offset;
LOG_TRACE(<< "sample = " << sample);
// Sanity check the sample: should be in the distribution support.
if (sample >= support.first && sample <= support.second) {
samples.push_back(sample);
} else {
LOG_ERROR(<< "Sample out of bounds: sample = " << sample - m_Offset
<< ", gaussianMean = " << m_GaussianMean << ", scale = " << scale
<< ", q = " << q << ", x(q) = " << xq << ", mean = " << mean);
}
lastPartialExpectation = partialExpectation;
}
double sample =
static_cast<double>(numberSamples) * (mean - lastPartialExpectation) - m_Offset;
LOG_TRACE(<< "sample = " << sample);
if (sample >= support.first && sample <= support.second) {
samples.push_back(sample);
} else {
LOG_ERROR(<< "Sample out of bounds: sample = " << sample
<< ", gaussianMean = " << m_GaussianMean
<< ", scale = " << scale << ", mean = " << mean);
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to sample: " << e.what() << ", gaussianMean "
<< m_GaussianMean << ", scale = " << scale);
}
}
bool CLogNormalMeanPrecConjugate::minusLogJointCdf(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
using TMinusLogCdf = detail::CEvaluateOnSamples<CTools::SMinusLogCdf>;
lowerBound = upperBound = 0.0;
TMinusLogCdf minusLogCdf(samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean,
m_GaussianPrecision, m_GammaShape, m_GammaRate);
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
minusLogCdf, 0.0, 1.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
double value;
if (!minusLogCdf(0.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
bool CLogNormalMeanPrecConjugate::minusLogJointCdfComplement(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
using TMinusLogCdfComplement = detail::CEvaluateOnSamples<CTools::SMinusLogCdfComplement>;
lowerBound = upperBound = 0.0;
TMinusLogCdfComplement minusLogCdfComplement(
samples, weights, this->isNonInformative(), m_Offset, m_GaussianMean,
m_GaussianPrecision, m_GammaShape, m_GammaRate);
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::logGaussLegendre<CIntegration::OrderThree>(
minusLogCdfComplement, 0.0, 1.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. complement (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
double value;
if (!minusLogCdfComplement(0.0, value)) {
LOG_ERROR(<< "Failed computing c.d.f. complement (samples = " << samples
<< ", weights = " << weights << ")");
return false;
}
lowerBound = upperBound = value;
return true;
}
bool CLogNormalMeanPrecConjugate::probabilityOfLessLikelySamples(
maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound,
maths_t::ETail& tail) const {
lowerBound = 0.0;
upperBound = 1.0;
tail = maths_t::E_UndeterminedTail;
detail::CProbabilityOfLessLikelySamples probability(
calculation, samples, weights, this->isNonInformative(), m_Offset,
m_GaussianMean, m_GaussianPrecision, m_GammaShape, m_GammaRate);
if (this->isInteger()) {
// If the data are discrete we compute the approximate expectation
// w.r.t. the hidden offset of the samples Z, which is uniform
// on the interval [0,1].
double value;
if (!CIntegration::gaussLegendre<CIntegration::OrderThree>(probability, 0.0,
1.0, value)) {
return false;
}
lowerBound = upperBound = value;
tail = probability.tail();
return true;
}
double value;
if (!probability(0.0, value)) {
return false;
}
lowerBound = upperBound = value;
tail = probability.tail();
return true;
}
bool CLogNormalMeanPrecConjugate::isNonInformative() const {
return m_GammaRate == NON_INFORMATIVE_RATE || m_GaussianPrecision == NON_INFORMATIVE_PRECISION;
}
void CLogNormalMeanPrecConjugate::print(const std::string& indent, std::string& result) const {
result += core_t::LINE_ENDING + indent + "log-normal ";
if (this->isNonInformative()) {
result += "non-informative";
return;
}
std::string mean;
std::string sd;
std::tie(mean, sd) = this->printMarginalLikelihoodStatistics();
result += "mean = " + mean + " sd = " + sd;
}
CPrior::TStrStrPr CLogNormalMeanPrecConjugate::doPrintMarginalLikelihoodStatistics() const {
double scale = std::sqrt((m_GaussianPrecision + 1.0) / m_GaussianPrecision *
m_GammaRate / m_GammaShape);
try {
boost::math::lognormal lognormal(m_GaussianMean, scale);
double mean = boost::math::mean(lognormal);
double deviation = boost::math::standard_deviation(lognormal);
const std::string meanStr{core::CStringUtils::typeToStringPretty(mean - m_Offset)};
const std::string sdStr{core::CStringUtils::typeToStringPretty(deviation)};
return TStrStrPr{meanStr, sdStr};
} catch (std::exception&) {}
return {CPrior::UNKNOWN_VALUE_STRING, CPrior::UNKNOWN_VALUE_STRING};
}
std::string CLogNormalMeanPrecConjugate::printJointDensityFunction() const {
if (this->isNonInformative()) {
// The non-informative prior is improper and effectively 0 everywhere.
return std::string();
}
// We'll plot the prior over a range where most of the mass is.
static const double RANGE = 0.99;
static const unsigned int POINTS = 51;
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
double precision = m_GaussianPrecision * this->normalPrecision();
boost::math::normal gaussian(m_GaussianMean, 1.0 / std::sqrt(precision));
double xStart = boost::math::quantile(gamma, (1.0 - RANGE) / 2.0);
double xEnd = boost::math::quantile(gamma, (1.0 + RANGE) / 2.0);
double xIncrement = (xEnd - xStart) / (POINTS - 1.0);
double x = xStart;
double yStart = boost::math::quantile(gaussian, (1.0 - RANGE) / 2.0);
double yEnd = boost::math::quantile(gaussian, (1.0 + RANGE) / 2.0);
double yIncrement = (yEnd - yStart) / (POINTS - 1.0);
double y = yStart;
std::ostringstream xCoordinates;
std::ostringstream yCoordinates;
xCoordinates << "x = [";
yCoordinates << "y = [";
for (unsigned int i = 0; i < POINTS; ++i, x += xIncrement, y += yIncrement) {
xCoordinates << x << " ";
yCoordinates << y << " ";
}
xCoordinates << "];" << core_t::LINE_ENDING;
yCoordinates << "];" << core_t::LINE_ENDING;
std::ostringstream pdf;
pdf << "pdf = [";
x = xStart;
for (unsigned int i = 0; i < POINTS; ++i, x += xIncrement) {
y = yStart;
for (unsigned int j = 0; j < POINTS; ++j, y += yIncrement) {
double conditionalPrecision = m_GaussianPrecision * x;
boost::math::normal conditionalGaussian(
m_GaussianMean, 1.0 / std::sqrt(conditionalPrecision));
pdf << (CTools::safePdf(gamma, x) * CTools::safePdf(conditionalGaussian, y))
<< " ";
}
pdf << core_t::LINE_ENDING;
}
pdf << "];" << core_t::LINE_ENDING << "mesh(x, y, pdf);";
return xCoordinates.str() + yCoordinates.str() + pdf.str();
}
std::uint64_t CLogNormalMeanPrecConjugate::checksum(std::uint64_t seed) const {
seed = this->CPrior::checksum(seed);
seed = CChecksum::calculate(seed, m_Offset);
seed = CChecksum::calculate(seed, m_GaussianMean);
seed = CChecksum::calculate(seed, m_GaussianPrecision);
seed = CChecksum::calculate(seed, m_GammaShape);
return CChecksum::calculate(seed, m_GammaRate);
}
void CLogNormalMeanPrecConjugate::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
mem->setName("CLogNormalMeanPrecConjugate");
}
std::size_t CLogNormalMeanPrecConjugate::memoryUsage() const {
return 0;
}
std::size_t CLogNormalMeanPrecConjugate::staticSize() const {
return sizeof(*this);
}
void CLogNormalMeanPrecConjugate::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
inserter.insertValue(DECAY_RATE_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
inserter.insertValue(OFFSET_TAG, m_Offset.toString());
inserter.insertValue(GAUSSIAN_MEAN_TAG, m_GaussianMean.toString());
inserter.insertValue(GAUSSIAN_PRECISION_TAG, m_GaussianPrecision.toString());
inserter.insertValue(GAMMA_SHAPE_TAG, m_GammaShape.toString());
inserter.insertValue(GAMMA_RATE_TAG, m_GammaRate, core::CIEEE754::E_DoublePrecision);
inserter.insertValue(NUMBER_SAMPLES_TAG, this->numberSamples(),
core::CIEEE754::E_SinglePrecision);
if (inserter.readableTags() == true) {
std::string mean;
std::string sd;
std::tie(mean, sd) = this->printMarginalLikelihoodStatistics();
inserter.insertValue(MEAN_TAG, mean);
inserter.insertValue(STANDARD_DEVIATION_TAG, sd);
}
}
double CLogNormalMeanPrecConjugate::normalMean() const {
return m_GaussianMean;
}
double CLogNormalMeanPrecConjugate::normalPrecision() const {
if (this->isNonInformative()) {
return 0.0;
}
try {
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
return boost::math::mean(gamma);
} catch (std::exception& e) {
LOG_ERROR(<< "Failed to create prior: " << e.what()
<< " shape = " << m_GammaShape << ", rate = " << m_GammaRate);
}
return 0.0;
}
CLogNormalMeanPrecConjugate::TDoubleDoublePr
CLogNormalMeanPrecConjugate::confidenceIntervalNormalMean(double percentage) const {
if (this->isNonInformative()) {
return {std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max()};
}
// Compute the symmetric confidence interval around the median of the
// distribution from the percentiles, i.e. find the percentiles q(1)
// and q(2) which satisfy:
// q(1) + q(2) = 1.0
// q(2) - q(1) = percentage
//
// We assume the data are described by X = exp(Y) - u where Y is normally
// distributed and u is a constant offset.
//
// The marginal prior distribution for the mean, M, of Y is a t distribution
// with 2 * a degrees of freedom, location m and precision a * p / b.
// We can compute the percentiles for this distribution from the student's
// t distribution by noting that:
// (p * a / b) ^ (1/2) * (M - m) ~ student's t
//
// So the percentiles of the student's t map to the percentiles of M as follows:
// x_m_q = m + x_q_students / (a * p / b) ^ (1/2).
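//
// A purely illustrative example of that mapping: with m = 0, a = 2, b = 1
// and p = 3 the precision of M is a * p / b = 6, the 97.5th percentile of
// a student's t with 2 * a = 4 degrees of freedom is ~2.776, and so the
// upper bound of the 95% interval is 0 + 2.776 / 6^(1/2) ~= 1.13.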
percentage /= 100.0;
double lowerPercentile = 0.5 * (1.0 - percentage);
double upperPercentile = 0.5 * (1.0 + percentage);
boost::math::students_t students(2.0 * m_GammaShape);
double xLower = boost::math::quantile(students, lowerPercentile);
double xUpper = boost::math::quantile(students, upperPercentile);
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
double precision = m_GaussianPrecision * this->normalPrecision();
xLower = m_GaussianMean + xLower / std::sqrt(precision);
xUpper = m_GaussianMean + xUpper / std::sqrt(precision);
return {xLower, xUpper};
}
CLogNormalMeanPrecConjugate::TDoubleDoublePr
CLogNormalMeanPrecConjugate::confidenceIntervalNormalPrecision(double percentage) const {
if (this->isNonInformative()) {
return {std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max()};
}
percentage /= 100.0;
double lowerPercentile = 0.5 * (1.0 - percentage);
double upperPercentile = 0.5 * (1.0 + percentage);
// The marginal prior distribution for the precision is gamma.
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
return {boost::math::quantile(gamma, lowerPercentile),
boost::math::quantile(gamma, upperPercentile)};
}
bool CLogNormalMeanPrecConjugate::equalTolerance(const CLogNormalMeanPrecConjugate& rhs,
const TEqualWithTolerance& equal) const {
LOG_DEBUG(<< m_GaussianMean << " " << rhs.m_GaussianMean << ", " << m_GaussianPrecision
<< " " << rhs.m_GaussianPrecision << ", " << m_GammaShape << " "
<< rhs.m_GammaShape << ", " << m_GammaRate << " " << rhs.m_GammaRate);
return equal(m_GaussianMean, rhs.m_GaussianMean) &&
equal(m_GaussianPrecision, rhs.m_GaussianPrecision) &&
equal(m_GammaShape, rhs.m_GammaShape) && equal(m_GammaRate, rhs.m_GammaRate);
}
double CLogNormalMeanPrecConjugate::mean() const {
if (this->isNonInformative()) {
return std::exp(m_GaussianMean) - m_Offset;
}
// This is just
// E_{M, P}[E[X | M, P]] - u (1)
//
// where M, P likelihood mean and precision, respectively, and u is
// the offset. Note that X conditioned on M and P is log normal and
// E[X | M, P] = exp(M + 1/P/2) (2)
//
// Since the prior and the r.h.s. of (2) factorize, so does the expectation,
// and we can evaluate the expectation over M in (1) to give
// E_{M, P}[E[X | M, P]] = exp(m) * E_{P}[exp(1/2/P * (1/t + 1))]
//
// where m and t are the conditional prior mean and precision scale,
// respectively. This last integral has no closed form solution, we
// evaluate by numerical integration when the prior is wide and use:
// E[exp(1/2/P * (1/t + 1))] ~= exp(1/2/E[P] * (1/t + 1))
// ~= exp(b/a/2 * (1/t + 1))
//
// when it is narrow.
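//
// Filling in the step over M above: conditional on P the prior for M is
// N(m, 1/(t*P)), so by the normal moment generating function
//   E[exp(M) | P] = exp(m + 1/(2*t*P))
// and hence E[X | P] = exp(m + 1/(2*P) * (1/t + 1)), which is averaged over
// P below (or approximated by substituting 1/E[P] = b/a when the prior is
// narrow).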
if (m_GammaShape < MINIMUM_LOGNORMAL_SHAPE) {
try {
detail::CMeanKernel f(m_GaussianMean, m_GaussianPrecision,
m_GammaShape, m_GammaRate);
boost::math::gamma_distribution<> gamma(m_GammaShape, 1.0 / m_GammaRate);
double a = boost::math::quantile(gamma, 0.1);
double b = boost::math::quantile(gamma, 0.9);
detail::CMeanKernel::TValue result;
if (CIntegration::gaussLegendre<CIntegration::OrderSeven>(f, a, b, result)) {
return result(0) / result(1) - m_Offset;
}
} catch (const std::exception& e) {
LOG_ERROR(<< "Failed to calculate mean: " << e.what());
}
}
return std::exp(m_GaussianMean + 0.5 * m_GammaRate / m_GammaShape *
(1.0 / m_GaussianPrecision + 1.0)) -
m_Offset;
}
bool CLogNormalMeanPrecConjugate::isBad() const {
return !CMathsFuncs::isFinite(m_Offset) || !CMathsFuncs::isFinite(m_GaussianMean) ||
!CMathsFuncs::isFinite(m_GaussianPrecision) ||
!CMathsFuncs::isFinite(m_GammaShape) || !CMathsFuncs::isFinite(m_GammaRate);
}
std::string CLogNormalMeanPrecConjugate::debug() const {
std::ostringstream result;
result << std::scientific << std::setprecision(15) << m_Offset << " " << m_GaussianMean
<< " " << m_GaussianPrecision << " " << m_GammaShape << " " << m_GammaRate;
return result.str();
}
const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_MEAN = 0.0;
const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_PRECISION = 0.0;
const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_SHAPE = 1.0;
const double CLogNormalMeanPrecConjugate::NON_INFORMATIVE_RATE = 0.0;
}
}
}