lib/maths/common/COneOfNPrior.cc
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <maths/common/COneOfNPrior.h>
#include <core/CLogger.h>
#include <core/CMemoryDef.h>
#include <core/CStatePersistInserter.h>
#include <core/CStateRestoreTraverser.h>
#include <core/CStringUtils.h>
#include <core/RestoreMacros.h>
#include <maths/common/CBasicStatistics.h>
#include <maths/common/CBasicStatisticsPersist.h>
#include <maths/common/CChecksum.h>
#include <maths/common/CMathsFuncs.h>
#include <maths/common/CMathsFuncsForMatrixAndVectorTypes.h>
#include <maths/common/CPriorStateSerialiser.h>
#include <maths/common/CRestoreParams.h>
#include <maths/common/CSampling.h>
#include <maths/common/CTools.h>
#include <algorithm>
#include <cmath>
#include <iterator>
#include <limits>
#include <utility>
namespace ml {
namespace maths {
namespace common {
namespace {
using TBool5Vec = core::CSmallVector<bool, 5>;
using TDouble5Vec = core::CSmallVector<double, 5>;
using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
//! Compute the log of \p n, using a small lookup table for n <= 5.
double logn(std::size_t n) {
static const double LOG_N[] = {0.0, std::log(2.0), std::log(3.0),
std::log(4.0), std::log(5.0)};
return n >= 1 && n <= std::size(LOG_N) ? LOG_N[n - 1]
: std::log(static_cast<double>(n));
}
const double DERATE = 0.99999;
const double MINUS_INF = DERATE * std::numeric_limits<double>::lowest();
const double INF = DERATE * std::numeric_limits<double>::max();
const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e8);
const double MAXIMUM_UNPENALISED_RELATIVE_VARIANCE_ERROR = 9.0;
const double MINIMUM_SIGNIFICANT_WEIGHT = 0.01;
const double MAXIMUM_RELATIVE_ERROR = 1e-3;
const double LOG_MAXIMUM_RELATIVE_ERROR = std::log(MAXIMUM_RELATIVE_ERROR);
const std::string VERSION_7_1_TAG("7.1");
// Version 7.1
const core::TPersistenceTag MODEL_7_1_TAG("a", "model");
const core::TPersistenceTag SAMPLE_MOMENTS_7_1_TAG("b", "sample_moments");
const core::TPersistenceTag NUMBER_SAMPLES_7_1_TAG("c", "number_samples");
const core::TPersistenceTag DECAY_RATE_7_1_TAG("d", "decay_rate");
// Version < 7.1
const std::string MODEL_OLD_TAG("a");
const std::string NUMBER_SAMPLES_OLD_TAG("b");
const std::string DECAY_RATE_OLD_TAG("e");
// Nested tags
const core::TPersistenceTag WEIGHT_TAG("a", "weight");
const core::TPersistenceTag PRIOR_TAG("b", "prior");
const std::string EMPTY_STRING;
//! Persist state for a model by passing information to \p inserter.
void modelAcceptPersistInserter(const CModelWeight& weight,
const CPrior& prior,
core::CStatePersistInserter& inserter) {
inserter.insertLevel(WEIGHT_TAG, std::bind(&CModelWeight::acceptPersistInserter,
&weight, std::placeholders::_1));
inserter.insertLevel(PRIOR_TAG, std::bind<void>(CPriorStateSerialiser(), std::cref(prior),
std::placeholders::_1));
}
}
//////// COneOfNPrior Implementation ////////
COneOfNPrior::COneOfNPrior(const TPriorPtrVec& models, maths_t::EDataType dataType, double decayRate)
: CPrior(dataType, decayRate) {
if (models.empty()) {
LOG_ERROR(<< "Can't initialize one-of-n with no models!");
return;
}
// Create a new model vector using uniform weights.
m_Models.reserve(models.size());
CModelWeight weight(1.0);
for (const auto& model : models) {
m_Models.emplace_back(weight, TPriorPtr(model->clone()));
}
}
COneOfNPrior::COneOfNPrior(const TDoublePriorPtrPrVec& models,
maths_t::EDataType dataType,
double decayRate /*= 0.0*/)
: CPrior(dataType, decayRate) {
if (models.empty()) {
LOG_ERROR(<< "Can't initialize mixed model with no models!");
return;
}
CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
// Create a new model vector using the specified models and their associated weights.
m_Models.reserve(models.size());
for (const auto& model : models) {
m_Models.emplace_back(CModelWeight(model.first), TPriorPtr(model.second->clone()));
}
}
COneOfNPrior::COneOfNPrior(const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser)
: CPrior(params.s_DataType, params.s_DecayRate) {
if (traverser.traverseSubLevel(std::bind(&COneOfNPrior::acceptRestoreTraverser,
this, std::cref(params),
std::placeholders::_1)) == false) {
traverser.setBadState();
}
}
bool COneOfNPrior::acceptRestoreTraverser(const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser) {
if (traverser.name() == VERSION_7_1_TAG) {
while (traverser.next()) {
const std::string& name = traverser.name();
RESTORE_SETUP_TEARDOWN(DECAY_RATE_7_1_TAG, double decayRate,
core::CStringUtils::stringToType(traverser.value(), decayRate),
this->decayRate(decayRate))
RESTORE(MODEL_7_1_TAG, traverser.traverseSubLevel(std::bind(
&COneOfNPrior::modelAcceptRestoreTraverser, this,
std::cref(params), std::placeholders::_1)))
RESTORE(SAMPLE_MOMENTS_7_1_TAG,
m_SampleMoments.fromDelimited(traverser.value()))
RESTORE_SETUP_TEARDOWN(
NUMBER_SAMPLES_7_1_TAG, double numberSamples,
core::CStringUtils::stringToType(traverser.value(), numberSamples),
this->numberSamples(numberSamples))
}
} else {
do {
const std::string& name = traverser.name();
RESTORE_SETUP_TEARDOWN(DECAY_RATE_OLD_TAG, double decayRate,
core::CStringUtils::stringToType(traverser.value(), decayRate),
this->decayRate(decayRate))
RESTORE(MODEL_OLD_TAG, traverser.traverseSubLevel(std::bind(
&COneOfNPrior::modelAcceptRestoreTraverser, this,
std::cref(params), std::placeholders::_1)))
RESTORE_SETUP_TEARDOWN(
NUMBER_SAMPLES_OLD_TAG, double numberSamples,
core::CStringUtils::stringToType(traverser.value(), numberSamples),
this->numberSamples(numberSamples))
} while (traverser.next());
if (!this->isNonInformative()) {
double variance{INF};
for (const auto& model : m_Models) {
variance = std::min(variance, model.second->marginalLikelihoodVariance());
}
m_SampleMoments = CBasicStatistics::momentsAccumulator(
this->numberSamples(), this->marginalLikelihoodMean(), variance);
}
}
return true;
}
COneOfNPrior::COneOfNPrior(const COneOfNPrior& other)
: CPrior(other.dataType(), other.decayRate()),
m_SampleMoments(other.m_SampleMoments) {
// Clone all the models up front so we can implement strong exception safety.
m_Models.reserve(other.m_Models.size());
for (const auto& model : other.m_Models) {
m_Models.emplace_back(model.first, TPriorPtr(model.second->clone()));
}
this->CPrior::addSamples(other.numberSamples());
}
COneOfNPrior& COneOfNPrior::operator=(const COneOfNPrior& rhs) {
if (this != &rhs) {
COneOfNPrior tmp(rhs);
this->swap(tmp);
}
return *this;
}
void COneOfNPrior::swap(COneOfNPrior& other) {
this->CPrior::swap(other);
m_Models.swap(other.m_Models);
std::swap(m_SampleMoments, other.m_SampleMoments);
}
COneOfNPrior::EPrior COneOfNPrior::type() const {
return E_OneOfN;
}
COneOfNPrior* COneOfNPrior::clone() const {
return new COneOfNPrior(*this);
}
void COneOfNPrior::dataType(maths_t::EDataType value) {
this->CPrior::dataType(value);
for (auto& model : m_Models) {
model.second->dataType(value);
}
}
void COneOfNPrior::decayRate(double value) {
this->CPrior::decayRate(value);
for (auto& model : m_Models) {
model.second->decayRate(this->decayRate());
}
}
void COneOfNPrior::setToNonInformative(double offset, double decayRate) {
for (auto& model : m_Models) {
model.first.age(0.0);
model.second->setToNonInformative(offset, decayRate);
}
m_SampleMoments = TMeanVarAccumulator();
this->decayRate(decayRate);
this->numberSamples(0.0);
}
void COneOfNPrior::removeModels(CModelFilter& filter) {
CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
std::size_t last = 0;
for (std::size_t i = 0; i < m_Models.size(); ++i) {
if (last != i) {
std::swap(m_Models[last], m_Models[i]);
}
if (!filter(m_Models[last].second->type())) {
++last;
}
}
m_Models.erase(m_Models.begin() + last, m_Models.end());
}
bool COneOfNPrior::needsOffset() const {
for (const auto& model : m_Models) {
if (model.second->needsOffset()) {
return true;
}
}
return false;
}
double COneOfNPrior::adjustOffset(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
TMeanAccumulator result;
TDouble5Vec penalties;
for (auto& model : m_Models) {
double penalty = model.second->adjustOffset(samples, weights);
penalties.push_back(penalty);
result.add(penalty, model.first);
}
if (CBasicStatistics::mean(result) != 0.0) {
CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
for (std::size_t i = 0; i < penalties.size(); ++i) {
if (m_Models[i].second->participatesInModelSelection() &&
CMathsFuncs::isFinite(penalties[i])) {
CModelWeight& weight = m_Models[i].first;
weight.logWeight(weight.logWeight() + penalties[i]);
}
}
}
return CBasicStatistics::mean(result);
}
double COneOfNPrior::offset() const {
double offset = 0.0;
for (const auto& model : m_Models) {
offset = std::max(offset, model.second->offset());
}
return offset;
}
void COneOfNPrior::addSamples(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights) {
if (samples.empty()) {
return;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return;
}
this->adjustOffset(samples, weights);
double n{this->numberSamples()};
this->CPrior::addSamples(samples, weights);
n = this->numberSamples() - n;
for (std::size_t i = 0; i < samples.size(); ++i) {
if (CMathsFuncs::isFinite(samples[i]) && CMathsFuncs::isFinite(weights[i])) {
double xi = samples[i];
double ni = maths_t::countForUpdate(weights[i]);
m_SampleMoments.add(xi, ni);
}
}
// For this 1-of-n model we assume that all the data come from one of
// the distributions which comprise the model. The model can be
// treated exactly like a discrete parameter and assigned a posterior
// probability in a Bayesian sense. In particular, we have:
// f({p(m), m} | x) = L(x | p(m), m) * f(p(m)) * P(m) / N (1)
//
// where,
// x = {x(1), x(2), ... , x(n)} is the sample vector,
// f({p(m), m} | x) is the posterior distribution for p(m) and m,
// p(m) are the parameters of the model,
// m is the model,
// L(x | p(m), m) is the likelihood of the data given the m'th model,
// f(p(m)) is the prior for the m'th model parameters,
// P(m) is the prior probability the data are from the m'th model,
// N is a normalization factor.
//
// There is one such relation for each model and N is computed over
// all models:
// N = Sum_m( Integral_dp(m)( f({p(m), m}) ) )
//
// Note that we can write the r.h.s. of (1) as:
// ((L(x | p(m), m) / N'(m)) * f(p(m))) * (N'(m) / N * P(m)) (2)
//
// where,
// N'(m) = Integral_dp(m)( L(x | {p(m), m}) ),
// N = Sum_m( N'(m | x) ) by definition.
//
// This means that the joint posterior distribution factorises into the
// posterior distribution for the model parameters given the data and
// the posterior weights for each model, i.e.
// f({p(m), m} | x) = f'(p(m) | x) * P'(m | x)
//
// where f' and P' come from (2). Finally, note that N'(m) is really
// a function of the data, say h_m(x), and satisfies the relation:
// h_m({x(1), x(2), ... , x(k+1)})
// = L(x(k+1) | {p(m), m, x(1), x(2), ... , x(k)})
// * h_m({x(1), x(2), ... , x(k)})
//
// Here, L(x(k+1) | {p(m), m, x(1), x(2), ... , x(k)}) is the likelihood
// of the (k+1)'th datum given model m and all previous data. Really, the
// data just enter into this via the updated model parameters p(m). This
// is the form we use below.
//
// Note that the weight of the sample x(i) is interpreted as its count,
// i.e. n(i), so for example updating with {(x, 2)} is equivalent to
// updating with {x, x}.
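//
// To make the weight update below concrete (a sketch, using BIC as the
// proxy for the log marginal likelihood which the code actually uses):
// for each participating model m we accumulate
//   log(w(m)) <- log(w(m)) + log(L(x | m)) - k(m) / 2 * log(n)
// where k(m) is the number of unmarginalized parameters, and the weights
// are re-normalized on scope exit. For example, if two models with the
// same k(m) differ by 3 in log-likelihood for a batch, the ratio of their
// weights is multiplied by roughly exp(3) ~= 20, subject to the cap on
// the maximum log Bayes Factor and the variance mismatch penalty applied
// below.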
CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
// We need to check whether the prior is non-informative *before* adding
// samples to the constituent models.
bool isNonInformative{this->isNonInformative()};
TDouble5Vec minusBics;
TDouble5Vec varianceMismatchPenalties;
TBool5Vec used;
TBool5Vec uses;
bool failed{false};
// If none of the models are a good fit for the new data then restrict
// the maximum Bayes Factor since the data likelihood given the model
// is most useful if one of the models is (mostly) correct.
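// Roughly speaking, maxLogBayesFactor ends up as m * log(1e8) reduced by
// how far the batch log-likelihood falls short of the log-likelihood of
// the most probable data for the best fitting model (and floored at
// zero); in the weight update below no model's likelihood term may drop
// more than this amount below the leader's in a single update.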
double m{std::max(n, 1.0)};
double maxLogBayesFactor{-m * MAXIMUM_LOG_BAYES_FACTOR};
for (auto& model : m_Models) {
double minusBic{0.0};
maths_t::EFloatingPointErrorStatus status{maths_t::E_FpOverflowed};
used.push_back(model.second->participatesInModelSelection());
if (used.back()) {
double logLikelihood;
status = model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood);
minusBic = 2.0 * logLikelihood -
model.second->unmarginalizedParameters() * CTools::fastLog(n);
double modeLogLikelihood;
TDouble1Vec modes;
modes.reserve(weights.size());
for (const auto& weight : weights) {
modes.push_back(model.second->marginalLikelihoodMode(weight));
}
model.second->jointLogMarginalLikelihood(modes, weights, modeLogLikelihood);
maxLogBayesFactor = std::max(
maxLogBayesFactor, logLikelihood - std::max(modeLogLikelihood, logLikelihood));
}
minusBics.push_back((status & maths_t::E_FpOverflowed) ? MINUS_INF : minusBic);
failed |= (status & maths_t::E_FpFailed) != 0;
// Update the component prior distribution.
model.second->addSamples(samples, weights);
varianceMismatchPenalties.push_back(
-m * MAXIMUM_LOG_BAYES_FACTOR *
std::max(1.0 - MAXIMUM_UNPENALISED_RELATIVE_VARIANCE_ERROR *
CBasicStatistics::variance(m_SampleMoments) /
model.second->marginalLikelihoodVariance(),
0.0));
uses.push_back(model.second->participatesInModelSelection());
if (!uses.back()) {
model.first.logWeight(MINUS_INF);
}
}
if (failed) {
LOG_ERROR(<< "Failed to compute log-likelihood");
LOG_ERROR(<< "samples = " << samples);
return;
}
if (isNonInformative) {
return;
}
maxLogBayesFactor += m * MAXIMUM_LOG_BAYES_FACTOR;
LOG_TRACE(<< "BICs = " << minusBics);
LOG_TRACE(<< "max Bayes Factor = " << maxLogBayesFactor);
LOG_TRACE(<< "variance penalties = " << varianceMismatchPenalties);
double maxLogModelWeight{MINUS_INF + m * MAXIMUM_LOG_BAYES_FACTOR};
double maxMinusBic{*std::max_element(minusBics.begin(), minusBics.end())};
for (std::size_t i = 0; i < m_Models.size(); ++i) {
if (used[i] && uses[i]) {
m_Models[i].first.addLogFactor(
std::max(minusBics[i] / 2.0, maxMinusBic / 2.0 - maxLogBayesFactor) +
varianceMismatchPenalties[i]);
maxLogModelWeight = std::max(maxLogModelWeight, m_Models[i].first.logWeight());
}
}
for (std::size_t i = 0; i < m_Models.size(); ++i) {
if (!used[i] && uses[i]) {
m_Models[i].first.logWeight(maxLogModelWeight - m * MAXIMUM_LOG_BAYES_FACTOR);
}
}
if (this->badWeights()) {
LOG_ERROR(<< "Update failed (" << this->debugWeights() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
this->setToNonInformative(this->offsetMargin(), this->decayRate());
}
}
void COneOfNPrior::propagateForwardsByTime(double time) {
if (!CMathsFuncs::isFinite(time) || time < 0.0) {
LOG_ERROR(<< "Bad propagation time " << time);
return;
}
CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
double alpha = std::exp(-this->decayRate() * time);
for (auto& model : m_Models) {
model.first.age(alpha);
model.second->propagateForwardsByTime(time);
}
m_SampleMoments.age(alpha);
this->numberSamples(this->numberSamples() * alpha);
LOG_TRACE(<< "numberSamples = " << this->numberSamples());
}
COneOfNPrior::TDoubleDoublePr COneOfNPrior::marginalLikelihoodSupport() const {
TDoubleDoublePr result(MINUS_INF, INF);
// We define this as the intersection of the component model supports.
for (const auto& model : m_Models) {
if (model.second->participatesInModelSelection()) {
TDoubleDoublePr modelSupport = model.second->marginalLikelihoodSupport();
result.first = std::max(result.first, modelSupport.first);
result.second = std::min(result.second, modelSupport.second);
}
}
return result;
}
double COneOfNPrior::marginalLikelihoodMean() const {
if (this->isNonInformative()) {
return this->medianModelMean();
}
// This is E_{P(i)}[ E[X | P(i)] ] and the conditional expectation
// is just the individual model expectation. Note we exclude models
// with low weight because typically the means are similar between
// models and if they are very different we don't want to include
// the model if there is strong evidence against it.
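//
// For example (hypothetical numbers), with weights {0.7, 0.29, 0.01} and
// component means {10, 12, 100} only the first two models exceed
// MINIMUM_SIGNIFICANT_WEIGHT, giving
//   E[X] ~= (0.7 * 10 + 0.29 * 12) / (0.7 + 0.29) ~= 10.6.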
double result = 0.0;
double Z = 0.0;
for (const auto& model : m_Models) {
double wi = model.first;
if (wi > MINIMUM_SIGNIFICANT_WEIGHT) {
result += wi * model.second->marginalLikelihoodMean();
Z += wi;
}
}
return result / Z;
}
double COneOfNPrior::nearestMarginalLikelihoodMean(double value) const {
if (this->isNonInformative()) {
return this->medianModelMean();
}
// See marginalLikelihoodMean for discussion.
double result = 0.0;
double Z = 0.0;
for (const auto& model : m_Models) {
double wi = model.first;
if (wi > MINIMUM_SIGNIFICANT_WEIGHT) {
result += wi * model.second->nearestMarginalLikelihoodMean(value);
Z += wi;
}
}
return result / Z;
}
double COneOfNPrior::marginalLikelihoodMode(const TDoubleWeightsAry& weights) const {
// We approximate this as the weighted average of the component
// model modes.
// Declared outside the loop to minimize the number of times
// they are created.
TDouble1Vec sample(1);
TDoubleWeightsAry1Vec weight(1, weights);
TMeanAccumulator mode;
for (const auto& model : m_Models) {
if (model.second->participatesInModelSelection()) {
double wi = model.first;
double mi = model.second->marginalLikelihoodMode(weights);
double logLikelihood;
sample[0] = mi;
model.second->jointLogMarginalLikelihood(sample, weight, logLikelihood);
mode.add(mi, wi * std::exp(logLikelihood));
}
}
double result = CBasicStatistics::mean(mode);
TDoubleDoublePr support = this->marginalLikelihoodSupport();
return CTools::truncate(result, support.first, support.second);
}
double COneOfNPrior::marginalLikelihoodVariance(const TDoubleWeightsAry& weights) const {
if (this->isNonInformative()) {
return INF;
}
// This is E_{P(i)}[ Var[X | i] ] and the conditional variance is just
// the individual model variance. Note we exclude models with low weight
// because typically the variances are similar between models and if they
// are very different we don't want to include the model if there is
// strong evidence against it.
double result = 0.0;
double Z = 0.0;
for (const auto& model : m_Models) {
double wi = model.first;
if (wi > MINIMUM_SIGNIFICANT_WEIGHT) {
result += wi * model.second->marginalLikelihoodVariance(weights);
Z += wi;
}
}
return result / Z;
}
COneOfNPrior::TDoubleDoublePr
COneOfNPrior::marginalLikelihoodConfidenceInterval(double percentage,
const TDoubleWeightsAry& weights) const {
// We approximate this as the weighted average of the component model
// intervals. To compute the weights we expand all component model
// marginal likelihoods about a reasonable estimate for the true
// interval end points, i.e.
// [a_0, b_0] = [Sum_m( a(m) * w(m) ), Sum_m( b(m) * w(m) )]
//
// Here, m ranges over the component models, w(m) are the model
// weights and P([a(m), b(m)]) = p where p is the desired percentage
// expressed as a probability. Note that this will be accurate in
// the limit that one model dominates.
//
// Note P([a, b]) = F(b) - F(a) where F(.) is the c.d.f. of the
// marginal likelihood f(.) so it is possible to compute a first order
// correction to [a_0, b_0] as follows:
//   a_1 = a_0 + ((1 - p) / 2 - F(a_0)) / f(a_0)
//   b_1 = b_0 + ((1 + p) / 2 - F(b_0)) / f(b_0) (1)
//
// For the time being we just compute a_0 and b_0. We can revisit
// this calculation if the accuracy proves to be a problem.
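//
// A sketch of that refinement, should it ever be needed (mixtureCdf and
// mixtureDensity are hypothetical helpers, not part of this class):
//   double a1 = a0 + ((1.0 - p) / 2.0 - mixtureCdf(a0)) / mixtureDensity(a0);
//   double b1 = b0 + ((1.0 + p) / 2.0 - mixtureCdf(b0)) / mixtureDensity(b0);
// i.e. a single Newton step towards the true quantiles of the mixture.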
TMeanAccumulator x1, x2;
for (const auto& model : m_Models) {
double weight = model.first;
if (weight >= MAXIMUM_RELATIVE_ERROR) {
TDoubleDoublePr interval =
model.second->marginalLikelihoodConfidenceInterval(percentage, weights);
x1.add(interval.first, weight);
x2.add(interval.second, weight);
}
}
LOG_TRACE(<< "x1 = " << x1 << ", x2 = " << x2);
return {CBasicStatistics::mean(x1), CBasicStatistics::mean(x2)};
}
maths_t::EFloatingPointErrorStatus
COneOfNPrior::jointLogMarginalLikelihood(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& result) const {
result = 0.0;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute likelihood for empty sample set");
return maths_t::E_FpFailed;
}
if (samples.size() != weights.size()) {
LOG_ERROR(<< "Mismatch in samples '" << samples << "' and weights '"
<< weights << "'");
return maths_t::E_FpFailed;
}
// We get that:
// marginal_likelihood(x) = Sum_m( L(x | m) * P(m) ).
//
// where,
// x = {x(1), x(2), ... , x(n)} the sample vector,
// L(x | m) = Integral_dp(m)( L(x | {m, p(m)}) ),
// p(m) are the parameters of the component and
// P(m) is the prior probability the data are from the m'th model.
// We re-normalize the likelihoods so that the maximum is one in order
// to avoid underflow.
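//
// Concretely, writing l(m) = log(L(x | m)) + log(w(m)) and
// l* = max_m( l(m) ), the loop below evaluates
//   log(marginal_likelihood(x)) = l* + log( Sum_m( exp(l(m) - l*) ) / Z )
// where Z = Sum_m( w(m) ). Subtracting l* first keeps every exponential
// in [0, 1] so the sum cannot overflow.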
TDouble5Vec logLikelihoods;
double Z = 0.0;
TMaxAccumulator maxLogLikelihood;
for (const auto& model : m_Models) {
if (model.second->participatesInModelSelection()) {
double logLikelihood;
maths_t::EFloatingPointErrorStatus status =
model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood);
if ((status & maths_t::E_FpFailed) != 0) {
return status;
}
if ((status & maths_t::E_FpOverflowed) == 0) {
logLikelihood += model.first.logWeight();
logLikelihoods.push_back(logLikelihood);
maxLogLikelihood.add(logLikelihood);
}
Z += std::exp(model.first.logWeight());
}
}
if (maxLogLikelihood.count() == 0) {
// Technically, the marginal likelihood is zero here so its log
// would be minus infinity. We use (a derated) minus max double
// because std::log(0.0) returns -HUGE_VAL, which causes problems
// on Windows.
// Calling code is notified when the calculation overflows
// and should avoid taking the exponential since this will
// underflow and pollute the floating point environment.
// This may cause issues for some library function
// implementations (see fe*exceptflag for more details).
result = MINUS_INF;
return maths_t::E_FpOverflowed;
}
for (auto logLikelihood : logLikelihoods) {
result += std::exp(logLikelihood - maxLogLikelihood[0]);
}
result = maxLogLikelihood[0] + CTools::fastLog(result / Z);
maths_t::EFloatingPointErrorStatus status = CMathsFuncs::fpStatus(result);
if ((status & maths_t::E_FpFailed) != 0) {
LOG_ERROR(<< "Failed to compute log likelihood (" << this->debugWeights() << ")");
LOG_ERROR(<< "samples = " << samples);
LOG_ERROR(<< "weights = " << weights);
LOG_ERROR(<< "logLikelihoods = " << logLikelihoods);
LOG_ERROR(<< "maxLogLikelihood = " << maxLogLikelihood[0]);
} else if ((status & maths_t::E_FpOverflowed) != 0) {
LOG_ERROR(<< "Log likelihood overflowed for (" << this->debugWeights() << ")");
LOG_TRACE(<< "likelihoods = " << logLikelihoods);
LOG_TRACE(<< "samples = " << samples);
LOG_TRACE(<< "weights = " << weights);
}
return status;
}
void COneOfNPrior::sampleMarginalLikelihood(std::size_t numberSamples,
TDouble1Vec& samples) const {
samples.clear();
if (numberSamples == 0) {
return;
}
TDouble5Vec weights;
double Z = 0.0;
for (const auto& model : m_Models) {
weights.push_back(model.first);
Z += model.first;
}
for (auto& weight : weights) {
weight /= Z;
}
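// The requested number of samples is apportioned between the models in
// (approximately) the proportion of their posterior weights; we then draw
// that many samples from each model and truncate them to the slightly
// shrunk common support.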
CSampling::TSizeVec sampling;
CSampling::weightedSample(numberSamples, weights, sampling);
LOG_TRACE(<< "weights = " << weights << ", sampling = " << sampling);
if (sampling.size() != m_Models.size()) {
LOG_ERROR(<< "Failed to sample marginal likelihood");
return;
}
TDoubleDoublePr support = this->marginalLikelihoodSupport();
support.first = CTools::shiftRight(support.first);
support.second = CTools::shiftLeft(support.second);
samples.reserve(numberSamples);
TDouble1Vec modelSamples;
for (std::size_t i = 0; i < m_Models.size(); ++i) {
modelSamples.clear();
m_Models[i].second->sampleMarginalLikelihood(sampling[i], modelSamples);
for (auto sample : modelSamples) {
samples.push_back(CTools::truncate(sample, support.first, support.second));
}
}
LOG_TRACE(<< "samples = " << samples);
}
bool COneOfNPrior::isSelectedModelMultimodal() const {
auto weights = this->weights();
for (std::size_t i = 0; i < weights.size(); ++i) {
if (weights[i] > 0.1 && m_Models[i].second->type() == EPrior::E_Multimodal) {
return true;
}
}
return false;
}
bool COneOfNPrior::minusLogJointCdfImpl(bool complement,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
lowerBound = upperBound = 0.0;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute c.d.f. " << (complement ? "complement " : "")
<< "for empty sample set");
return false;
}
if (this->isNonInformative()) {
lowerBound = upperBound = -std::log(complement ? 1.0 - CTools::IMPROPER_CDF
: CTools::IMPROPER_CDF);
return true;
}
// We get that:
// cdf(x) = Integral_dx( Sum_m( L(x | m) * P(m) )
//
// where,
// x = {x(1), x(2), ... , x(n)} the sample vector,
// L(x | m) = Integral_dp(m)( L(x | {m, p(m)}) ),
// p(m) are the parameters of the component,
// P(m) is the prior probability the data are from the m'th model and
// Integral_dx is over [-inf, x(1)] o [-inf, x(2)] o ... o [-inf, x(n)]
// and o denotes the outer product.
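//
// As in jointLogMarginalLikelihood the sum over models is evaluated in
// log space: with s(m) = log(P(m)) + log(cdf_m(x)) and s* = max_m( s(m) )
// each bound is computed as -log( Sum_m( exp(s(m) - s*) ) ) - s*, which
// avoids underflow when the component c.d.f. values are tiny. The loop
// below also stops early once the remaining (decreasing) weights cannot
// change the result by more than MAXIMUM_RELATIVE_ERROR.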
TDoubleSizePr5Vec logWeights = this->normalizedLogWeights();
LOG_TRACE(<< "logWeights = " << logWeights);
TDouble5Vec logLowerBounds;
TDouble5Vec logUpperBounds;
TMaxAccumulator maxLogLowerBound;
TMaxAccumulator maxLogUpperBound;
double logMaximumRemainder = MINUS_INF;
for (std::size_t i = 0, n = logWeights.size(); i < n; ++i) {
double wi = logWeights[i].first;
const CPrior& model = *m_Models[logWeights[i].second].second;
double li = 0.0;
double ui = 0.0;
if (complement && !model.minusLogJointCdfComplement(samples, weights, li, ui)) {
LOG_ERROR(<< "Failed computing c.d.f. complement for " << samples);
return false;
}
if (!complement && !model.minusLogJointCdf(samples, weights, li, ui)) {
LOG_ERROR(<< "Failed computing c.d.f. for " << samples);
return false;
}
li = wi - li;
ui = wi - ui;
logLowerBounds.push_back(li);
logUpperBounds.push_back(ui);
maxLogLowerBound.add(li);
maxLogUpperBound.add(ui);
// Check if we can exit early with reasonable precision.
if (i + 1 < n) {
logMaximumRemainder = logn(n - i - 1) + logWeights[i + 1].first;
if (logMaximumRemainder < maxLogLowerBound[0] + LOG_MAXIMUM_RELATIVE_ERROR &&
logMaximumRemainder < maxLogUpperBound[0] + LOG_MAXIMUM_RELATIVE_ERROR) {
break;
}
}
}
if (!CTools::logWillUnderflow<double>(maxLogLowerBound[0])) {
maxLogLowerBound[0] = 0.0;
}
if (!CTools::logWillUnderflow<double>(maxLogUpperBound[0])) {
maxLogUpperBound[0] = 0.0;
}
for (std::size_t i = 0; i < logLowerBounds.size(); ++i) {
lowerBound += std::exp(logLowerBounds[i] - maxLogLowerBound[0]);
upperBound += std::exp(logUpperBounds[i] - maxLogUpperBound[0]);
}
lowerBound = -std::log(lowerBound) - maxLogLowerBound[0];
upperBound = -std::log(upperBound) - maxLogUpperBound[0];
if (logLowerBounds.size() < logWeights.size()) {
upperBound += -std::log(1.0 + std::exp(logMaximumRemainder + upperBound));
}
lowerBound = std::max(lowerBound, 0.0);
upperBound = std::max(upperBound, 0.0);
LOG_TRACE(<< "Joint -log(c.d.f." << (complement ? " complement" : "")
<< ") = [" << lowerBound << "," << upperBound << "]");
return true;
}
bool COneOfNPrior::minusLogJointCdf(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
return this->minusLogJointCdfImpl(false /*complement*/, samples, weights,
lowerBound, upperBound);
}
bool COneOfNPrior::minusLogJointCdfComplement(const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound) const {
return this->minusLogJointCdfImpl(true /*complement*/, samples, weights,
lowerBound, upperBound);
}
bool COneOfNPrior::probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation,
const TDouble1Vec& samples,
const TDoubleWeightsAry1Vec& weights,
double& lowerBound,
double& upperBound,
maths_t::ETail& tail) const {
lowerBound = upperBound = 0.0;
tail = maths_t::E_UndeterminedTail;
if (samples.empty()) {
LOG_ERROR(<< "Can't compute distribution for empty sample set");
return false;
}
if (this->isNonInformative()) {
lowerBound = upperBound = 1.0;
return true;
}
// The joint probability of less likely collection of samples can be
// computed from the conditional probabilities of a less likely collection
// of samples from each component model:
// P(R) = Sum_i( P(R | m) * P(m) )
//
// where,
// P(R | m) is the probability of a less likely collection of samples
// from the m'th model and
// P(m) is the prior probability the data are from the m'th model.
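//
// For example (hypothetical numbers), with normalized weights {0.9, 0.1}
// and per-model probabilities {0.02, 0.5} the aggregated bound is roughly
// 0.9 * 0.02 + 0.1 * 0.5 = 0.068, i.e. a low weight model only dominates
// the result if its probability is much larger than the leader's.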
using TDoubleTailPr = std::pair<double, maths_t::ETail>;
using TDoubleTailPrMaxAccumulator = CBasicStatistics::SMax<TDoubleTailPr>::TAccumulator;
TDoubleSizePr5Vec logWeights = this->normalizedLogWeights();
TDoubleTailPrMaxAccumulator tail_;
for (std::size_t i = 0; i < logWeights.size(); ++i) {
double weight = std::exp(logWeights[i].first);
const CPrior& model = *m_Models[logWeights[i].second].second;
if (lowerBound > static_cast<double>(m_Models.size() - i) * weight / MAXIMUM_RELATIVE_ERROR) {
// The probability calculation is relatively expensive so don't
// evaluate the probabilities that aren't needed to get good
// accuracy.
break;
}
double modelLowerBound;
double modelUpperBound;
maths_t::ETail modelTail;
if (!model.probabilityOfLessLikelySamples(calculation, samples, weights, modelLowerBound,
modelUpperBound, modelTail)) {
// Logging handled at a lower level.
lowerBound = 0.0;
upperBound = 1.0;
return false;
}
LOG_TRACE(<< "weight = " << weight << ", modelLowerBound = " << modelLowerBound
<< ", modelUpperBound = " << modelLowerBound);
lowerBound += weight * modelLowerBound;
upperBound += weight * modelUpperBound;
tail_.add({weight * (modelLowerBound + modelUpperBound), modelTail});
}
if (!(lowerBound >= 0.0 && lowerBound <= 1.001) ||
!(upperBound >= 0.0 && upperBound <= 1.001)) {
LOG_ERROR(<< "Bad probability bounds = [" << lowerBound << ", " << upperBound << "]"
<< ", " << logWeights);
}
if (CMathsFuncs::isNan(lowerBound)) {
lowerBound = 0.0;
}
if (CMathsFuncs::isNan(upperBound)) {
upperBound = 1.0;
}
lowerBound = CTools::truncate(lowerBound, 0.0, 1.0);
upperBound = CTools::truncate(upperBound, 0.0, 1.0);
tail = tail_[0].second;
LOG_TRACE(<< "Probability = [" << lowerBound << "," << upperBound << "]");
return true;
}
bool COneOfNPrior::isNonInformative() const {
for (const auto& model : m_Models) {
if (model.second->participatesInModelSelection() &&
model.second->isNonInformative()) {
return true;
}
}
return false;
}
void COneOfNPrior::print(const std::string& indent, std::string& result) const {
result += core_t::LINE_ENDING + indent + "one-of-n";
if (this->isNonInformative()) {
result += " non-informative";
}
result += ':';
result += core_t::LINE_ENDING + indent + " # samples " +
core::CStringUtils::typeToStringPretty(this->numberSamples());
for (const auto& model : m_Models) {
double weight = model.first;
if (weight >= MINIMUM_SIGNIFICANT_WEIGHT) {
std::string indent_ = indent + " weight " +
core::CStringUtils::typeToStringPretty(weight) + " ";
model.second->print(indent_, result);
}
}
}
std::string COneOfNPrior::printJointDensityFunction() const {
return "Not supported";
}
std::uint64_t COneOfNPrior::checksum(std::uint64_t seed) const {
seed = this->CPrior::checksum(seed);
seed = CChecksum::calculate(seed, m_Models);
seed = CChecksum::calculate(seed, m_SampleMoments);
return seed;
}
void COneOfNPrior::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
mem->setName("COneOfNPrior");
core::memory_debug::dynamicSize("m_Models", m_Models, mem);
}
std::size_t COneOfNPrior::memoryUsage() const {
return core::memory::dynamicSize(m_Models);
}
std::size_t COneOfNPrior::staticSize() const {
return sizeof(*this);
}
void COneOfNPrior::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
inserter.insertValue(VERSION_7_1_TAG, "");
for (const auto& model : m_Models) {
inserter.insertLevel(MODEL_7_1_TAG, std::bind(&modelAcceptPersistInserter,
std::cref(model.first),
std::cref(*model.second),
std::placeholders::_1));
}
inserter.insertValue(SAMPLE_MOMENTS_7_1_TAG, m_SampleMoments.toDelimited());
inserter.insertValue(DECAY_RATE_7_1_TAG, this->decayRate(), core::CIEEE754::E_SinglePrecision);
inserter.insertValue(NUMBER_SAMPLES_7_1_TAG, this->numberSamples(),
core::CIEEE754::E_SinglePrecision);
}
COneOfNPrior::TDoubleVec COneOfNPrior::weights() const {
TDoubleVec result = this->logWeights();
for (auto& weight : result) {
weight = std::exp(weight);
}
return result;
}
COneOfNPrior::TDoubleVec COneOfNPrior::logWeights() const {
TDoubleVec result;
result.reserve(m_Models.size());
double Z = 0.0;
for (const auto& model : m_Models) {
result.push_back(model.first.logWeight());
Z += std::exp(result.back());
}
Z = std::log(Z);
for (auto& weight : result) {
weight -= Z;
}
return result;
}
COneOfNPrior::TPriorCPtrVec COneOfNPrior::models() const {
TPriorCPtrVec result;
result.reserve(m_Models.size());
for (const auto& model : m_Models) {
result.push_back(model.second.get());
}
return result;
}
bool COneOfNPrior::modelAcceptRestoreTraverser(const SDistributionRestoreParams& params,
core::CStateRestoreTraverser& traverser) {
CModelWeight weight(1.0);
bool gotWeight = false;
TPriorPtr model;
do {
const std::string& name = traverser.name();
RESTORE_SETUP_TEARDOWN(
WEIGHT_TAG,
/*no-op*/,
traverser.traverseSubLevel(std::bind(&CModelWeight::acceptRestoreTraverser,
&weight, std::placeholders::_1)),
gotWeight = true)
RESTORE(PRIOR_TAG, traverser.traverseSubLevel(std::bind<bool>(
CPriorStateSerialiser(), std::cref(params),
std::ref(model), std::placeholders::_1)))
} while (traverser.next());
if (!gotWeight) {
LOG_ERROR(<< "No weight found");
return false;
}
if (model == nullptr) {
LOG_ERROR(<< "No model found");
return false;
}
m_Models.emplace_back(weight, std::move(model));
return true;
}
COneOfNPrior::TDoubleSizePr5Vec COneOfNPrior::normalizedLogWeights() const {
TDoubleSizePr5Vec result;
double Z = 0.0;
for (std::size_t i = 0; i < m_Models.size(); ++i) {
if (m_Models[i].second->participatesInModelSelection()) {
double logWeight = m_Models[i].first.logWeight();
result.emplace_back(logWeight, i);
Z += std::exp(logWeight);
}
}
Z = std::log(Z);
for (auto& logWeight : result) {
logWeight.first -= Z;
}
std::sort(result.begin(), result.end(), std::greater<TDoubleSizePr>());
return result;
}
double COneOfNPrior::medianModelMean() const {
TDoubleVec means;
means.reserve(m_Models.size());
for (const auto& model : m_Models) {
if (model.second->participatesInModelSelection()) {
means.push_back(model.second->marginalLikelihoodMean());
}
}
return CBasicStatistics::median(means);
}
bool COneOfNPrior::badWeights() const {
for (const auto& model : m_Models) {
if (!CMathsFuncs::isFinite(model.first.logWeight())) {
return true;
}
}
return false;
}
std::string COneOfNPrior::debugWeights() const {
if (m_Models.empty()) {
return std::string();
}
std::ostringstream result;
result << std::scientific << std::setprecision(15);
for (const auto& model : m_Models) {
result << " " << model.first.logWeight();
}
result << " ";
return result.str();
}
}
}
}