lib/maths/common/unittest/CToolsTest.cc (1,049 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <core/CLogger.h>
#include <maths/common/CCompositeFunctions.h>
#include <maths/common/CIntegration.h>
#include <maths/common/CLinearAlgebra.h>
#include <maths/common/CLinearAlgebraEigen.h>
#include <maths/common/CLinearAlgebraTools.h>
#include <maths/common/CLogTDistribution.h>
#include <maths/common/CMathsFuncs.h>
#include <maths/common/CTools.h>
#include <maths/common/CToolsDetail.h>
#include <test/BoostTestCloseAbsolute.h>
#include <test/CRandomNumbers.h>
#include <boost/math/concepts/real_concept.hpp>
#include <boost/math/distributions/beta.hpp>
#include <boost/math/distributions/negative_binomial.hpp>
#include <boost/math/distributions/students_t.hpp>
#include <boost/test/unit_test.hpp>
#include <array>
#include <bitset>
#include <numeric>
BOOST_AUTO_TEST_SUITE(CToolsTest)
using namespace ml;
using namespace maths;
using namespace common;
using namespace test;
namespace {
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleBoolPr = std::pair<double, bool>;
using TDoubleVec = std::vector<double>;
namespace adapters {
template<typename DISTRIBUTION>
bool isDiscrete(const DISTRIBUTION&) {
return false;
}
bool isDiscrete(const boost::math::negative_binomial_distribution<>&) {
return true;
}
template<typename DISTRIBUTION>
TDoubleDoublePr support(const DISTRIBUTION& distribution) {
return boost::math::support(distribution);
}
TDoubleDoublePr support(const CLogTDistribution& logt) {
CLogTDistribution::TOptionalDouble minimum = localMinimum(logt);
return TDoubleDoublePr(minimum ? *minimum : 0.0,
boost::math::tools::max_value<double>());
}
template<typename DISTRIBUTION>
TDoubleBoolPr stationaryPoint(const DISTRIBUTION& distribution) {
return TDoubleBoolPr(boost::math::mode(distribution), true);
}
TDoubleBoolPr stationaryPoint(const CLogTDistribution& logt) {
return TDoubleBoolPr(ml::maths::common::mode(logt), true);
}
TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<>& beta) {
if (beta.alpha() < 1.0 && beta.beta() < 1.0) {
return TDoubleBoolPr((beta.alpha() - 1.0) / (beta.alpha() + beta.beta() - 2.0), false);
}
return TDoubleBoolPr(boost::math::mode(beta), true);
}
template<typename DISTRIBUTION>
double pdf(const DISTRIBUTION& distribution, const double& x) {
return CTools::safePdf(distribution, x);
}
double pdf(const CLogTDistribution& logt, const double& x) {
return ml::maths::common::pdf(logt, x);
}
template<typename DISTRIBUTION>
double cdf(const DISTRIBUTION& distribution, const double& x) {
return CTools::safeCdf(distribution, x);
}
double cdf(const CLogTDistribution& logt, const double& x) {
return ml::maths::common::cdf(logt, x);
}
template<typename DISTRIBUTION>
double cdfComplement(const DISTRIBUTION& distribution, const double& x) {
return CTools::safeCdfComplement(distribution, x);
}
double cdfComplement(const CLogTDistribution& logt, const double& x) {
return ml::maths::common::cdfComplement(logt, x);
}
} // adapters::
template<typename DISTRIBUTION>
double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distribution, double x) {
TDoubleBoolPr stationaryPoint = adapters::stationaryPoint(distribution);
double eps = 1e-8;
double pdf = adapters::pdf(distribution, x);
LOG_TRACE(<< "x = " << x << ", f(x) = " << pdf
<< ", stationaryPoint = " << stationaryPoint.first);
double x1 = stationaryPoint.first;
if (x > stationaryPoint.first) {
// Search for lower bound.
double minX = adapters::support(distribution).first + eps;
for (double increment = std::max(x1 / 2.0, 1.0);
x1 > minX &&
((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) ||
(!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf));
x1 = std::max(x1 - increment, minX), increment *= 2.0) {
// Empty.
}
}
double x2 = stationaryPoint.first;
if (x < stationaryPoint.first) {
// Search for upper bound.
double maxX = adapters::support(distribution).second - eps;
for (double increment = std::max(x2 / 2.0, 1.0);
x2 < maxX &&
((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) ||
(!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf));
x2 = std::min(x2 + increment, maxX), increment *= 2.0) {
// Empty.
}
}
LOG_TRACE(<< "1) x1 = " << x1 << ", x2 = " << x2);
if (adapters::pdf(distribution, x1) > adapters::pdf(distribution, x2)) {
std::swap(x1, x2);
}
LOG_TRACE(<< "2) x1 = " << x1 << ", x2 = " << x2);
// Binary search.
while (std::fabs(x1 - x2) > eps) {
double x3 = (x1 + x2) / 2.0;
if (adapters::pdf(distribution, x3) > pdf) {
x2 = x3;
} else {
x1 = x3;
}
}
LOG_TRACE(<< "3) x1 = " << x1 << ", x2 = " << x2);
double y = (x > (x1 + x2) / 2.0) ? std::min(x1, x2) : std::max(x1, x2);
if (x > y) {
std::swap(x, y);
}
LOG_TRACE(<< "x = " << x << ", y = " << y
<< ", f(x) = " << adapters::pdf(distribution, x)
<< ", f(y) = " << adapters::pdf(distribution, y));
if (stationaryPoint.second) {
double cdfy =
adapters::cdfComplement(distribution, y) +
(adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
double cdfx = adapters::cdf(distribution, x);
LOG_TRACE(<< "F(x) = " << cdfx << ", 1 - F(y) = " << cdfy);
return cdfx + cdfy;
}
double cdfy = adapters::cdf(distribution, y) +
(adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0);
double cdfx = adapters::cdf(distribution, x);
LOG_TRACE(<< "F(x) = " << cdfx << ", F(y) = " << cdfy);
return cdfy - cdfx;
}
template<typename DISTRIBUTION>
double numericalProbabilityOfLessLikelySample(const DISTRIBUTION& distribution, double x) {
return numericalProbabilityOfLessLikelySampleImpl(distribution, x);
}
double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomial_distribution<>& negativeBinomial,
double x) {
double fx = CTools::safePdf(negativeBinomial, x);
double m = boost::math::mode(negativeBinomial);
double fm = CTools::safePdf(negativeBinomial, m);
if (fx >= fm) {
return 1.0;
}
double f0 = CTools::safePdf(negativeBinomial, 0.0);
if (x > m && fx < f0) {
return CTools::safeCdfComplement(negativeBinomial, x) +
CTools::safePdf(negativeBinomial, x);
}
return numericalProbabilityOfLessLikelySampleImpl(negativeBinomial, x);
}
double numericalProbabilityOfLessLikelySample(const CLogTDistribution& logt, double x) {
// We need special handling for the case that the p.d.f. is
// single sided and if it is greater at x than the local "mode".
double m = mode(logt);
if (m == 0.0) {
return cdfComplement(logt, x);
}
double fx = pdf(logt, x);
double fm = pdf(logt, m);
if (fx > fm) {
return cdfComplement(logt, x);
}
CLogTDistribution::TOptionalDouble xmin = localMinimum(logt);
if (xmin && (pdf(logt, *xmin) > fx || *xmin == m)) {
return cdfComplement(logt, x);
}
return numericalProbabilityOfLessLikelySampleImpl(logt, x);
}
double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<>& beta,
double x) {
// We need special handling of the case that the equal p.d.f.
// point is very close to 0 or 1.
double fx = CTools::safePdf(beta, x);
double a = beta.alpha();
double b = beta.beta();
double xmin = 1000.0 * std::numeric_limits<double>::min();
if (a >= 1.0 && fx < CTools::safePdf(beta, xmin)) {
return std::exp(a * std::log(xmin) - std::log(a) + std::lgamma(a + b) -
std::lgamma(a) - std::lgamma(b)) +
CTools::safeCdfComplement(beta, x);
}
double xmax = 1.0 - std::numeric_limits<double>::epsilon();
if (b >= 1.0 && fx < CTools::safePdf(beta, xmax)) {
double y = std::exp(std::log(boost::math::beta(a, b) * fx) / b);
return std::pow(y, b) / b / boost::math::beta(a, b) + CTools::safeCdf(beta, x);
}
return numericalProbabilityOfLessLikelySampleImpl(beta, x);
}
template<typename DISTRIBUTION>
class CPdf {
public:
CPdf(const DISTRIBUTION& distribution) : m_Distribution(distribution) {}
bool operator()(double x, double& result) const {
result = boost::math::pdf(m_Distribution, x);
return true;
}
private:
DISTRIBUTION m_Distribution;
};
class CIdentity {
public:
bool operator()(double x, double& result) const {
result = x;
return true;
}
};
template<typename DISTRIBUTION>
double numericalIntervalExpectation(const DISTRIBUTION& distribution,
double a,
double b,
std::size_t steps = 10) {
if (a == b) {
return b;
}
double numerator = 0.0;
double denominator = 0.0;
CPdf<DISTRIBUTION> fx(distribution);
maths::common::CCompositeFunctions::CProduct<CPdf<DISTRIBUTION>, CIdentity> xfx(fx);
double dx = (b - a) / static_cast<double>(steps);
for (std::size_t i = 0; i < steps; ++i, a += dx) {
double fxi;
BOOST_TEST_REQUIRE(
maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderFive>(
fx, a, a + dx, fxi));
double xfxi;
BOOST_TEST_REQUIRE(
maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderFive>(
xfx, a, a + dx, xfxi));
numerator += xfxi;
denominator += fxi;
}
return numerator / denominator;
}
template<typename T>
class CTruncatedPdf {
public:
CTruncatedPdf(const maths::common::CMixtureDistribution<T>& mixture, double cutoff)
: m_Mixture(mixture), m_Cutoff(cutoff) {}
bool operator()(double x, double& fx) const {
fx = maths::common::pdf(m_Mixture, x);
if (fx > m_Cutoff) {
fx = 0.0;
}
return true;
}
private:
const maths::common::CMixtureDistribution<T>& m_Mixture;
double m_Cutoff;
};
template<typename T>
class CLogPdf {
public:
CLogPdf(const maths::common::CMixtureDistribution<T>& mixture)
: m_Mixture(mixture) {}
double operator()(double x) const {
return std::log(maths::common::pdf(m_Mixture, x));
}
bool operator()(double x, double& fx) const {
fx = std::log(maths::common::pdf(m_Mixture, x));
return true;
}
private:
const maths::common::CMixtureDistribution<T>& m_Mixture;
};
void printByte(const unsigned char& byte, std::ostream& o) {
o << std::bitset<CHAR_BIT>{byte};
}
template<typename T>
std::string printBits(const T& t) {
std::ostringstream o;
const unsigned char* byte{reinterpret_cast<const unsigned char*>(&t)};
for (std::size_t s = 0; s < sizeof(t); ++s, ++byte) {
printByte(*byte, o);
}
return o.str();
}
}
BOOST_AUTO_TEST_CASE(testProbabilityOfLessLikelySample) {
// The probability of a lower likelihood sample x from a single
// mode distribution is:
// F(a) + 1 - F(b)
//
// where,
// a satisfies f(a) = f(x) and a < m,
// b satisfies f(b) = f(x) and b > m,
// F(.) denotes the c.d.f. of the distribution,
// f(.) denotes the p.d.f. of the distribution and
// m is the mode of the distribution.
//
// We'll use this relation to test our probabilities.
CTools::CProbabilityOfLessLikelySample probabilityOfLessLikelySample(maths_t::E_TwoSided);
double p1, p2;
maths_t::ETail tail = maths_t::E_UndeterminedTail;
double m;
LOG_DEBUG(<< "******** normal ********");
boost::math::normal_distribution<> normal(3.0, 5.0);
p1 = numericalProbabilityOfLessLikelySample(normal, -1.0);
p2 = probabilityOfLessLikelySample(normal, -1.0, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
m = adapters::stationaryPoint(normal).first;
tail = maths_t::E_UndeterminedTail;
p1 = 1.0;
p2 = probabilityOfLessLikelySample(normal, m, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(normal, 8.0);
p2 = probabilityOfLessLikelySample(normal, 8.0, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
LOG_DEBUG(<< "******** student's t ********");
boost::math::students_t students(2.0);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(students, -4.0);
p2 = probabilityOfLessLikelySample(students, -4.0, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
m = adapters::stationaryPoint(students).first;
tail = maths_t::E_UndeterminedTail;
p1 = 1.0;
p2 = probabilityOfLessLikelySample(students, m, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(students, 3.0);
p2 = probabilityOfLessLikelySample(students, 3.0, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
LOG_DEBUG(<< "******** negative binomial ********");
{
double successFraction[] = {0.5, 1.0, 10.0, 100.0, 1000.0};
double successProbability[] = {1e-3, 0.25, 0.5, 0.75, 1.0 - 1e-3};
for (size_t i = 0; i < std::size(successFraction); ++i) {
for (size_t j = 0; j < std::size(successProbability); ++j) {
LOG_DEBUG(<< "**** r = " << successFraction[i]
<< ", p = " << successProbability[j] << " ****");
boost::math::negative_binomial_distribution<> negativeBinomial(
successFraction[i], successProbability[j]);
if (successFraction[i] <= 1.0) {
// Monotone decreasing.
double x = std::fabs(std::log(successProbability[j]));
for (int l = 0; l < 10; ++l) {
tail = maths_t::E_UndeterminedTail;
x = std::floor(2.0 * x + 0.5);
p1 = CTools::safeCdfComplement(negativeBinomial, x) +
CTools::safePdf(negativeBinomial, x);
p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
continue;
}
double m1 = boost::math::mode(negativeBinomial);
BOOST_REQUIRE_EQUAL(
1.0, probabilityOfLessLikelySample(negativeBinomial, m1, tail));
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
double offset = m1;
for (int l = 0; l < 5; ++l) {
offset /= 2.0;
double x = std::floor(m1 - offset);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.02 * std::fabs(std::min(std::log(p1), std::log(p2)))));
if (offset > 0.0)
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
if (offset == 0.0)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
x = std::ceil(m1 + offset);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2));
if (offset > 0.0)
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
if (offset == 0.0)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
double factor = 1.0;
for (int l = 0; l < 5; ++l) {
factor *= 2.0;
double x = std::floor(m1 / factor);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))));
if (x != m1)
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
if (x == m1)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
x = std::ceil(m1 * factor);
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x);
p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))));
if (x != m1)
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
if (x == m1)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
}
}
}
// Note when we are testing the tails of the distribution we
// use the relative error in the log.
LOG_DEBUG(<< "******** log normal ********");
boost::math::lognormal_distribution<> logNormal(1.0, 0.5);
tail = maths_t::E_UndeterminedTail;
p1 = -std::log(numericalProbabilityOfLessLikelySample(logNormal, 0.3));
p2 = -std::log(probabilityOfLessLikelySample(logNormal, 0.3, tail));
LOG_DEBUG(<< "-log(p1) = " << p1 << ", -log(p2) = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.05 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
m = adapters::stationaryPoint(logNormal).first;
tail = maths_t::E_UndeterminedTail;
p1 = 1.0;
p2 = probabilityOfLessLikelySample(logNormal, m, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
tail = maths_t::E_UndeterminedTail;
p1 = -std::log(numericalProbabilityOfLessLikelySample(logNormal, 12.0));
p2 = -std::log(probabilityOfLessLikelySample(logNormal, 12.0, tail));
LOG_DEBUG(<< "-log(p1) = " << p1 << ", -log(p2) = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
LOG_DEBUG(<< "******** log t ********");
{
double degreesFreedom[] = {1.0, 5.0, 50.0};
double locations[] = {-0.5, 0.1, 1.0, 5.0};
double scales[] = {0.1, 1.0, 5.0};
for (size_t i = 0; i < std::size(degreesFreedom); ++i) {
for (size_t j = 0; j < std::size(locations); ++j) {
for (size_t k = 0; k < std::size(scales); ++k) {
LOG_DEBUG(<< "**** v = " << degreesFreedom[i] << ", l = "
<< locations[j] << ", s = " << scales[k] << " ****");
CLogTDistribution logt(degreesFreedom[i], locations[j], scales[k]);
double m1 = mode(logt);
if (m1 == 0.0) {
// Monotone decreasing.
double x = std::exp(locations[j]) / 32.0;
tail = maths_t::E_UndeterminedTail;
for (int l = 0; l < 10; ++l) {
x *= 2.0;
p1 = cdfComplement(logt, x);
p2 = probabilityOfLessLikelySample(logt, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2));
}
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
continue;
}
double offset = m1;
for (int l = 0; l < 5; ++l) {
offset /= 2.0;
double x = m1 - offset;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(logt, x);
p2 = probabilityOfLessLikelySample(logt, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
x = m1 + offset;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(logt, x);
p2 = probabilityOfLessLikelySample(logt, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
double factor = 1.0;
for (int l = 0; l < 5; ++l) {
factor *= 2.0;
double x = m1 / factor;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(logt, x);
p2 = probabilityOfLessLikelySample(logt, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
x = m1 * factor;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(logt, x);
p2 = probabilityOfLessLikelySample(logt, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.05 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
}
}
}
}
LOG_DEBUG(<< "******** gamma ********");
{
double shapes[] = {0.1, 1.0, 1.1, 100.0, 10000.0};
double scales[] = {0.0001, 0.01, 1.0, 10.0};
for (size_t i = 0; i < std::size(shapes); ++i) {
for (size_t j = 0; j < std::size(scales); ++j) {
LOG_DEBUG(<< "***** shape = " << shapes[i]
<< ", scale = " << scales[j] << " *****");
boost::math::gamma_distribution<> gamma(shapes[i], scales[j]);
if (shapes[i] <= 1.0) {
double x = boost::math::mean(gamma);
tail = maths_t::E_UndeterminedTail;
for (int k = 0; k < 10; ++k) {
x *= 2.0;
p1 = CTools::safeCdfComplement(gamma, x);
p2 = probabilityOfLessLikelySample(gamma, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
continue;
}
double m1 = boost::math::mode(gamma);
LOG_DEBUG(<< "mode = " << m1);
double offset = 1.0;
for (int k = 0; k < 5; ++k) {
offset /= 2.0;
double x = (1.0 - offset) * m1;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(gamma, x);
p2 = probabilityOfLessLikelySample(gamma, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
double y = (1.0 + offset) * m1;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(gamma, y);
p2 = probabilityOfLessLikelySample(gamma, y, tail);
LOG_TRACE(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
double factor = 1.0;
for (int k = 0; k < 5; ++k) {
factor *= 2.0;
double x = m1 / factor;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(gamma, x);
p2 = probabilityOfLessLikelySample(gamma, x, tail);
LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
double y = factor * m1;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(gamma, y);
p2 = probabilityOfLessLikelySample(gamma, y, tail);
LOG_TRACE(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2
<< ", log(p1) = " << std::log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <=
0.01 * std::fabs(std::min(std::log(p1), std::log(p2)))));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
}
}
}
LOG_DEBUG(<< "******** beta ********");
{
double alphas[] = {0.01, 0.98, 1.0, 1.01, 1000.0};
double betas[] = {0.01, 0.98, 1.0, 1.01, 1000.0};
for (size_t i = 0; i < std::size(alphas); ++i) {
for (size_t j = 0; j < std::size(betas); ++j) {
LOG_DEBUG(<< "**** alpha = " << alphas[i]
<< ", beta = " << betas[j] << " ****");
boost::math::beta_distribution<> beta(alphas[i], betas[j]);
if (alphas[i] == 1.0 && betas[j] == 1.0) {
// Constant.
for (int k = 0; k < 6; ++k) {
double x = static_cast<double>(k) / 5.0;
tail = maths_t::E_UndeterminedTail;
p1 = 1.0;
p2 = probabilityOfLessLikelySample(beta, x, tail);
LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
<< ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
} else if (alphas[i] <= 1.0 && betas[j] >= 1.0) {
// Monotone decreasing.
for (int k = 0; k < 6; ++k) {
double x = static_cast<double>(k) / 5.0;
tail = maths_t::E_UndeterminedTail;
p1 = CTools::safeCdfComplement(beta, x);
p2 = probabilityOfLessLikelySample(beta, x, tail);
LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
<< ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
}
} else if (alphas[i] >= 1.0 && betas[j] <= 1.0) {
// Monotone increasing.
for (int k = 0; k < 6; ++k) {
double x = static_cast<double>(k) / 5.0;
tail = maths_t::E_UndeterminedTail;
p1 = CTools::safeCdf(beta, x);
p2 = probabilityOfLessLikelySample(beta, x, tail);
LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x)
<< ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2));
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
}
} else {
double stationaryPoint = adapters::stationaryPoint(beta).first;
bool maximum = adapters::stationaryPoint(beta).second;
LOG_DEBUG(<< "stationary point = " << stationaryPoint);
double epsMinus = stationaryPoint;
double epsPlus = 1.0 - stationaryPoint;
for (int k = 0; k < 5; ++k) {
epsMinus /= 2.0;
double xMinus = stationaryPoint - epsMinus;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(beta, xMinus);
p2 = probabilityOfLessLikelySample(beta, xMinus, tail);
LOG_TRACE(<< "x- = " << xMinus << ", p1 = " << p1
<< ", p2 = " << p2 << ", log(p1) = " << log(p1)
<< ", log(p2) = " << std::log(p2));
BOOST_TEST_REQUIRE(
(std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) ||
std::fabs(std::log(p1) - std::log(p2)) <
0.25 * std::fabs(std::min(std::log(p1), std::log(p2)))));
if (maximum)
BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail);
if (!maximum)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
epsPlus /= 2.0;
double xPlus = stationaryPoint + epsPlus;
tail = maths_t::E_UndeterminedTail;
p1 = numericalProbabilityOfLessLikelySample(beta, xPlus);
p2 = probabilityOfLessLikelySample(beta, xPlus, tail);
LOG_TRACE(<< "x+ = " << xPlus << ", p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.05 * std::max(p1, p2));
if (maximum)
BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail);
if (!maximum)
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
}
}
}
}
// Special cases:
{
// x at maximum
boost::math::beta_distribution<> beta(2.0, 8.0);
double mode = adapters::stationaryPoint(beta).first;
tail = maths_t::E_UndeterminedTail;
p1 = 1.0;
p2 = probabilityOfLessLikelySample(beta, mode, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
{
// x at minimum
boost::math::beta_distribution<> beta(0.6, 0.3);
double mode = adapters::stationaryPoint(beta).first;
tail = maths_t::E_UndeterminedTail;
p1 = 0.0;
p2 = probabilityOfLessLikelySample(beta, mode, tail);
LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2);
BOOST_REQUIRE_EQUAL(p1, p2);
BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail);
}
}
BOOST_AUTO_TEST_CASE(testIntervalExpectation) {
// We check the expectations agree with numerical integration
// and also some corner cases. Specifically, that we handle
// +/- infinity correctly and the also the case that a and b
// are very close.
maths::common::CTools::SIntervalExpectation expectation;
double expected;
double actual;
LOG_DEBUG(<< "*** Normal ***");
{
boost::math::normal_distribution<> normal(10.0, 5.0);
expected = numericalIntervalExpectation(normal, 0.0, 12.0);
actual = expectation(normal, 0.0, 12.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = numericalIntervalExpectation(normal, -40.0, 13.0);
actual = expectation(normal, std::numeric_limits<double>::lowest(), 13.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = 7.0;
actual = expectation(normal, 7.0, 7.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
expected = 8.1;
actual = expectation(normal, 8.1,
8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
}
LOG_DEBUG(<< "*** Log-Normal ***");
{
boost::math::lognormal_distribution<> logNormal(1.5, 0.8);
expected = numericalIntervalExpectation(logNormal, 0.5, 7.0);
actual = expectation(logNormal, 0.5, 7.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = numericalIntervalExpectation(logNormal, 0.0, 9.0);
actual = expectation(logNormal, std::numeric_limits<double>::lowest(), 9.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = 6.0;
actual = expectation(logNormal, 6.0, 6.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
expected = 8.1;
actual = expectation(logNormal, 8.1,
8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
}
LOG_DEBUG(<< "*** Gamma ***");
{
boost::math::gamma_distribution<> gamma(5.0, 3.0);
expected = numericalIntervalExpectation(gamma, 0.5, 4.0);
actual = expectation(gamma, 0.5, 4.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = numericalIntervalExpectation(gamma, 0.0, 5.0);
actual = expectation(gamma, std::numeric_limits<double>::lowest(), 5.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
expected = 6.0;
actual = expectation(gamma, 6.0, 6.0);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
expected = 8.1;
actual = expectation(gamma, 8.1,
8.1 * (1.0 + std::numeric_limits<double>::epsilon()));
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected);
}
}
BOOST_AUTO_TEST_CASE(testIntervalExpectationNumericIssues) {
// We check some limiting cases which lead to numerically challenging
// cases. These can easily generate NaNs if not handled carefully.
maths::common::CTools::SIntervalExpectation expectation;
LOG_DEBUG(<< "*** Log-Normal ***");
{
for (double a : {0.01, 1e-4, 1e-8, 1e-16, 1e-32, 0.0}) {
for (double b : {0.01, 0.1}) {
for (double location : {-1.0, 1.0}) {
for (double scale : {0.1, 2.0}) {
boost::math::lognormal logNormal(location, scale);
double actual = expectation(logNormal, a, b);
double expected =
numericalIntervalExpectation(logNormal, a, b, 100);
if (maths::common::CMathsFuncs::isNan(expected) == false) {
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE(actual, expected, 1.0 /* percent */);
}
}
}
}
}
}
LOG_DEBUG(<< "*** Gamma ***");
{
for (double a : {0.01, 1e-4, 1e-8, 1e-16, 1e-32, 0.0}) {
for (double b : {0.01, 0.1}) {
for (double shape : {1.0, 5.0}) {
for (double rate : {1.0, 0.2}) {
boost::math::gamma_distribution<> gamma(
0.236554 * shape, 1.0 / 36.9769 / rate);
double actual = expectation(gamma, a, b);
double expected = numericalIntervalExpectation(gamma, a, b, 100);
LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual);
BOOST_REQUIRE_CLOSE(actual, expected, 20.0 /* percent */);
}
}
}
}
}
}
BOOST_AUTO_TEST_CASE(testMixtureProbabilityOfLessLikelySample) {
using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator;
test::CRandomNumbers rng;
std::size_t n = 500;
TDoubleVec x;
rng.generateUniformSamples(0.0, 1000.0, 20, x);
TDoubleVec means;
rng.generateUniformSamples(200.0, 800.0, n, means);
TDoubleVec sd;
rng.generateUniformSamples(0.1, 50.0, n, sd);
TDoubleVec weights;
rng.generateUniformSamples(1.0, 10.0, n, weights);
for (std::size_t i = 4; i <= 20; i += 4) {
LOG_DEBUG(<< "*** modes = " << i << " ***");
TMeanAccumulator meanError;
TMeanAccumulator meanLogError;
for (std::size_t j = 0; j < n - i; j += i) {
TDoubleVec modeWeights;
std::vector<boost::math::normal> modes;
double a = std::numeric_limits<double>::max();
double b = -std::numeric_limits<double>::max();
for (std::size_t k = 0; k < i; ++k) {
modeWeights.push_back(weights[j + k]);
modes.push_back(boost::math::normal(means[j + k], sd[j + k]));
a = std::min(a, means[j + k]);
b = std::max(b, means[j + k]);
}
maths::common::CMixtureDistribution<boost::math::normal> mixture(modeWeights, modes);
for (std::size_t k = 0; k < x.size(); ++k) {
double logFx = maths::common::pdf(mixture, x[k]);
if (logFx == 0.0) {
logFx = 10.0 * core::constants::LOG_MIN_DOUBLE;
} else {
logFx = std::log(logFx);
}
maths::common::CTools::CMixtureProbabilityOfLessLikelySample calculator(
i, x[k], logFx, a, b);
for (std::size_t l = 0; l < modeWeights.size(); ++l) {
calculator.addMode((mixture.weights())[l],
boost::math::mean(modes[l]),
boost::math::standard_deviation(modes[l]));
}
double pTails = 0.0;
CLogPdf<boost::math::normal> logPdf(mixture);
maths::common::CEqualWithTolerance<double> equal(
maths::common::CToleranceTypes::E_AbsoluteTolerance, 0.5);
double xleft;
BOOST_TEST_REQUIRE(calculator.leftTail(logPdf, 10, equal, xleft));
pTails += maths::common::cdf(mixture, xleft);
double xright;
BOOST_TEST_REQUIRE(calculator.rightTail(logPdf, 10, equal, xright));
pTails += maths::common::cdfComplement(mixture, xright);
double p = pTails + calculator.calculate(logPdf, pTails);
double pExpected = pTails;
CTruncatedPdf<boost::math::normal> pdf(mixture, std::exp(logFx));
for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a);
l < 2 * static_cast<std::size_t>(b - a); xi += step, ++l) {
double pi;
maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderThree>(
pdf, xi, xi + step, pi);
pExpected += pi;
}
LOG_TRACE(<< "pTails = " << pTails);
LOG_TRACE(<< "x = " << x[k] << ", log(f(x)) = " << logFx
<< ", P(x) = " << p << ", expected P(x) = " << pExpected);
BOOST_TEST_REQUIRE(pExpected > 0.0);
if (pExpected > 0.1) {
BOOST_REQUIRE_CLOSE_ABSOLUTE(pExpected, p, 0.12);
} else if (pExpected > 1e-10) {
BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(pExpected), std::log(p),
0.15 * std::fabs(std::log(pExpected)));
} else {
BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(pExpected), std::log(p),
0.015 * std::fabs(std::log(pExpected)));
}
meanError.add(std::fabs(p - pExpected));
meanLogError.add(std::fabs(std::log(p) - std::log(pExpected)) /
std::max(std::fabs(std::log(pExpected)),
std::fabs(std::log(p))));
}
}
LOG_DEBUG(<< "meanError = " << maths::common::CBasicStatistics::mean(meanError));
LOG_DEBUG(<< "meanLogError = " << maths::common::CBasicStatistics::mean(meanLogError));
BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.005);
BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanLogError) < 0.03);
}
}
BOOST_AUTO_TEST_CASE(testAnomalyScore) {
// Test p = inverseAnomalyScore(anomalyScore(p))
double p = 0.04;
for (std::size_t i = 0; i < 305; ++i, p *= 0.1) {
double anomalyScore = CTools::anomalyScore(p);
LOG_DEBUG(<< "p = " << p << ", anomalyScore = " << anomalyScore);
BOOST_REQUIRE_CLOSE_ABSOLUTE(p, CTools::inverseAnomalyScore(anomalyScore), 1e-3 * p);
}
}
BOOST_AUTO_TEST_CASE(testFastLog) {
test::CRandomNumbers rng;
// Small
{
TDoubleVec x;
rng.generateUniformSamples(-100.0, 0.0, 10000, x);
for (std::size_t i = 0; i < x.size(); ++i) {
LOG_TRACE(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = "
<< maths::common::CTools::fastLog(std::exp(x[i])));
BOOST_REQUIRE_CLOSE_ABSOLUTE(
x[i], maths::common::CTools::fastLog(std::exp(x[i])), 5e-5);
}
}
// Mid
{
TDoubleVec x;
rng.generateUniformSamples(1.0, 1e6, 10000, x);
for (std::size_t i = 0; i < x.size(); ++i) {
LOG_TRACE(<< "x = " << x[i] << ", log(x) = " << std::log(x[i])
<< ", fast log(x) = " << maths::common::CTools::fastLog(x[i]));
BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(x[i]),
maths::common::CTools::fastLog(x[i]), 5e-5);
}
}
// Large
{
TDoubleVec x;
rng.generateUniformSamples(20.0, 80.0, 10000, x);
for (std::size_t i = 0; i < x.size(); ++i) {
LOG_TRACE(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = "
<< maths::common::CTools::fastLog(std::exp(x[i])));
BOOST_REQUIRE_CLOSE_ABSOLUTE(
x[i], maths::common::CTools::fastLog(std::exp(x[i])), 5e-5);
}
}
}
BOOST_AUTO_TEST_CASE(testMiscellaneous) {
double x_[] = {0.0, 3.2, 2.1, -1.8, 4.5};
maths::common::CVectorNx1<double, 5> x(x_, x_ + 5);
maths::common::CVectorNx1<double, 5> a(-2.0);
maths::common::CVectorNx1<double, 5> b(5.0);
double expected[][5] = {
{0.0, 3.2, 2.1, -1.8, 4.5}, {0.0, 3.2, 2.1, -1.5, 4.5},
{0.0, 3.2, 2.1, -1.0, 4.0}, {0.0, 3.2, 2.1, -0.5, 3.5},
{0.0, 3.0, 2.1, 0.0, 3.0}, {0.5, 2.5, 2.1, 0.5, 2.5},
{1.0, 2.0, 2.0, 1.0, 2.0}, {1.5, 1.5, 1.5, 1.5, 1.5}};
for (std::size_t i = 0; a <= b; ++i) {
maths::common::CVectorNx1<double, 5> expect(expected[i]);
maths::common::CVectorNx1<double, 5> actual =
maths::common::CTools::truncate(x, a, b);
BOOST_REQUIRE_EQUAL(expect, actual);
a = a + maths::common::CVectorNx1<double, 5>(0.5);
b = b - maths::common::CVectorNx1<double, 5>(0.5);
}
}
BOOST_AUTO_TEST_CASE(testLgamma) {
std::array<double, 8> testData = {{3.5, 0.125, -0.125, 0.000244140625, 1.3552527156068805e-20,
4.95547e+25, 5.01753e+25, 8.64197e+25}};
std::array<double, 8> expectedData = {
{1.2009736023470742248160218814507129957702389154682,
2.0194183575537963453202905211670995899482809521344,
2.1653002489051702517540619481440174064962195287626,
8.3176252939431805089043336920440196990966796875000,
45.7477139169563926657247066032141447067260742187500,
2.882355039447984e+27, 2.919076782442754e+27, 5.074673490557339e+27}};
for (std::size_t i = 0; i < testData.size(); ++i) {
double actual;
double expected = expectedData[i];
BOOST_TEST_REQUIRE(maths::common::CTools::lgamma(testData[i], actual, true));
BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected);
}
double result;
BOOST_TEST_REQUIRE(maths::common::CTools::lgamma(0, result, false));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(0, result, true) == false));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(-1, result, false)));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(-1, result, true) == false));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(
std::numeric_limits<double>::max() - 1, result, false)));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(std::numeric_limits<double>::max() - 1,
result, true) == false));
BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
}
BOOST_AUTO_TEST_CASE(testSoftmax) {
// Test some invariants and that std::vector and maths::common::CDenseVector versions agree.
using TDoubleVector = maths::common::CDenseVector<double>;
test::CRandomNumbers rng;
TDoubleVec z;
for (std::size_t t = 0; t < 100; ++t) {
rng.generateUniformSamples(-3.0, 3.0, 5, z);
TDoubleVec p{z};
CTools::inplaceSoftmax(p);
BOOST_REQUIRE_CLOSE(1.0, std::accumulate(p.begin(), p.end(), 0.0), 1e-6);
BOOST_TEST_REQUIRE(*std::min_element(p.begin(), p.end()) >= 0.0);
TDoubleVector p_{TDoubleVector::fromStdVector(z)};
CTools::inplaceSoftmax(p_);
for (std::size_t i = 0; i < 5; ++i) {
BOOST_REQUIRE_CLOSE(p[i], p_[i], 1e-6);
}
}
}
BOOST_AUTO_TEST_CASE(testLogSoftmax) {
// Test some invariants and that std::vector and maths::common::CDenseVector versions agree.
using TDoubleVector = maths::common::CDenseVector<double>;
test::CRandomNumbers rng;
TDoubleVec z;
for (std::size_t t = 0; t < 100; ++t) {
rng.generateUniformSamples(-3.0, 3.0, 5, z);
TDoubleVec p{z};
CTools::inplaceSoftmax(p);
TDoubleVec logP{z};
CTools::inplaceLogSoftmax(logP);
BOOST_TEST_REQUIRE(*std::max_element(logP.begin(), logP.end()) <= 0.0);
for (std::size_t i = 0; i < 5; ++i) {
BOOST_REQUIRE_CLOSE(std::log(p[i]), logP[i], 1e-6);
}
TDoubleVector logP_{TDoubleVector::fromStdVector(z)};
CTools::inplaceLogSoftmax(logP_);
for (std::size_t i = 0; i < 5; ++i) {
BOOST_REQUIRE_CLOSE(logP[i], logP_[i], 1e-6);
}
}
}
BOOST_AUTO_TEST_CASE(testStable) {
// Test the bit representation of the stable log and exp of some random values.
// This will fail if they're different on any of our target platforms.
BOOST_REQUIRE_EQUAL("1001000010101011111010010001000011111110001100011111001110111111",
printBits(CTools::stableLog(0.301283021)));
BOOST_REQUIRE_EQUAL("1001010011010100101100111011101001100110101100010010110101000000",
printBits(CTools::stableLog(2803801.9332)));
BOOST_REQUIRE_EQUAL("1100010011111111100101000001110111100000001011010001001101000000",
printBits(CTools::stableLog(120.880233)));
BOOST_REQUIRE_EQUAL("1110111001101001111010100000001100101010100100110000011001000000",
printBits(CTools::stableLog(16.808042323)));
BOOST_REQUIRE_EQUAL("1000000001000110101011010011001110111111101111000001000101000000",
printBits(CTools::stableExp(1.48937498342)));
BOOST_REQUIRE_EQUAL("1101111011000110011111000001010001101011111111000011000001000000",
printBits(CTools::stableExp(2.832390)));
BOOST_REQUIRE_EQUAL("0101111001100101111110101101101100100100111001101001001100111111",
printBits(CTools::stableExp(-3.94080233)));
BOOST_REQUIRE_EQUAL("1111100000011010010001001000000100011011010100100000010101000000",
printBits(CTools::stableExp(0.98023840)));
}
BOOST_AUTO_TEST_SUITE_END()