lib/maths/common/unittest/CToolsTest.cc (1,049 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #include <core/CLogger.h> #include <maths/common/CCompositeFunctions.h> #include <maths/common/CIntegration.h> #include <maths/common/CLinearAlgebra.h> #include <maths/common/CLinearAlgebraEigen.h> #include <maths/common/CLinearAlgebraTools.h> #include <maths/common/CLogTDistribution.h> #include <maths/common/CMathsFuncs.h> #include <maths/common/CTools.h> #include <maths/common/CToolsDetail.h> #include <test/BoostTestCloseAbsolute.h> #include <test/CRandomNumbers.h> #include <boost/math/concepts/real_concept.hpp> #include <boost/math/distributions/beta.hpp> #include <boost/math/distributions/negative_binomial.hpp> #include <boost/math/distributions/students_t.hpp> #include <boost/test/unit_test.hpp> #include <array> #include <bitset> #include <numeric> BOOST_AUTO_TEST_SUITE(CToolsTest) using namespace ml; using namespace maths; using namespace common; using namespace test; namespace { using TDoubleDoublePr = std::pair<double, double>; using TDoubleBoolPr = std::pair<double, bool>; using TDoubleVec = std::vector<double>; namespace adapters { template<typename DISTRIBUTION> bool isDiscrete(const DISTRIBUTION&) { return false; } bool isDiscrete(const boost::math::negative_binomial_distribution<>&) { return true; } template<typename DISTRIBUTION> TDoubleDoublePr support(const DISTRIBUTION& distribution) { return boost::math::support(distribution); } TDoubleDoublePr support(const CLogTDistribution& logt) { CLogTDistribution::TOptionalDouble minimum = localMinimum(logt); return TDoubleDoublePr(minimum ? *minimum : 0.0, boost::math::tools::max_value<double>()); } template<typename DISTRIBUTION> TDoubleBoolPr stationaryPoint(const DISTRIBUTION& distribution) { return TDoubleBoolPr(boost::math::mode(distribution), true); } TDoubleBoolPr stationaryPoint(const CLogTDistribution& logt) { return TDoubleBoolPr(ml::maths::common::mode(logt), true); } TDoubleBoolPr stationaryPoint(const boost::math::beta_distribution<>& beta) { if (beta.alpha() < 1.0 && beta.beta() < 1.0) { return TDoubleBoolPr((beta.alpha() - 1.0) / (beta.alpha() + beta.beta() - 2.0), false); } return TDoubleBoolPr(boost::math::mode(beta), true); } template<typename DISTRIBUTION> double pdf(const DISTRIBUTION& distribution, const double& x) { return CTools::safePdf(distribution, x); } double pdf(const CLogTDistribution& logt, const double& x) { return ml::maths::common::pdf(logt, x); } template<typename DISTRIBUTION> double cdf(const DISTRIBUTION& distribution, const double& x) { return CTools::safeCdf(distribution, x); } double cdf(const CLogTDistribution& logt, const double& x) { return ml::maths::common::cdf(logt, x); } template<typename DISTRIBUTION> double cdfComplement(const DISTRIBUTION& distribution, const double& x) { return CTools::safeCdfComplement(distribution, x); } double cdfComplement(const CLogTDistribution& logt, const double& x) { return ml::maths::common::cdfComplement(logt, x); } } // adapters:: template<typename DISTRIBUTION> double numericalProbabilityOfLessLikelySampleImpl(const DISTRIBUTION& distribution, double x) { TDoubleBoolPr stationaryPoint = adapters::stationaryPoint(distribution); double eps = 1e-8; double pdf = adapters::pdf(distribution, x); LOG_TRACE(<< "x = " << x << ", f(x) = " << pdf << ", stationaryPoint = " << stationaryPoint.first); double x1 = stationaryPoint.first; if (x > stationaryPoint.first) { // Search for lower bound. double minX = adapters::support(distribution).first + eps; for (double increment = std::max(x1 / 2.0, 1.0); x1 > minX && ((stationaryPoint.second && adapters::pdf(distribution, x1) > pdf) || (!stationaryPoint.second && adapters::pdf(distribution, x1) < pdf)); x1 = std::max(x1 - increment, minX), increment *= 2.0) { // Empty. } } double x2 = stationaryPoint.first; if (x < stationaryPoint.first) { // Search for upper bound. double maxX = adapters::support(distribution).second - eps; for (double increment = std::max(x2 / 2.0, 1.0); x2 < maxX && ((stationaryPoint.second && adapters::pdf(distribution, x2) > pdf) || (!stationaryPoint.second && adapters::pdf(distribution, x2) < pdf)); x2 = std::min(x2 + increment, maxX), increment *= 2.0) { // Empty. } } LOG_TRACE(<< "1) x1 = " << x1 << ", x2 = " << x2); if (adapters::pdf(distribution, x1) > adapters::pdf(distribution, x2)) { std::swap(x1, x2); } LOG_TRACE(<< "2) x1 = " << x1 << ", x2 = " << x2); // Binary search. while (std::fabs(x1 - x2) > eps) { double x3 = (x1 + x2) / 2.0; if (adapters::pdf(distribution, x3) > pdf) { x2 = x3; } else { x1 = x3; } } LOG_TRACE(<< "3) x1 = " << x1 << ", x2 = " << x2); double y = (x > (x1 + x2) / 2.0) ? std::min(x1, x2) : std::max(x1, x2); if (x > y) { std::swap(x, y); } LOG_TRACE(<< "x = " << x << ", y = " << y << ", f(x) = " << adapters::pdf(distribution, x) << ", f(y) = " << adapters::pdf(distribution, y)); if (stationaryPoint.second) { double cdfy = adapters::cdfComplement(distribution, y) + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0); double cdfx = adapters::cdf(distribution, x); LOG_TRACE(<< "F(x) = " << cdfx << ", 1 - F(y) = " << cdfy); return cdfx + cdfy; } double cdfy = adapters::cdf(distribution, y) + (adapters::isDiscrete(distribution) ? adapters::pdf(distribution, y) : 0.0); double cdfx = adapters::cdf(distribution, x); LOG_TRACE(<< "F(x) = " << cdfx << ", F(y) = " << cdfy); return cdfy - cdfx; } template<typename DISTRIBUTION> double numericalProbabilityOfLessLikelySample(const DISTRIBUTION& distribution, double x) { return numericalProbabilityOfLessLikelySampleImpl(distribution, x); } double numericalProbabilityOfLessLikelySample(const boost::math::negative_binomial_distribution<>& negativeBinomial, double x) { double fx = CTools::safePdf(negativeBinomial, x); double m = boost::math::mode(negativeBinomial); double fm = CTools::safePdf(negativeBinomial, m); if (fx >= fm) { return 1.0; } double f0 = CTools::safePdf(negativeBinomial, 0.0); if (x > m && fx < f0) { return CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x); } return numericalProbabilityOfLessLikelySampleImpl(negativeBinomial, x); } double numericalProbabilityOfLessLikelySample(const CLogTDistribution& logt, double x) { // We need special handling for the case that the p.d.f. is // single sided and if it is greater at x than the local "mode". double m = mode(logt); if (m == 0.0) { return cdfComplement(logt, x); } double fx = pdf(logt, x); double fm = pdf(logt, m); if (fx > fm) { return cdfComplement(logt, x); } CLogTDistribution::TOptionalDouble xmin = localMinimum(logt); if (xmin && (pdf(logt, *xmin) > fx || *xmin == m)) { return cdfComplement(logt, x); } return numericalProbabilityOfLessLikelySampleImpl(logt, x); } double numericalProbabilityOfLessLikelySample(const boost::math::beta_distribution<>& beta, double x) { // We need special handling of the case that the equal p.d.f. // point is very close to 0 or 1. double fx = CTools::safePdf(beta, x); double a = beta.alpha(); double b = beta.beta(); double xmin = 1000.0 * std::numeric_limits<double>::min(); if (a >= 1.0 && fx < CTools::safePdf(beta, xmin)) { return std::exp(a * std::log(xmin) - std::log(a) + std::lgamma(a + b) - std::lgamma(a) - std::lgamma(b)) + CTools::safeCdfComplement(beta, x); } double xmax = 1.0 - std::numeric_limits<double>::epsilon(); if (b >= 1.0 && fx < CTools::safePdf(beta, xmax)) { double y = std::exp(std::log(boost::math::beta(a, b) * fx) / b); return std::pow(y, b) / b / boost::math::beta(a, b) + CTools::safeCdf(beta, x); } return numericalProbabilityOfLessLikelySampleImpl(beta, x); } template<typename DISTRIBUTION> class CPdf { public: CPdf(const DISTRIBUTION& distribution) : m_Distribution(distribution) {} bool operator()(double x, double& result) const { result = boost::math::pdf(m_Distribution, x); return true; } private: DISTRIBUTION m_Distribution; }; class CIdentity { public: bool operator()(double x, double& result) const { result = x; return true; } }; template<typename DISTRIBUTION> double numericalIntervalExpectation(const DISTRIBUTION& distribution, double a, double b, std::size_t steps = 10) { if (a == b) { return b; } double numerator = 0.0; double denominator = 0.0; CPdf<DISTRIBUTION> fx(distribution); maths::common::CCompositeFunctions::CProduct<CPdf<DISTRIBUTION>, CIdentity> xfx(fx); double dx = (b - a) / static_cast<double>(steps); for (std::size_t i = 0; i < steps; ++i, a += dx) { double fxi; BOOST_TEST_REQUIRE( maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderFive>( fx, a, a + dx, fxi)); double xfxi; BOOST_TEST_REQUIRE( maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderFive>( xfx, a, a + dx, xfxi)); numerator += xfxi; denominator += fxi; } return numerator / denominator; } template<typename T> class CTruncatedPdf { public: CTruncatedPdf(const maths::common::CMixtureDistribution<T>& mixture, double cutoff) : m_Mixture(mixture), m_Cutoff(cutoff) {} bool operator()(double x, double& fx) const { fx = maths::common::pdf(m_Mixture, x); if (fx > m_Cutoff) { fx = 0.0; } return true; } private: const maths::common::CMixtureDistribution<T>& m_Mixture; double m_Cutoff; }; template<typename T> class CLogPdf { public: CLogPdf(const maths::common::CMixtureDistribution<T>& mixture) : m_Mixture(mixture) {} double operator()(double x) const { return std::log(maths::common::pdf(m_Mixture, x)); } bool operator()(double x, double& fx) const { fx = std::log(maths::common::pdf(m_Mixture, x)); return true; } private: const maths::common::CMixtureDistribution<T>& m_Mixture; }; void printByte(const unsigned char& byte, std::ostream& o) { o << std::bitset<CHAR_BIT>{byte}; } template<typename T> std::string printBits(const T& t) { std::ostringstream o; const unsigned char* byte{reinterpret_cast<const unsigned char*>(&t)}; for (std::size_t s = 0; s < sizeof(t); ++s, ++byte) { printByte(*byte, o); } return o.str(); } } BOOST_AUTO_TEST_CASE(testProbabilityOfLessLikelySample) { // The probability of a lower likelihood sample x from a single // mode distribution is: // F(a) + 1 - F(b) // // where, // a satisfies f(a) = f(x) and a < m, // b satisfies f(b) = f(x) and b > m, // F(.) denotes the c.d.f. of the distribution, // f(.) denotes the p.d.f. of the distribution and // m is the mode of the distribution. // // We'll use this relation to test our probabilities. CTools::CProbabilityOfLessLikelySample probabilityOfLessLikelySample(maths_t::E_TwoSided); double p1, p2; maths_t::ETail tail = maths_t::E_UndeterminedTail; double m; LOG_DEBUG(<< "******** normal ********"); boost::math::normal_distribution<> normal(3.0, 5.0); p1 = numericalProbabilityOfLessLikelySample(normal, -1.0); p2 = probabilityOfLessLikelySample(normal, -1.0, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); m = adapters::stationaryPoint(normal).first; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(normal, m, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(normal, 8.0); p2 = probabilityOfLessLikelySample(normal, 8.0, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); LOG_DEBUG(<< "******** student's t ********"); boost::math::students_t students(2.0); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(students, -4.0); p2 = probabilityOfLessLikelySample(students, -4.0, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); m = adapters::stationaryPoint(students).first; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(students, m, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(students, 3.0); p2 = probabilityOfLessLikelySample(students, 3.0, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); LOG_DEBUG(<< "******** negative binomial ********"); { double successFraction[] = {0.5, 1.0, 10.0, 100.0, 1000.0}; double successProbability[] = {1e-3, 0.25, 0.5, 0.75, 1.0 - 1e-3}; for (size_t i = 0; i < std::size(successFraction); ++i) { for (size_t j = 0; j < std::size(successProbability); ++j) { LOG_DEBUG(<< "**** r = " << successFraction[i] << ", p = " << successProbability[j] << " ****"); boost::math::negative_binomial_distribution<> negativeBinomial( successFraction[i], successProbability[j]); if (successFraction[i] <= 1.0) { // Monotone decreasing. double x = std::fabs(std::log(successProbability[j])); for (int l = 0; l < 10; ++l) { tail = maths_t::E_UndeterminedTail; x = std::floor(2.0 * x + 0.5); p1 = CTools::safeCdfComplement(negativeBinomial, x) + CTools::safePdf(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } continue; } double m1 = boost::math::mode(negativeBinomial); BOOST_REQUIRE_EQUAL( 1.0, probabilityOfLessLikelySample(negativeBinomial, m1, tail)); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); double offset = m1; for (int l = 0; l < 5; ++l) { offset /= 2.0; double x = std::floor(m1 - offset); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.02 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.02 * std::fabs(std::min(std::log(p1), std::log(p2))))); if (offset > 0.0) BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); if (offset == 0.0) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); x = std::ceil(m1 + offset); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2)); if (offset > 0.0) BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); if (offset == 0.0) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } double factor = 1.0; for (int l = 0; l < 5; ++l) { factor *= 2.0; double x = std::floor(m1 / factor); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))))); if (x != m1) BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); if (x == m1) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); x = std::ceil(m1 * factor); tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(negativeBinomial, x); p2 = probabilityOfLessLikelySample(negativeBinomial, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))))); if (x != m1) BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); if (x == m1) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } } } // Note when we are testing the tails of the distribution we // use the relative error in the log. LOG_DEBUG(<< "******** log normal ********"); boost::math::lognormal_distribution<> logNormal(1.0, 0.5); tail = maths_t::E_UndeterminedTail; p1 = -std::log(numericalProbabilityOfLessLikelySample(logNormal, 0.3)); p2 = -std::log(probabilityOfLessLikelySample(logNormal, 0.3, tail)); LOG_DEBUG(<< "-log(p1) = " << p1 << ", -log(p2) = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.05 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); m = adapters::stationaryPoint(logNormal).first; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(logNormal, m, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); tail = maths_t::E_UndeterminedTail; p1 = -std::log(numericalProbabilityOfLessLikelySample(logNormal, 12.0)); p2 = -std::log(probabilityOfLessLikelySample(logNormal, 12.0, tail)); LOG_DEBUG(<< "-log(p1) = " << p1 << ", -log(p2) = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.01 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); LOG_DEBUG(<< "******** log t ********"); { double degreesFreedom[] = {1.0, 5.0, 50.0}; double locations[] = {-0.5, 0.1, 1.0, 5.0}; double scales[] = {0.1, 1.0, 5.0}; for (size_t i = 0; i < std::size(degreesFreedom); ++i) { for (size_t j = 0; j < std::size(locations); ++j) { for (size_t k = 0; k < std::size(scales); ++k) { LOG_DEBUG(<< "**** v = " << degreesFreedom[i] << ", l = " << locations[j] << ", s = " << scales[k] << " ****"); CLogTDistribution logt(degreesFreedom[i], locations[j], scales[k]); double m1 = mode(logt); if (m1 == 0.0) { // Monotone decreasing. double x = std::exp(locations[j]) / 32.0; tail = maths_t::E_UndeterminedTail; for (int l = 0; l < 10; ++l) { x *= 2.0; p1 = cdfComplement(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2)); } BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); continue; } double offset = m1; for (int l = 0; l < 5; ++l) { offset /= 2.0; double x = m1 - offset; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); x = m1 + offset; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.02 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } double factor = 1.0; for (int l = 0; l < 5; ++l) { factor *= 2.0; double x = m1 / factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); x = m1 * factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(logt, x); p2 = probabilityOfLessLikelySample(logt, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.01 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.05 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } } } } } LOG_DEBUG(<< "******** gamma ********"); { double shapes[] = {0.1, 1.0, 1.1, 100.0, 10000.0}; double scales[] = {0.0001, 0.01, 1.0, 10.0}; for (size_t i = 0; i < std::size(shapes); ++i) { for (size_t j = 0; j < std::size(scales); ++j) { LOG_DEBUG(<< "***** shape = " << shapes[i] << ", scale = " << scales[j] << " *****"); boost::math::gamma_distribution<> gamma(shapes[i], scales[j]); if (shapes[i] <= 1.0) { double x = boost::math::mean(gamma); tail = maths_t::E_UndeterminedTail; for (int k = 0; k < 10; ++k) { x *= 2.0; p1 = CTools::safeCdfComplement(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } continue; } double m1 = boost::math::mode(gamma); LOG_DEBUG(<< "mode = " << m1); double offset = 1.0; for (int k = 0; k < 5; ++k) { offset /= 2.0; double x = (1.0 - offset) * m1; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); double y = (1.0 + offset) * m1; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, y); p2 = probabilityOfLessLikelySample(gamma, y, tail); LOG_TRACE(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.06 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } double factor = 1.0; for (int k = 0; k < 5; ++k) { factor *= 2.0; double x = m1 / factor; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, x); p2 = probabilityOfLessLikelySample(gamma, x, tail); LOG_TRACE(<< "x = " << x << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); double y = factor * m1; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(gamma, y); p2 = probabilityOfLessLikelySample(gamma, y, tail); LOG_TRACE(<< "y = " << y << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << std::log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.1 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) <= 0.01 * std::fabs(std::min(std::log(p1), std::log(p2))))); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } } } } LOG_DEBUG(<< "******** beta ********"); { double alphas[] = {0.01, 0.98, 1.0, 1.01, 1000.0}; double betas[] = {0.01, 0.98, 1.0, 1.01, 1000.0}; for (size_t i = 0; i < std::size(alphas); ++i) { for (size_t j = 0; j < std::size(betas); ++j) { LOG_DEBUG(<< "**** alpha = " << alphas[i] << ", beta = " << betas[j] << " ****"); boost::math::beta_distribution<> beta(alphas[i], betas[j]); if (alphas[i] == 1.0 && betas[j] == 1.0) { // Constant. for (int k = 0; k < 6; ++k) { double x = static_cast<double>(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(beta, x, tail); LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } else if (alphas[i] <= 1.0 && betas[j] >= 1.0) { // Monotone decreasing. for (int k = 0; k < 6; ++k) { double x = static_cast<double>(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = CTools::safeCdfComplement(beta, x); p2 = probabilityOfLessLikelySample(beta, x, tail); LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); } } else if (alphas[i] >= 1.0 && betas[j] <= 1.0) { // Monotone increasing. for (int k = 0; k < 6; ++k) { double x = static_cast<double>(k) / 5.0; tail = maths_t::E_UndeterminedTail; p1 = CTools::safeCdf(beta, x); p2 = probabilityOfLessLikelySample(beta, x, tail); LOG_TRACE(<< "x = " << x << ", f(x) = " << CTools::safePdf(beta, x) << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 1e-3 * std::max(p1, p2)); BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); } } else { double stationaryPoint = adapters::stationaryPoint(beta).first; bool maximum = adapters::stationaryPoint(beta).second; LOG_DEBUG(<< "stationary point = " << stationaryPoint); double epsMinus = stationaryPoint; double epsPlus = 1.0 - stationaryPoint; for (int k = 0; k < 5; ++k) { epsMinus /= 2.0; double xMinus = stationaryPoint - epsMinus; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(beta, xMinus); p2 = probabilityOfLessLikelySample(beta, xMinus, tail); LOG_TRACE(<< "x- = " << xMinus << ", p1 = " << p1 << ", p2 = " << p2 << ", log(p1) = " << log(p1) << ", log(p2) = " << std::log(p2)); BOOST_TEST_REQUIRE( (std::fabs(p1 - p2) <= 0.05 * std::max(p1, p2) || std::fabs(std::log(p1) - std::log(p2)) < 0.25 * std::fabs(std::min(std::log(p1), std::log(p2))))); if (maximum) BOOST_REQUIRE_EQUAL(maths_t::E_LeftTail, tail); if (!maximum) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); epsPlus /= 2.0; double xPlus = stationaryPoint + epsPlus; tail = maths_t::E_UndeterminedTail; p1 = numericalProbabilityOfLessLikelySample(beta, xPlus); p2 = probabilityOfLessLikelySample(beta, xPlus, tail); LOG_TRACE(<< "x+ = " << xPlus << ", p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_CLOSE_ABSOLUTE(p1, p2, 0.05 * std::max(p1, p2)); if (maximum) BOOST_REQUIRE_EQUAL(maths_t::E_RightTail, tail); if (!maximum) BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } } } } // Special cases: { // x at maximum boost::math::beta_distribution<> beta(2.0, 8.0); double mode = adapters::stationaryPoint(beta).first; tail = maths_t::E_UndeterminedTail; p1 = 1.0; p2 = probabilityOfLessLikelySample(beta, mode, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } { // x at minimum boost::math::beta_distribution<> beta(0.6, 0.3); double mode = adapters::stationaryPoint(beta).first; tail = maths_t::E_UndeterminedTail; p1 = 0.0; p2 = probabilityOfLessLikelySample(beta, mode, tail); LOG_DEBUG(<< "p1 = " << p1 << ", p2 = " << p2); BOOST_REQUIRE_EQUAL(p1, p2); BOOST_REQUIRE_EQUAL(maths_t::E_MixedOrNeitherTail, tail); } } BOOST_AUTO_TEST_CASE(testIntervalExpectation) { // We check the expectations agree with numerical integration // and also some corner cases. Specifically, that we handle // +/- infinity correctly and the also the case that a and b // are very close. maths::common::CTools::SIntervalExpectation expectation; double expected; double actual; LOG_DEBUG(<< "*** Normal ***"); { boost::math::normal_distribution<> normal(10.0, 5.0); expected = numericalIntervalExpectation(normal, 0.0, 12.0); actual = expectation(normal, 0.0, 12.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(normal, -40.0, 13.0); actual = expectation(normal, std::numeric_limits<double>::lowest(), 13.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = 7.0; actual = expectation(normal, 7.0, 7.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); expected = 8.1; actual = expectation(normal, 8.1, 8.1 * (1.0 + std::numeric_limits<double>::epsilon())); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); } LOG_DEBUG(<< "*** Log-Normal ***"); { boost::math::lognormal_distribution<> logNormal(1.5, 0.8); expected = numericalIntervalExpectation(logNormal, 0.5, 7.0); actual = expectation(logNormal, 0.5, 7.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(logNormal, 0.0, 9.0); actual = expectation(logNormal, std::numeric_limits<double>::lowest(), 9.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = 6.0; actual = expectation(logNormal, 6.0, 6.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); expected = 8.1; actual = expectation(logNormal, 8.1, 8.1 * (1.0 + std::numeric_limits<double>::epsilon())); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); } LOG_DEBUG(<< "*** Gamma ***"); { boost::math::gamma_distribution<> gamma(5.0, 3.0); expected = numericalIntervalExpectation(gamma, 0.5, 4.0); actual = expectation(gamma, 0.5, 4.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = numericalIntervalExpectation(gamma, 0.0, 5.0); actual = expectation(gamma, std::numeric_limits<double>::lowest(), 5.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); expected = 6.0; actual = expectation(gamma, 6.0, 6.0); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); expected = 8.1; actual = expectation(gamma, 8.1, 8.1 * (1.0 + std::numeric_limits<double>::epsilon())); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-12 * expected); } } BOOST_AUTO_TEST_CASE(testIntervalExpectationNumericIssues) { // We check some limiting cases which lead to numerically challenging // cases. These can easily generate NaNs if not handled carefully. maths::common::CTools::SIntervalExpectation expectation; LOG_DEBUG(<< "*** Log-Normal ***"); { for (double a : {0.01, 1e-4, 1e-8, 1e-16, 1e-32, 0.0}) { for (double b : {0.01, 0.1}) { for (double location : {-1.0, 1.0}) { for (double scale : {0.1, 2.0}) { boost::math::lognormal logNormal(location, scale); double actual = expectation(logNormal, a, b); double expected = numericalIntervalExpectation(logNormal, a, b, 100); if (maths::common::CMathsFuncs::isNan(expected) == false) { LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE(actual, expected, 1.0 /* percent */); } } } } } } LOG_DEBUG(<< "*** Gamma ***"); { for (double a : {0.01, 1e-4, 1e-8, 1e-16, 1e-32, 0.0}) { for (double b : {0.01, 0.1}) { for (double shape : {1.0, 5.0}) { for (double rate : {1.0, 0.2}) { boost::math::gamma_distribution<> gamma( 0.236554 * shape, 1.0 / 36.9769 / rate); double actual = expectation(gamma, a, b); double expected = numericalIntervalExpectation(gamma, a, b, 100); LOG_DEBUG(<< "expected = " << expected << ", actual = " << actual); BOOST_REQUIRE_CLOSE(actual, expected, 20.0 /* percent */); } } } } } } BOOST_AUTO_TEST_CASE(testMixtureProbabilityOfLessLikelySample) { using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator; test::CRandomNumbers rng; std::size_t n = 500; TDoubleVec x; rng.generateUniformSamples(0.0, 1000.0, 20, x); TDoubleVec means; rng.generateUniformSamples(200.0, 800.0, n, means); TDoubleVec sd; rng.generateUniformSamples(0.1, 50.0, n, sd); TDoubleVec weights; rng.generateUniformSamples(1.0, 10.0, n, weights); for (std::size_t i = 4; i <= 20; i += 4) { LOG_DEBUG(<< "*** modes = " << i << " ***"); TMeanAccumulator meanError; TMeanAccumulator meanLogError; for (std::size_t j = 0; j < n - i; j += i) { TDoubleVec modeWeights; std::vector<boost::math::normal> modes; double a = std::numeric_limits<double>::max(); double b = -std::numeric_limits<double>::max(); for (std::size_t k = 0; k < i; ++k) { modeWeights.push_back(weights[j + k]); modes.push_back(boost::math::normal(means[j + k], sd[j + k])); a = std::min(a, means[j + k]); b = std::max(b, means[j + k]); } maths::common::CMixtureDistribution<boost::math::normal> mixture(modeWeights, modes); for (std::size_t k = 0; k < x.size(); ++k) { double logFx = maths::common::pdf(mixture, x[k]); if (logFx == 0.0) { logFx = 10.0 * core::constants::LOG_MIN_DOUBLE; } else { logFx = std::log(logFx); } maths::common::CTools::CMixtureProbabilityOfLessLikelySample calculator( i, x[k], logFx, a, b); for (std::size_t l = 0; l < modeWeights.size(); ++l) { calculator.addMode((mixture.weights())[l], boost::math::mean(modes[l]), boost::math::standard_deviation(modes[l])); } double pTails = 0.0; CLogPdf<boost::math::normal> logPdf(mixture); maths::common::CEqualWithTolerance<double> equal( maths::common::CToleranceTypes::E_AbsoluteTolerance, 0.5); double xleft; BOOST_TEST_REQUIRE(calculator.leftTail(logPdf, 10, equal, xleft)); pTails += maths::common::cdf(mixture, xleft); double xright; BOOST_TEST_REQUIRE(calculator.rightTail(logPdf, 10, equal, xright)); pTails += maths::common::cdfComplement(mixture, xright); double p = pTails + calculator.calculate(logPdf, pTails); double pExpected = pTails; CTruncatedPdf<boost::math::normal> pdf(mixture, std::exp(logFx)); for (double xi = a, l = 0, step = 0.5 * (b - a) / std::floor(b - a); l < 2 * static_cast<std::size_t>(b - a); xi += step, ++l) { double pi; maths::common::CIntegration::gaussLegendre<maths::common::CIntegration::OrderThree>( pdf, xi, xi + step, pi); pExpected += pi; } LOG_TRACE(<< "pTails = " << pTails); LOG_TRACE(<< "x = " << x[k] << ", log(f(x)) = " << logFx << ", P(x) = " << p << ", expected P(x) = " << pExpected); BOOST_TEST_REQUIRE(pExpected > 0.0); if (pExpected > 0.1) { BOOST_REQUIRE_CLOSE_ABSOLUTE(pExpected, p, 0.12); } else if (pExpected > 1e-10) { BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(pExpected), std::log(p), 0.15 * std::fabs(std::log(pExpected))); } else { BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(pExpected), std::log(p), 0.015 * std::fabs(std::log(pExpected))); } meanError.add(std::fabs(p - pExpected)); meanLogError.add(std::fabs(std::log(p) - std::log(pExpected)) / std::max(std::fabs(std::log(pExpected)), std::fabs(std::log(p)))); } } LOG_DEBUG(<< "meanError = " << maths::common::CBasicStatistics::mean(meanError)); LOG_DEBUG(<< "meanLogError = " << maths::common::CBasicStatistics::mean(meanLogError)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.005); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanLogError) < 0.03); } } BOOST_AUTO_TEST_CASE(testAnomalyScore) { // Test p = inverseAnomalyScore(anomalyScore(p)) double p = 0.04; for (std::size_t i = 0; i < 305; ++i, p *= 0.1) { double anomalyScore = CTools::anomalyScore(p); LOG_DEBUG(<< "p = " << p << ", anomalyScore = " << anomalyScore); BOOST_REQUIRE_CLOSE_ABSOLUTE(p, CTools::inverseAnomalyScore(anomalyScore), 1e-3 * p); } } BOOST_AUTO_TEST_CASE(testFastLog) { test::CRandomNumbers rng; // Small { TDoubleVec x; rng.generateUniformSamples(-100.0, 0.0, 10000, x); for (std::size_t i = 0; i < x.size(); ++i) { LOG_TRACE(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = " << maths::common::CTools::fastLog(std::exp(x[i]))); BOOST_REQUIRE_CLOSE_ABSOLUTE( x[i], maths::common::CTools::fastLog(std::exp(x[i])), 5e-5); } } // Mid { TDoubleVec x; rng.generateUniformSamples(1.0, 1e6, 10000, x); for (std::size_t i = 0; i < x.size(); ++i) { LOG_TRACE(<< "x = " << x[i] << ", log(x) = " << std::log(x[i]) << ", fast log(x) = " << maths::common::CTools::fastLog(x[i])); BOOST_REQUIRE_CLOSE_ABSOLUTE(std::log(x[i]), maths::common::CTools::fastLog(x[i]), 5e-5); } } // Large { TDoubleVec x; rng.generateUniformSamples(20.0, 80.0, 10000, x); for (std::size_t i = 0; i < x.size(); ++i) { LOG_TRACE(<< "x = " << std::exp(x[i]) << ", log(x) = " << x[i] << ", fast log(x) = " << maths::common::CTools::fastLog(std::exp(x[i]))); BOOST_REQUIRE_CLOSE_ABSOLUTE( x[i], maths::common::CTools::fastLog(std::exp(x[i])), 5e-5); } } } BOOST_AUTO_TEST_CASE(testMiscellaneous) { double x_[] = {0.0, 3.2, 2.1, -1.8, 4.5}; maths::common::CVectorNx1<double, 5> x(x_, x_ + 5); maths::common::CVectorNx1<double, 5> a(-2.0); maths::common::CVectorNx1<double, 5> b(5.0); double expected[][5] = { {0.0, 3.2, 2.1, -1.8, 4.5}, {0.0, 3.2, 2.1, -1.5, 4.5}, {0.0, 3.2, 2.1, -1.0, 4.0}, {0.0, 3.2, 2.1, -0.5, 3.5}, {0.0, 3.0, 2.1, 0.0, 3.0}, {0.5, 2.5, 2.1, 0.5, 2.5}, {1.0, 2.0, 2.0, 1.0, 2.0}, {1.5, 1.5, 1.5, 1.5, 1.5}}; for (std::size_t i = 0; a <= b; ++i) { maths::common::CVectorNx1<double, 5> expect(expected[i]); maths::common::CVectorNx1<double, 5> actual = maths::common::CTools::truncate(x, a, b); BOOST_REQUIRE_EQUAL(expect, actual); a = a + maths::common::CVectorNx1<double, 5>(0.5); b = b - maths::common::CVectorNx1<double, 5>(0.5); } } BOOST_AUTO_TEST_CASE(testLgamma) { std::array<double, 8> testData = {{3.5, 0.125, -0.125, 0.000244140625, 1.3552527156068805e-20, 4.95547e+25, 5.01753e+25, 8.64197e+25}}; std::array<double, 8> expectedData = { {1.2009736023470742248160218814507129957702389154682, 2.0194183575537963453202905211670995899482809521344, 2.1653002489051702517540619481440174064962195287626, 8.3176252939431805089043336920440196990966796875000, 45.7477139169563926657247066032141447067260742187500, 2.882355039447984e+27, 2.919076782442754e+27, 5.074673490557339e+27}}; for (std::size_t i = 0; i < testData.size(); ++i) { double actual; double expected = expectedData[i]; BOOST_TEST_REQUIRE(maths::common::CTools::lgamma(testData[i], actual, true)); BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, actual, 1e-5 * expected); } double result; BOOST_TEST_REQUIRE(maths::common::CTools::lgamma(0, result, false)); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(0, result, true) == false)); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(-1, result, false))); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(-1, result, true) == false)); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); BOOST_TEST_REQUIRE((maths::common::CTools::lgamma( std::numeric_limits<double>::max() - 1, result, false))); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); BOOST_TEST_REQUIRE((maths::common::CTools::lgamma(std::numeric_limits<double>::max() - 1, result, true) == false)); BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity()); } BOOST_AUTO_TEST_CASE(testSoftmax) { // Test some invariants and that std::vector and maths::common::CDenseVector versions agree. using TDoubleVector = maths::common::CDenseVector<double>; test::CRandomNumbers rng; TDoubleVec z; for (std::size_t t = 0; t < 100; ++t) { rng.generateUniformSamples(-3.0, 3.0, 5, z); TDoubleVec p{z}; CTools::inplaceSoftmax(p); BOOST_REQUIRE_CLOSE(1.0, std::accumulate(p.begin(), p.end(), 0.0), 1e-6); BOOST_TEST_REQUIRE(*std::min_element(p.begin(), p.end()) >= 0.0); TDoubleVector p_{TDoubleVector::fromStdVector(z)}; CTools::inplaceSoftmax(p_); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_CLOSE(p[i], p_[i], 1e-6); } } } BOOST_AUTO_TEST_CASE(testLogSoftmax) { // Test some invariants and that std::vector and maths::common::CDenseVector versions agree. using TDoubleVector = maths::common::CDenseVector<double>; test::CRandomNumbers rng; TDoubleVec z; for (std::size_t t = 0; t < 100; ++t) { rng.generateUniformSamples(-3.0, 3.0, 5, z); TDoubleVec p{z}; CTools::inplaceSoftmax(p); TDoubleVec logP{z}; CTools::inplaceLogSoftmax(logP); BOOST_TEST_REQUIRE(*std::max_element(logP.begin(), logP.end()) <= 0.0); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_CLOSE(std::log(p[i]), logP[i], 1e-6); } TDoubleVector logP_{TDoubleVector::fromStdVector(z)}; CTools::inplaceLogSoftmax(logP_); for (std::size_t i = 0; i < 5; ++i) { BOOST_REQUIRE_CLOSE(logP[i], logP_[i], 1e-6); } } } BOOST_AUTO_TEST_CASE(testStable) { // Test the bit representation of the stable log and exp of some random values. // This will fail if they're different on any of our target platforms. BOOST_REQUIRE_EQUAL("1001000010101011111010010001000011111110001100011111001110111111", printBits(CTools::stableLog(0.301283021))); BOOST_REQUIRE_EQUAL("1001010011010100101100111011101001100110101100010010110101000000", printBits(CTools::stableLog(2803801.9332))); BOOST_REQUIRE_EQUAL("1100010011111111100101000001110111100000001011010001001101000000", printBits(CTools::stableLog(120.880233))); BOOST_REQUIRE_EQUAL("1110111001101001111010100000001100101010100100110000011001000000", printBits(CTools::stableLog(16.808042323))); BOOST_REQUIRE_EQUAL("1000000001000110101011010011001110111111101111000001000101000000", printBits(CTools::stableExp(1.48937498342))); BOOST_REQUIRE_EQUAL("1101111011000110011111000001010001101011111111000011000001000000", printBits(CTools::stableExp(2.832390))); BOOST_REQUIRE_EQUAL("0101111001100101111110101101101100100100111001101001001100111111", printBits(CTools::stableExp(-3.94080233))); BOOST_REQUIRE_EQUAL("1111100000011010010001001000000100011011010100100000010101000000", printBits(CTools::stableExp(0.98023840))); } BOOST_AUTO_TEST_SUITE_END()