lib/maths/time_series/unittest/TestUtils.h (181 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #ifndef INCLUDED_ml_TestUtils_h #define INCLUDED_ml_TestUtils_h #include <core/CSmallVector.h> #include <core/CoreTypes.h> #include <maths/common/CLinearAlgebra.h> #include <maths/common/CMultivariatePrior.h> #include <maths/common/CPrior.h> #include <maths/common/Constants.h> #include <cmath> #include <cstddef> namespace ml { namespace handy_typedefs { using TDouble1Vec = core::CSmallVector<double, 1>; using TDouble10Vec = core::CSmallVector<double, 10>; using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>; using TDouble10Vec10Vec = core::CSmallVector<TDouble10Vec, 10>; using TVector2 = maths::common::CVectorNx1<double, 2>; using TVector2Vec = std::vector<TVector2>; using TVector2VecVec = std::vector<TVector2Vec>; using TMatrix2 = maths::common::CSymmetricMatrixNxN<double, 2>; using TMatrix2Vec = std::vector<TMatrix2>; using TVector3 = maths::common::CVectorNx1<double, 3>; using TMatrix3 = maths::common::CSymmetricMatrixNxN<double, 3>; using TGenerator = double (*)(core_t::TTime); using TGeneratorVec = std::vector<TGenerator>; } //! \brief A set of test and utility functions for use in testing only. //! //! DESCRIPTION:\n //! This is a mix in interface for use within the testing framework. class CPriorTestInterface { public: using TDoubleDoublePr = std::pair<double, double>; using TDoubleDoublePr1Vec = core::CSmallVector<TDoubleDoublePr, 1>; using TWeights = maths_t::CUnitWeights; public: explicit CPriorTestInterface(maths::common::CPrior& prior); //! Wrapper which takes care of weights. void addSamples(const handy_typedefs::TDouble1Vec& samples); //! Wrapper which takes care of weights. maths_t::EFloatingPointErrorStatus jointLogMarginalLikelihood(const handy_typedefs::TDouble1Vec& samples, double& result) const; //! Wrapper which takes care of weights. bool minusLogJointCdf(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; //! Wrapper which takes care of weights. bool minusLogJointCdfComplement(const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; //! Wrapper which takes care of weights. bool probabilityOfLessLikelySamples(maths_t::EProbabilityCalculation calculation, const handy_typedefs::TDouble1Vec& samples, double& lowerBound, double& upperBound) const; //! A wrapper around weighted compute anomaly scores which uses unit //! weights for all samples. bool anomalyScore(maths_t::EProbabilityCalculation calculation, const handy_typedefs::TDouble1Vec& samples, double& result) const; //! Calculate an anomaly score for a collection of independent samples //! from the variable. //! //! \param[in] calculation The style of the probability calculation //! (see maths_t::EProbabilityCalculation for details). //! \param[in] weightStyle Controls the interpretation of the weight that //! is associated with each sample. See maths_t::ESampleWeightStyle for //! more details. //! \param[in] samples A collection of samples of the variable. //! Each pair is the sample and weight, i.e. \f$(x_i, \gamma_i)\f$ where //! \f$x_i\f$ is \f$i^{th}\f$ sample and \f$\gamma_i\f$ is the weight of //! that sample. //! \param[out] result Filled in with the total anomaly score of \p samples. bool anomalyScore(maths_t::EProbabilityCalculation calculation, maths_t::ESampleWeightStyle weightStyle, const TDoubleDoublePr1Vec& samples, double& result) const; //! This is a slow method that uses numerical root finding to compute //! the quantile so ***only*** use this for testing. //! //! \param[in] percentage The desired quantile expressed as a percentage. //! \param[in] eps The tolerated error in the quantile: if it could be //! calculated, \p result will be no further than \p eps away from //! the exact quantile. //! \param[out] result Filled in with the quantile if it could be found. //! \note Since this is for testing purposes only it is not especially //! robust. For example, it won't handle a normal with mean of \f$10^8\f$ //! and standard deviation of \f$10^{-8}\f$ particularly well. bool marginalLikelihoodQuantileForTest(double percentage, double eps, double& result) const; //! This is a slow method that uses numerical integration to compute //! the mean so ***only*** use this for testing. //! //! \param[out] result Filled in with the mean if it could be found. //! \note This makes use of marginalLikelihoodQuantile and suffers //! the same limitations. bool marginalLikelihoodMeanForTest(double& result) const; //! This is a slow method that uses numerical integration to compute //! the variance so ***only*** use this for testing. //! //! \param[out] result Filled in with the variance if it could be //! found. //! \note This makes use of marginalLikelihoodQuantile and suffers //! the same limitations. bool marginalLikelihoodVarianceForTest(double& result) const; protected: maths::common::CPrior* m_Prior; }; //! \brief A mix in of test interface which brings the necessary functions //! into scope and implements value semantics. //! //! IMPLMENTATION:\n //! This is variant of the curiously recurring template pattern to mix //! in some interface for test purposes only. //! //! Note that this also uses double inheritance, contravening the coding //! standards, because it's the cleanest way to implement this functionality. //! DON'T use this elsewhere. template<typename PRIOR> class CPriorTestInterfaceMixin : public PRIOR, public CPriorTestInterface { public: using CPriorTestInterface::addSamples; using CPriorTestInterface::jointLogMarginalLikelihood; using CPriorTestInterface::minusLogJointCdf; using CPriorTestInterface::minusLogJointCdfComplement; using CPriorTestInterface::probabilityOfLessLikelySamples; using PRIOR::addSamples; using PRIOR::jointLogMarginalLikelihood; using PRIOR::minusLogJointCdf; using PRIOR::minusLogJointCdfComplement; using PRIOR::probabilityOfLessLikelySamples; public: CPriorTestInterfaceMixin(const PRIOR& prior) : PRIOR(prior), CPriorTestInterface(static_cast<maths::common::CPrior&>(*this)) {} CPriorTestInterfaceMixin(const CPriorTestInterfaceMixin& other) : PRIOR(static_cast<const PRIOR&>(other)), CPriorTestInterface(static_cast<maths::common::CPrior&>(*this)) {} ~CPriorTestInterfaceMixin() override {} //! Swap the contents efficiently. void swap(CPriorTestInterfaceMixin& other) { this->PRIOR::swap(other); } //! Clone the object. CPriorTestInterfaceMixin* clone() const override { return new CPriorTestInterfaceMixin(*this); } }; //! \brief Kernel for checking normalization with CPrior::expectation. class C1dUnitKernel { public: bool operator()(double /*x*/, double& result) const { result = 1.0; return true; } }; //! \brief Kernel for computing the variance with CPrior::expectation. class CVarianceKernel { public: CVarianceKernel(double mean) : m_Mean(mean) {} bool operator()(double x, double& result) const { result = (x - m_Mean) * (x - m_Mean); return true; } private: double m_Mean; }; //! \brief A constant unit kernel. template<std::size_t N> class CUnitKernel { public: CUnitKernel(const maths::common::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} bool operator()(const maths::common::CVectorNx1<double, N>& x, double& result) const { m_X[0].assign(x.begin(), x.end()); m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, result); result = std::exp(result); return true; } private: static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::common::CMultivariatePrior* m_Prior; mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> ml::maths_t::TDouble10VecWeightsAry1Vec CUnitKernel<N>::SINGLE_UNIT{ ml::maths_t::CUnitWeights::unit<ml::maths_t::TDouble10Vec>(N)}; //! \brief The kernel for computing the mean of a multivariate prior. template<std::size_t N> class CMeanKernel { public: CMeanKernel(const maths::common::CMultivariatePrior& prior) : m_Prior(&prior), m_X(1) {} bool operator()(const maths::common::CVectorNx1<double, N>& x, maths::common::CVectorNx1<double, N>& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = x * likelihood; return true; } private: static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::common::CMultivariatePrior* m_Prior; mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> ml::maths_t::TDouble10VecWeightsAry1Vec CMeanKernel<N>::SINGLE_UNIT{ ml::maths_t::CUnitWeights::unit<ml::maths_t::TDouble10Vec>(N)}; //! \brief The kernel for computing the variance of a multivariate prior. template<std::size_t N> class CCovarianceKernel { public: CCovarianceKernel(const maths::common::CMultivariatePrior& prior, const maths::common::CVectorNx1<double, N>& mean) : m_Prior(&prior), m_Mean(mean), m_X(1) {} bool operator()(const maths::common::CVectorNx1<double, N>& x, maths::common::CSymmetricMatrixNxN<double, N>& result) const { m_X[0].assign(x.begin(), x.end()); double likelihood; m_Prior->jointLogMarginalLikelihood(m_X, SINGLE_UNIT, likelihood); likelihood = std::exp(likelihood); result = (x - m_Mean).outer() * likelihood; return true; } private: static ml::maths_t::TDouble10VecWeightsAry1Vec SINGLE_UNIT; private: const maths::common::CMultivariatePrior* m_Prior; maths::common::CVectorNx1<double, N> m_Mean; mutable handy_typedefs::TDouble10Vec1Vec m_X; }; template<std::size_t N> ml::maths_t::TDouble10VecWeightsAry1Vec CCovarianceKernel<N>::SINGLE_UNIT{ ml::maths_t::CUnitWeights::unit<ml::maths_t::TDouble10Vec>(N)}; //! A constant function. double constant(core_t::TTime time); //! A linear ramp. double ramp(core_t::TTime time); //! A Markov process. double markov(core_t::TTime time); //! Smooth daily periodic. double smoothDaily(core_t::TTime time); //! Smooth weekly periodic. double smoothWeekly(core_t::TTime time); //! Spikey daily periodic. double spikeyDaily(core_t::TTime time); //! Spikey daily + weekly periodic. double spikeyDailyWeekly(core_t::TTime time); //! Weekday/weekend periodic. double weekends(core_t::TTime time); //! Scales time input to \p generator. double scale(double scale, core_t::TTime time, handy_typedefs::TGenerator generator); } #endif // INCLUDED_ml_TestUtils_h