// lib/maths/time_series/unittest/CTimeSeriesModelTest.cc

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the following additional limitation. Functionality enabled by the
 * files subject to the Elastic License 2.0 may only be used in production when
 * invoked by an Elasticsearch process with a license key installed that permits
 * use of machine learning features. You may not use this file except in
 * compliance with the Elastic License 2.0 and the foregoing additional
 * limitation.
 */

#include <core/CJsonStatePersistInserter.h>
#include <core/CJsonStateRestoreTraverser.h>
#include <core/CLogger.h>
#include <core/CMemoryCircuitBreaker.h>
#include <core/CMemoryDef.h>

#include <maths/common/CLogNormalMeanPrecConjugate.h>
#include <maths/common/CMultimodalPrior.h>
#include <maths/common/CMultivariateMultimodalPrior.h>
#include <maths/common/CMultivariateNormalConjugate.h>
#include <maths/common/CNormalMeanPrecConjugate.h>
#include <maths/common/COneOfNPrior.h>
#include <maths/common/CXMeansOnline.h>
#include <maths/common/CXMeansOnline1d.h>
#include <maths/common/Constants.h>
#include <maths/common/MathsTypes.h>

#include <maths/time_series/CDecayRateController.h>
#include <maths/time_series/CTimeSeriesDecomposition.h>
#include <maths/time_series/CTimeSeriesDecompositionStub.h>
#include <maths/time_series/CTimeSeriesModel.h>
#include <maths/time_series/CTimeSeriesSegmentation.h>

#include <test/BoostTestCloseAbsolute.h>
#include <test/CRandomNumbers.h>
#include <test/CTimeSeriesTestData.h>

#include "TestUtils.h"

#include <boost/test/unit_test.hpp>

#include <cmath>
#include <fstream>
#include <memory>
#include <numeric>

using TSizeVec = std::vector<std::size_t>;

BOOST_TEST_DONT_PRINT_LOG_VALUE(TSizeVec::iterator)

BOOST_AUTO_TEST_SUITE(CTimeSeriesModelTest)

using namespace ml;

namespace {
using namespace handy_typedefs;

using TBool2Vec = core::CSmallVector<bool, 2>;
using TDoubleVec = std::vector<double>;
using TDoubleVecVec = std::vector<TDoubleVec>;
using TDouble2Vec = core::CSmallVector<double, 2>;
using TDouble2Vec1Vec = core::CSmallVector<TDouble2Vec, 1>;
using TDouble2VecWeightsAry = maths_t::TDouble2VecWeightsAry;
using TDouble2VecWeightsAryVec = std::vector<TDouble2VecWeightsAry>;
using TDouble10VecWeightsAry1Vec = maths_t::TDouble10VecWeightsAry1Vec;
using TSize1Vec = core::CSmallVector<std::size_t, 1>;
using TTime2Vec = core::CSmallVector<core_t::TTime, 2>;
using TTime2Vec1Vec = core::CSmallVector<TTime2Vec, 1>;
using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;
using TTail10Vec = core::CSmallVector<maths_t::ETail, 10>;
using TDoubleWeightsAry1Vec = maths_t::TDoubleWeightsAry1Vec;
using TTimeDouble2VecSizeTrVec = maths::common::CModel::TTimeDouble2VecSizeTrVec;
using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMeanAccumulator2Vec = core::CSmallVector<TMeanAccumulator, 2>;
using TFloatMeanAccumulator =
    maths::common::CBasicStatistics::SSampleMean<maths::common::CFloatStorage>::TAccumulator;
using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
using TDecompositionPtr = std::shared_ptr<maths::time_series::CTimeSeriesDecompositionInterface>;
using TDecompositionPtr10Vec = core::CSmallVector<TDecompositionPtr, 10>;
using TDecayRateController2Ary =
    maths::time_series::CUnivariateTimeSeriesModel::TDecayRateController2Ary;
using TSetWeightsFunc = void (*)(double, std::size_t, TDouble2VecWeightsAry&);

const double MINIMUM_SEASONAL_SCALE{0.25};
const double MINIMUM_SIGNIFICANT_CORRELATION{0.4};
const double DECAY_RATE{0.0005};
const std::size_t TAG{0};

//! \brief Implements the allocator for new correlate priors.
class CTimeSeriesCorrelateModelAllocator
    : public maths::time_series::CTimeSeriesCorrelateModelAllocator {
public:
    //! Check if we can still allocate any correlations.
    bool areAllocationsAllowed() const override { return true; }

    //! Check if \p correlations exceeds the memory limit.
    bool exceedsLimit(std::size_t /*correlations*/) const override {
        return false;
    }

    //! Get the maximum number of correlations we should model.
    std::size_t maxNumberCorrelations() const override { return 5000; }

    //! Get the chunk size in which to allocate correlations.
    std::size_t chunkSize() const override { return 500; }

    //! Create a new prior for a correlation model.
    TMultivariatePriorPtr newPrior() const override {
        return TMultivariatePriorPtr(
            maths::common::CMultivariateNormalConjugate<2>::nonInformativePrior(
                maths_t::E_ContinuousData, DECAY_RATE)
                .clone());
    }
};

maths::common::CModelParams modelParams(core_t::TTime bucketLength) {
    using TTimeDoubleMap = std::map<core_t::TTime, double>;
    static TTimeDoubleMap learnRates;
    learnRates[bucketLength] = static_cast<double>(bucketLength) / 1800.0;
    double minimumSeasonalVarianceScale{MINIMUM_SEASONAL_SCALE};
    return maths::common::CModelParams{bucketLength,
                                       learnRates[bucketLength],
                                       DECAY_RATE,
                                       minimumSeasonalVarianceScale,
                                       12 * core::constants::HOUR,
                                       core::constants::DAY};
}

maths::common::CModelAddSamplesParams
addSampleParams(double interval,
                const TDouble2VecWeightsAryVec& trendWeights,
                const TDouble2VecWeightsAryVec& residualWeights) {
    maths::common::CModelAddSamplesParams params;
    params.isInteger(false)
        .propagationInterval(interval)
        .trendWeights(trendWeights)
        .priorWeights(residualWeights);
    return params;
}

maths::common::CModelAddSamplesParams
addSampleParams(double interval, const TDouble2VecWeightsAryVec& weights) {
    return addSampleParams(interval, weights, weights);
}

maths::common::CModelAddSamplesParams
addSampleParams(const TDouble2VecWeightsAryVec& weights) {
    return addSampleParams(1.0, weights);
}

maths::common::CModelProbabilityParams
computeProbabilityParams(const TDouble2VecWeightsAry& weight) {
    maths::common::CModelProbabilityParams params;
    params.addCalculation(maths_t::E_TwoSided)
        .seasonalConfidenceInterval(50.0)
        .addWeights(weight);
    return params;
}
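// Note on the helpers above: the learn rate scales linearly with the bucket
// length, so a 600 second bucket learns at 600 / 1800 = 1/3 of the rate of
// the reference 1800 second bucket. The addSampleParams and
// computeProbabilityParams builders simply package the weight vectors the way
// CModel::addSamples and CModel::probability expect; the 50.0 seasonal
// confidence interval is an arbitrary choice for these tests.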
maths::common::CNormalMeanPrecConjugate univariateNormal(double decayRate = DECAY_RATE) {
    return maths::common::CNormalMeanPrecConjugate::nonInformativePrior(
        maths_t::E_ContinuousData, decayRate);
}

maths::common::CLogNormalMeanPrecConjugate univariateLogNormal(double decayRate = DECAY_RATE) {
    return maths::common::CLogNormalMeanPrecConjugate::nonInformativePrior(
        maths_t::E_ContinuousData, 0.0, decayRate);
}

maths::common::CMultimodalPrior univariateMultimodal(double decayRate = DECAY_RATE) {
    maths::common::CXMeansOnline1d clusterer{
        maths_t::E_ContinuousData, maths::common::CAvailableModeDistributions::ALL,
        maths_t::E_ClustersFractionWeight, decayRate};
    return maths::common::CMultimodalPrior{maths_t::E_ContinuousData, clusterer,
                                           univariateNormal(), decayRate};
}

maths::common::CMultivariateNormalConjugate<3> multivariateNormal(double decayRate = DECAY_RATE) {
    return maths::common::CMultivariateNormalConjugate<3>::nonInformativePrior(
        maths_t::E_ContinuousData, decayRate);
}

maths::common::CMultivariateMultimodalPrior<3>
multivariateMultimodal(double decayRate = DECAY_RATE) {
    maths::common::CXMeansOnline<maths::common::CFloatStorage, 3> clusterer(
        maths_t::E_ContinuousData, maths_t::E_ClustersFractionWeight, decayRate);
    return maths::common::CMultivariateMultimodalPrior<3>(
        maths_t::E_ContinuousData, clusterer,
        maths::common::CMultivariateNormalConjugate<3>::nonInformativePrior(
            maths_t::E_ContinuousData, decayRate),
        decayRate);
}

maths::time_series::CUnivariateTimeSeriesModel::TDecayRateController2Ary
decayRateControllers(std::size_t dimension) {
    return {{maths::time_series::CDecayRateController(
                 maths::time_series::CDecayRateController::E_PredictionBias |
                     maths::time_series::CDecayRateController::E_PredictionErrorIncrease,
                 dimension),
             maths::time_series::CDecayRateController(
                 maths::time_series::CDecayRateController::E_PredictionBias |
                     maths::time_series::CDecayRateController::E_PredictionErrorIncrease |
                     maths::time_series::CDecayRateController::E_PredictionErrorDecrease,
                 dimension)}};
}

auto makeComponentDetectedCallback(double learnRate,
                                   maths::common::CPrior& residualModel,
                                   TDecayRateController2Ary* controllers = nullptr) {
    return [learnRate, &residualModel, controllers](TFloatMeanAccumulatorVec residuals) {
        if (controllers != nullptr) {
            residualModel.decayRate(residualModel.decayRate() /
                                    (*controllers)[1].multiplier());
            (*controllers)[0].reset();
            (*controllers)[1].reset();
        }
        residualModel.setToNonInformative(0.0, residualModel.decayRate());
        if (residuals.empty() == false) {
            maths_t::TDoubleWeightsAry1Vec weights(1);
            double buckets{
                std::accumulate(residuals.begin(), residuals.end(), 0.0,
                                [](auto partialBuckets, const auto& residual) {
                                    return partialBuckets +
                                           maths::common::CBasicStatistics::count(residual);
                                }) /
                learnRate};
            double time{buckets / static_cast<double>(residuals.size())};
            for (const auto& residual : residuals) {
                double weight(maths::common::CBasicStatistics::count(residual));
                if (weight > 0.0) {
                    weights[0] = maths_t::countWeight(weight);
                    residualModel.addSamples(
                        {maths::common::CBasicStatistics::mean(residual)}, weights);
                    residualModel.propagateForwardsByTime(time);
                }
            }
        }
    };
}
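// When the decomposition detects a new seasonal component, the callback
// returned above resets the residual model to non-informative and replays the
// component's residuals into it, weighting each by its count. The time
// propagated per replayed sample is the equivalent number of buckets, i.e.
// (total residual count / learn rate) / (number of residuals).
// reinitializeResidualModel below is the multivariate analogue.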
void reinitializeResidualModel(double learnRate,
                               TDecompositionPtr10Vec& trends,
                               maths::common::CMultivariatePrior& residualModel,
                               TDecayRateController2Ary* controllers = nullptr) {
    using TFloatMeanAccumulatorVecVec = std::vector<TFloatMeanAccumulatorVec>;
    if (controllers != nullptr) {
        for (auto& trend : trends) {
            trend->decayRate(trend->decayRate() / (*controllers)[0].multiplier());
        }
        residualModel.decayRate(residualModel.decayRate() /
                                (*controllers)[1].multiplier());
        (*controllers)[0].reset();
        (*controllers)[1].reset();
    }
    residualModel.setToNonInformative(0.0, residualModel.decayRate());
    std::size_t dimension{residualModel.dimension()};
    TFloatMeanAccumulatorVecVec residuals(dimension);
    for (std::size_t d = 0; d < dimension; ++d) {
        residuals[d] = trends[d]->residuals(false);
    }
    if (residuals.empty() == false) {
        TDouble10Vec1Vec samples;
        TDoubleVec weights;
        double time{0.0};
        for (std::size_t d = 0; d < dimension; ++d) {
            samples.resize(residuals[d].size(), TDouble10Vec(dimension));
            weights.resize(residuals[d].size(), std::numeric_limits<double>::max());
            double buckets{
                std::accumulate(residuals[d].begin(), residuals[d].end(), 0.0,
                                [](auto partialBuckets, const auto& residual) {
                                    return partialBuckets +
                                           maths::common::CBasicStatistics::count(residual);
                                }) /
                learnRate};
            time += buckets / static_cast<double>(residuals.size());
            for (std::size_t i = 0; i < residuals[d].size(); ++i) {
                samples[i][d] = maths::common::CBasicStatistics::mean(residuals[d][i]);
                weights[i] = std::min(
                    weights[i], static_cast<double>(maths::common::CBasicStatistics::count(
                                    residuals[d][i])));
            }
        }
        time /= static_cast<double>(dimension);
        maths_t::TDouble10VecWeightsAry1Vec weight(1);
        for (std::size_t i = 0; i < samples.size(); ++i) {
            if (weights[i] > 0.0) {
                weight[0] = maths_t::countWeight(weights[i], dimension);
                residualModel.addSamples({samples[i]}, weight);
                residualModel.propagateForwardsByTime(time);
            }
        }
    }
}

class CDebugGenerator {
public:
    static const bool ENABLED{false};

public:
    explicit CDebugGenerator(std::string file = "results.py")
        : m_File{std::move(file)} {
        if (ENABLED) {
            m_ModelBounds.resize(3);
            m_Forecast.resize(3);
        }
    }

    ~CDebugGenerator() {
        if (ENABLED) {
            std::ofstream file_;
            file_.open(m_File);
            auto file = (file_ << core::CScopePrintContainers{});
            file << "import matplotlib.pyplot as plt;\n";
            file << "a = " << m_Actual << ";\n";
            file << "plt.plot(a, 'b');\n";
            for (std::size_t i = 0; i < 3; ++i) {
                file << "p" << i << " = " << m_ModelBounds[i] << ";\n";
                file << "f" << i << " = " << m_Forecast[i] << ";\n";
                file << "plt.plot(p" << i << ", 'g');\n";
                file << "plt.plot(range(len(a)-len(f" << i << "),len(a)),f" << i << ", 'r');\n";
            }
            file << "plt.show();\n";
        }
    }

    void addValue(double value) {
        if (ENABLED) {
            m_Actual.push_back(value);
        }
    }

    void addValueAndPrediction(core_t::TTime time,
                               double value,
                               const maths::time_series::CUnivariateTimeSeriesModel& model) {
        if (ENABLED) {
            m_Actual.push_back(value);
            auto x = model.confidenceInterval(
                time, 90.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
            if (x.size() == 3) {
                for (std::size_t i = 0; i < 3; ++i) {
                    m_ModelBounds[i].push_back(x[i][0]);
                }
            }
        }
    }

    void addForecast(const maths::common::SErrorBar& errorBar) {
        if (ENABLED) {
            m_Forecast[0].push_back(errorBar.s_LowerBound);
            m_Forecast[1].push_back(errorBar.s_Predicted);
            m_Forecast[2].push_back(errorBar.s_UpperBound);
        }
    }

private:
    std::string m_File;
    TDoubleVec m_Actual;
    TDoubleVecVec m_ModelBounds;
    TDoubleVecVec m_Forecast;
};
}
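// Most of the tests below follow the same pattern: drive a
// C(Univariate|Multivariate)TimeSeriesModel through its public interface
// while applying the identical updates to a hand-rolled trend decomposition
// and residual prior, then assert the two agree, usually by comparing
// checksums or point predictions.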
BOOST_AUTO_TEST_CASE(testClone) {
    // Test all the state is cloned.

    core_t::TTime bucketLength{600};

    test::CRandomNumbers rng;

    {
        maths::time_series::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        maths::time_series::CTimeSeriesCorrelations correlations{
            MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE};
        maths::time_series::CUnivariateTimeSeriesModel model(
            modelParams(bucketLength), 1, trend, univariateNormal(), &controllers);
        model.modelCorrelations(correlations);

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        std::uint64_t checksum1 = model.checksum();
        std::unique_ptr<maths::time_series::CUnivariateTimeSeriesModel> clone1{
            model.clone(1)};
        std::uint64_t checksum2 = clone1->checksum();
        BOOST_REQUIRE_EQUAL(checksum1, checksum2);

        std::unique_ptr<maths::time_series::CUnivariateTimeSeriesModel> clone2{
            model.clone(2)};
        BOOST_REQUIRE_EQUAL(2, clone2->identifier());
    }
    {
        maths::time_series::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(3);
        maths::time_series::CMultivariateTimeSeriesModel model(
            modelParams(bucketLength), trend, multivariateNormal(), &controllers);

        TDoubleVec mean{13.0, 9.0, 10.0};
        TDoubleVecVec covariance{{3.5, 2.9, 0.5}, {2.9, 3.6, 0.1}, {0.5, 0.1, 2.1}};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (const auto& sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        std::uint64_t checksum1 = model.checksum();
        std::unique_ptr<maths::time_series::CMultivariateTimeSeriesModel> clone1{
            model.clone(1)};
        std::uint64_t checksum2 = clone1->checksum();
        BOOST_REQUIRE_EQUAL(checksum1, checksum2);

        std::unique_ptr<maths::time_series::CMultivariateTimeSeriesModel> clone2{
            model.clone(2)};
        std::uint64_t checksum3 = clone2->checksum();
        BOOST_REQUIRE_EQUAL(checksum1, checksum3);
        BOOST_REQUIRE_EQUAL(0, clone2->identifier());
    }
}
BOOST_AUTO_TEST_CASE(testMode) {
    // Test that the modes we get match updating the trend(s) and prior directly.

    core_t::TTime bucketLength{600};

    test::CRandomNumbers rng;

    auto testExpectedMode = [](const maths::time_series::CTimeSeriesDecomposition& trend,
                               const maths::common::CNormalMeanPrecConjugate& prior,
                               const maths::time_series::CUnivariateTimeSeriesModel& model,
                               core_t::TTime time) {
        double expectedMode{trend.value(time, 0.0, false).mean() +
                            prior.marginalLikelihoodMode()};
        TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit<TDouble2Vec>(1)));
        LOG_DEBUG(<< "expected mode = " << expectedMode);
        LOG_DEBUG(<< "mode = " << mode[0]);
        BOOST_REQUIRE_EQUAL(1, mode.size());
        BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedMode, mode[0], 1e-3 * expectedMode);
    };

    auto testExpectedMultivariateMode =
        [](const TDecompositionPtr10Vec& trends,
           const maths::common::CMultivariateNormalConjugate<3>& prior,
           const maths::time_series::CMultivariateTimeSeriesModel& model,
           core_t::TTime time) {
            TDouble2Vec expectedMode(prior.marginalLikelihoodMode(
                maths_t::CUnitWeights::unit<TDouble10Vec>(3)));
            for (std::size_t i = 0; i < trends.size(); ++i) {
                expectedMode[i] += trends[i]->value(time, 0.0, false).mean();
            }
            TDouble2Vec mode(model.mode(time, maths_t::CUnitWeights::unit<TDouble2Vec>(3)));
            LOG_DEBUG(<< "expected mode = " << expectedMode);
            LOG_DEBUG(<< "mode = " << mode);
            BOOST_REQUIRE_EQUAL(3, mode.size());
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedMode[0], mode[0], 0.02 * expectedMode[0]);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedMode[1], mode[1], 0.02 * expectedMode[0]);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedMode[2], mode[2], 0.02 * expectedMode[0]);
        };

    LOG_DEBUG(<< "Univariate no trend");
    {
        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        maths::time_series::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, prior};

        core_t::TTime time{0};
        for (auto sample : samples) {
            trend.addPoint(time, sample);
            TDouble1Vec sample_{trend.detrend(time, sample, 0.0, false)};
            prior.addSamples(sample_, maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        time = 0;
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        testExpectedMode(trend, prior, model, time);
    }

    LOG_DEBUG(<< "Univariate trend");
    {
        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        auto params = modelParams(bucketLength);
        maths::time_series::CTimeSeriesDecomposition trend{48.0 * DECAY_RATE, bucketLength};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{params, 0, trend, prior};

        core_t::TTime time{0};
        for (auto& sample : samples) {
            sample += 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
                                             static_cast<double>(time) / 86400.0);
            time += bucketLength;
        }

        TDouble2VecWeightsAryVec unit{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        time = 0;
        for (auto sample : samples) {
            model.addSamples(addSampleParams(unit),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            trend.addPoint(time, sample, core::CMemoryCircuitBreakerStub::instance(),
                           maths_t::CUnitWeights::UNIT,
                           makeComponentDetectedCallback(params.learnRate(), prior));
            prior.addSamples({trend.detrend(time, sample, 0.0, false)},
                             maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

        testExpectedMode(trend, prior, model, time);
    }
    LOG_DEBUG(<< "Multivariate no trend");
    {
        TDoubleVec mean{10.0, 15.0, 11.0};
        TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), *trends[0], prior};

        core_t::TTime time{0};
        for (const auto& sample : samples) {
            TDouble10Vec1Vec detrended{TDouble10Vec(3)};
            for (std::size_t i = 0; i < sample.size(); ++i) {
                trends[i]->addPoint(time, sample[i]);
                detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false);
            }
            prior.addSamples(detrended, maths_t::CUnitWeights::singleUnit<TDouble10Vec>(3));
            prior.propagateForwardsByTime(1.0);
        }

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        time = 0;
        for (const auto& sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        testExpectedMultivariateMode(trends, prior, model, time);
    }

    LOG_DEBUG(<< "Multivariate trend");
    {
        TDoubleVec mean{10.0, 15.0, 11.0};
        TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

        auto params = modelParams(bucketLength);
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{params, *trends[0], prior};

        core_t::TTime time{0};
        for (auto& sample : samples) {
            double amplitude{10.0};
            for (std::size_t i = 0; i < sample.size(); ++i) {
                sample[i] += 30.0 + amplitude *
                                        std::sin(boost::math::double_constants::two_pi *
                                                 static_cast<double>(time) / 86400.0);
                amplitude += 4.0;
            }
            time += bucketLength;
        }

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        time = 0;
        for (const auto& sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            bool reinitialize{false};
            TDouble10Vec1Vec detrended{TDouble10Vec(3)};
            for (std::size_t i = 0; i < sample.size(); ++i) {
                trends[i]->addPoint(
                    time, sample[i], core::CMemoryCircuitBreakerStub::instance(),
                    maths_t::CUnitWeights::UNIT,
                    [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; });
                detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false);
            }
            if (reinitialize) {
                reinitializeResidualModel(params.learnRate(), trends, prior);
            }
            prior.addSamples(detrended, maths_t::CUnitWeights::singleUnit<TDouble10Vec>(3));
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

        testExpectedMultivariateMode(trends, prior, model, time);
    }
}
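// In the next test, addBucketValue is expected to adjust the residual model's
// support for the (possibly negative) bucket value; the reference prior
// applies the equivalent adjustOffset(-1.0) by hand and the checksums must
// then agree.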
BOOST_AUTO_TEST_CASE(testAddBucketValue) {
    // Test that the prior support is correctly updated to account
    // for negative bucket values.

    core_t::TTime bucketLength{600};

    maths::time_series::CTimeSeriesDecompositionStub trend;
    maths::common::CLogNormalMeanPrecConjugate prior{univariateLogNormal()};
    maths::time_series::CUnivariateTimeSeriesModel model{modelParams(bucketLength),
                                                         0, trend, prior};

    TTimeDouble2VecSizeTrVec samples{
        core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG),
        core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG),
        core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG),
        core::make_triple(core_t::TTime{12}, TDouble2Vec{1.2}, TAG)};
    TDoubleVec weights{1.0, 1.5, 0.9, 1.9};
    TDouble2VecWeightsAryVec modelWeights{
        maths_t::countWeight(TDouble2Vec{weights[0]}),
        maths_t::countWeight(TDouble2Vec{weights[1]}),
        maths_t::countWeight(TDouble2Vec{weights[2]}),
        maths_t::countWeight(TDouble2Vec{weights[3]})};

    for (std::size_t i = 0; i < samples.size(); ++i) {
        prior.addSamples({samples[i].second[0]}, {maths_t::countWeight(weights[i])});
    }
    prior.propagateForwardsByTime(1.0);
    prior.adjustOffset({-1.0}, maths_t::CUnitWeights::SINGLE_UNIT);

    model.addSamples(addSampleParams(modelWeights), samples);
    model.addBucketValue({core::make_triple(core_t::TTime{20}, TDouble2Vec{-1.0}, TAG)});

    BOOST_REQUIRE_EQUAL(prior.checksum(), model.residualModel().checksum());
}

BOOST_AUTO_TEST_CASE(testAddMultipleSamples) {
    // Test adding multiple samples at once.

    test::CRandomNumbers rng;

    core_t::TTime bucketLength{1800};

    LOG_DEBUG(<< "Multiple samples univariate");
    {
        maths::time_series::CTimeSeriesDecompositionStub trend;
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, prior};

        TTimeDouble2VecSizeTrVec samples{
            core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5}, TAG),
            core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9}, TAG),
            core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1}, TAG)};
        TDoubleVec weights{1.0, 1.5, 0.9};
        TDouble2VecWeightsAryVec modelWeights{
            maths_t::countWeight(TDouble2Vec{weights[0]}),
            maths_t::countWeight(TDouble2Vec{weights[1]}),
            maths_t::countWeight(TDouble2Vec{weights[2]})};

        model.addSamples(addSampleParams(modelWeights), samples);

        trend.addPoint(samples[1].first, samples[1].second[0],
                       core::CMemoryCircuitBreakerStub::instance(),
                       maths_t::countWeight(weights[1]));
        trend.addPoint(samples[2].first, samples[2].second[0],
                       core::CMemoryCircuitBreakerStub::instance(),
                       maths_t::countWeight(weights[2]));
        trend.addPoint(samples[0].first, samples[0].second[0],
                       core::CMemoryCircuitBreakerStub::instance(),
                       maths_t::countWeight(weights[0]));
        prior.addSamples(
            {samples[2].second[0], samples[0].second[0], samples[1].second[0]},
            {maths_t::countWeight(weights[2]), maths_t::countWeight(weights[0]),
             maths_t::countWeight(weights[1])});
        prior.propagateForwardsByTime(1.0);

        std::uint64_t checksum1{trend.checksum()};
        std::uint64_t checksum2{model.trendModel().checksum()};
        LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
        BOOST_REQUIRE_EQUAL(checksum1, checksum2);
        checksum1 = prior.checksum();
        checksum2 = model.residualModel().checksum();
        LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
        BOOST_REQUIRE_EQUAL(checksum1, checksum2);
    }

    LOG_DEBUG(<< "Multiple samples multivariate");
    {
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), *trends[0], prior};

        TTimeDouble2VecSizeTrVec samples{
            core::make_triple(core_t::TTime{20}, TDouble2Vec{3.5, 3.4, 3.3}, TAG),
            core::make_triple(core_t::TTime{12}, TDouble2Vec{3.9, 3.8, 3.7}, TAG),
            core::make_triple(core_t::TTime{18}, TDouble2Vec{2.1, 2.0, 1.9}, TAG)};
        double weights[][3]{{1.0, 1.1, 1.2}, {1.5, 1.6, 1.7}, {0.9, 1.0, 1.1}};
        TDouble2VecWeightsAryVec modelWeights{
            maths_t::countWeight(TDouble2Vec(weights[0], weights[0] + 3)),
            maths_t::countWeight(TDouble2Vec(weights[1], weights[1] + 3)),
            maths_t::countWeight(TDouble2Vec(weights[2], weights[2] + 3))};

        model.addSamples(addSampleParams(modelWeights), samples);

        for (std::size_t i = 0; i < trends.size(); ++i) {
            trends[i]->addPoint(samples[1].first, samples[1].second[i],
                                core::CMemoryCircuitBreakerStub::instance(),
                                maths_t::countWeight(weights[0][i]));
            trends[i]->addPoint(samples[2].first, samples[2].second[i],
                                core::CMemoryCircuitBreakerStub::instance(),
                                maths_t::countWeight(weights[1][i]));
            trends[i]->addPoint(samples[0].first, samples[0].second[i],
                                core::CMemoryCircuitBreakerStub::instance(),
                                maths_t::countWeight(weights[2][i]));
        }
        TDouble10Vec1Vec samples_{samples[2].second, samples[0].second, samples[1].second};
        TDouble10VecWeightsAry1Vec weights_{
            maths_t::countWeight(TDouble10Vec(weights[2], weights[2] + 3)),
            maths_t::countWeight(TDouble10Vec(weights[0], weights[0] + 3)),
            maths_t::countWeight(TDouble10Vec(weights[1], weights[1] + 3))};
        prior.addSamples(samples_, weights_);
        prior.propagateForwardsByTime(1.0);

        for (std::size_t i = 0; i < trends.size(); ++i) {
            std::uint64_t checksum1{trends[i]->checksum()};
            std::uint64_t checksum2{model.trendModel()[i]->checksum()};
            LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);
        }
        std::uint64_t checksum1{prior.checksum()};
        std::uint64_t checksum2{model.residualModel().checksum()};
        LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
        BOOST_REQUIRE_EQUAL(checksum1, checksum2);
    }
}
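// A subtlety in testAddMultipleSamples above: the reference trend is updated
// in time order (samples 1, 2, 0) while the reference prior receives the
// samples ordered by value (samples 2, 0, 1). The checksum comparisons only
// pass because these orderings match the ones addSamples applies internally.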
BOOST_AUTO_TEST_CASE(testNonUnitPropagationIntervalInAddSamples) {
    // Test a non-unit propagation interval.

    test::CRandomNumbers rng;

    core_t::TTime bucketLength{1800};

    LOG_DEBUG(<< "Propagation interval univariate");
    {
        maths::time_series::CTimeSeriesDecompositionStub trend;
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, prior};

        TDoubleVec interval{1.0, 1.1, 0.4};
        TDouble2Vec samples[]{{10.0}, {13.9}, {27.1}};
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        maths_t::setCount(TDouble2Vec{1.5}, weights[0]);
        maths_t::setOutlierWeight(TDouble2Vec{0.9}, weights[0]);
        maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[0]);

        core_t::TTime time{0};
        for (std::size_t i = 0; i < 3; ++i) {
            TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
            model.addSamples(addSampleParams(interval[i], weights), sample);

            TDoubleWeightsAry1Vec weight{maths_t::CUnitWeights::UNIT};
            for (std::size_t j = 0; j < weights[0].size(); ++j) {
                weight[0][j] = weights[0][j][0];
            }
            prior.addSamples(TDouble1Vec(samples[i]), weight);
            prior.propagateForwardsByTime(interval[i]);

            std::uint64_t checksum1{prior.checksum()};
            std::uint64_t checksum2{model.residualModel().checksum()};
            LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);

            time += bucketLength;
        }
    }

    LOG_DEBUG(<< "Propagation interval multivariate");
    {
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), *trends[0], prior};

        TDoubleVec interval{1.0, 1.1, 0.4};
        TDouble2Vec samples[]{{13.5, 13.4, 13.3}, {13.9, 13.8, 13.7}, {20.1, 20.0, 10.9}};
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        maths_t::setCount(TDouble2Vec{1.0, 1.1, 1.2}, weights[0]);
        maths_t::setOutlierWeight(TDouble2Vec{0.1, 0.1, 0.2}, weights[0]);
        maths_t::setCountVarianceScale(TDouble2Vec{2.0, 2.1, 2.2}, weights[0]);

        core_t::TTime time{0};
        for (std::size_t i = 0; i < 3; ++i) {
            TTimeDouble2VecSizeTrVec sample{core::make_triple(time, samples[i], TAG)};
            model.addSamples(addSampleParams(interval[i], weights), sample);

            TDouble10VecWeightsAry1Vec weight{maths_t::CUnitWeights::unit<TDouble10Vec>(3)};
            for (std::size_t j = 0; j < weights[0].size(); ++j) {
                weight[0][j] = weights[0][j];
            }
            prior.addSamples({TDouble10Vec(samples[i])}, weight);
            prior.propagateForwardsByTime(interval[i]);

            std::uint64_t checksum1{prior.checksum()};
            std::uint64_t checksum2{model.residualModel().checksum()};
            LOG_DEBUG(<< "checksum1 = " << checksum1 << " checksum2 = " << checksum2);
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);

            time += bucketLength;
        }
    }
}
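// The propagation interval is the fraction of a bucket by which to age the
// residual model for one sample, so the behaviour checked above is simply
// that the model forwards it unchanged to CPrior::propagateForwardsByTime.
// The next test covers the decay rate controllers, which adapt how fast that
// ageing happens based on prediction bias and error.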
BOOST_AUTO_TEST_CASE(testWithDecayRateControlInAddSamples) {
    // Test decay rate control.

    test::CRandomNumbers rng;

    core_t::TTime bucketLength{1800};

    LOG_DEBUG(<< "Decay rate control univariate");
    {
        auto params = modelParams(bucketLength);
        maths::time_series::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        auto controllers = decayRateControllers(1);
        maths::time_series::CUnivariateTimeSeriesModel model(params, 1, trend,
                                                             prior, &controllers);

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 2000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto noise : samples) {
            double sample{20.0 +
                          4.0 * std::sin(boost::math::double_constants::two_pi *
                                         static_cast<double>(time) / 86400.0) +
                          (time / bucketLength > 1800 ? 10.0 : 0.0) + noise};

            TTimeDouble2VecSizeTrVec sample_{
                core::make_triple(time, TDouble2Vec{sample}, TAG)};
            model.addSamples(addSampleParams(weights), sample_);

            trend.addPoint(time, sample, core::CMemoryCircuitBreakerStub::instance(),
                           maths_t::CUnitWeights::UNIT,
                           makeComponentDetectedCallback(params.learnRate(),
                                                         prior, &controllers));
            double detrended{trend.detrend(time, sample, 0.0, false)};
            prior.addSamples({detrended}, maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);

            if (trend.initialized()) {
                double multiplier{controllers[0].multiplier(
                    {trend.meanValue(time)}, {{detrended}}, bucketLength,
                    model.params().learnRate(), DECAY_RATE)};
                trend.decayRate(multiplier * trend.decayRate());
            }
            if (prior.numberSamples() > 20.0) {
                double multiplier{controllers[1].multiplier(
                    {prior.marginalLikelihoodMean()},
                    {{detrended - prior.marginalLikelihoodMean()}}, bucketLength,
                    model.params().learnRate(), DECAY_RATE)};
                prior.decayRate(multiplier * prior.decayRate());
            }

            std::uint64_t checksum1{trend.checksum()};
            std::uint64_t checksum2{model.trendModel().checksum()};
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);
            checksum1 = prior.checksum();
            checksum2 = model.residualModel().checksum();
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);

            time += bucketLength;
        }
    }

    LOG_DEBUG(<< "Decay rate control multivariate");
    {
        auto params = modelParams(bucketLength);
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                24.0 * DECAY_RATE, bucketLength}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        auto controllers = decayRateControllers(3);
        maths::time_series::CMultivariateTimeSeriesModel model{params, *trends[0],
                                                               prior, &controllers};

        TDoubleVecVec samples;
        {
            TDoubleVec mean{10.0, 15.0, 11.0};
            TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}};
            rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);
        }

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (auto& sample : samples) {
            TDouble10Vec1Vec detrended{TDouble10Vec(3)};
            TDouble1Vec mean(3);

            double amplitude{10.0};
            for (double& dimension : sample) {
                dimension += 30.0 +
                             amplitude * std::sin(boost::math::double_constants::two_pi *
                                                  static_cast<double>(time) / 86400.0) +
                             (time / bucketLength > 1800 ? 10.0 : 0.0);
                amplitude += 4.0;
            }

            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});

            bool hasTrend{false};
            bool reinitialize{false};
            for (std::size_t i = 0; i < sample.size(); ++i) {
                trends[i]->addPoint(
                    time, sample[i], core::CMemoryCircuitBreakerStub::instance(),
                    maths_t::CUnitWeights::UNIT,
                    [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; });
                detrended[0][i] = trends[i]->detrend(time, sample[i], 0.0, false);
                mean[i] = trends[i]->meanValue(time);
                hasTrend |= true;
            }
            if (reinitialize) {
                reinitializeResidualModel(params.learnRate(), trends, prior, &controllers);
            }
            prior.addSamples(detrended, maths_t::CUnitWeights::singleUnit<TDouble10Vec>(3));
            prior.propagateForwardsByTime(1.0);

            if (hasTrend) {
                double multiplier{controllers[0].multiplier(
                    mean, {detrended[0]}, bucketLength,
                    model.params().learnRate(), DECAY_RATE)};
                for (const auto& trend : trends) {
                    trend->decayRate(multiplier * trend->decayRate());
                }
            }
            if (prior.numberSamples() > 20.0) {
                TDouble1Vec prediction(prior.marginalLikelihoodMean());
                TDouble1Vec predictionError(3);
                for (std::size_t d = 0; d < 3; ++d) {
                    predictionError[d] = detrended[0][d] - prediction[d];
                }
                double multiplier{controllers[1].multiplier(
                    prediction, {predictionError}, bucketLength,
                    model.params().learnRate(), DECAY_RATE)};
                prior.decayRate(multiplier * prior.decayRate());
            }

            for (std::size_t i = 0; i < trends.size(); ++i) {
                std::uint64_t checksum1{trends[i]->checksum()};
                std::uint64_t checksum2{model.trendModel()[i]->checksum()};
                BOOST_REQUIRE_EQUAL(checksum1, checksum2);
            }
            std::uint64_t checksum1{prior.checksum()};
            std::uint64_t checksum2{model.residualModel().checksum()};
            BOOST_REQUIRE_EQUAL(checksum1, checksum2);

            time += bucketLength;
        }
    }
}
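// predict returns the trend prediction plus a central value of the residual
// model (the midpoint of its zero-width confidence interval in the reference
// calculations below); for multimodal residual models it can additionally be
// passed a hint, in which case the prediction snaps to the mode nearest the
// hint. Both behaviours are exercised in the next test.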
BOOST_AUTO_TEST_CASE(testPredict) {
    // Test prediction with a trend and with multimodal data.

    core_t::TTime bucketLength{600};

    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Univariate seasonal");
    {
        auto params = modelParams(bucketLength);
        maths::time_series::CTimeSeriesDecomposition trend{48.0 * DECAY_RATE, bucketLength};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{params, 0, trend, prior};

        TDoubleVec samples;
        rng.generateNormalSamples(0.0, 4.0, 1008, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            sample += 10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
                                             static_cast<double>(time) / 86400.0);
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            trend.addPoint(time, sample, core::CMemoryCircuitBreakerStub::instance(),
                           maths_t::CUnitWeights::UNIT,
                           makeComponentDetectedCallback(params.learnRate(), prior));
            prior.addSamples({trend.detrend(time, sample, 0.0, false)},
                             maths_t::CUnitWeights::SINGLE_UNIT);
            prior.propagateForwardsByTime(1.0);
            time += bucketLength;
        }

        TMeanAccumulator meanError;
        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
            double trend_{10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
                                                 static_cast<double>(time_) / 86400.0)};
            double expected{trend.value(time_, 0.0, false).mean() +
                            maths::common::CBasicStatistics::mean(
                                prior.marginalLikelihoodConfidenceInterval(0.0))};
            double predicted{model.predict(time_)[0]};
            LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted
                      << " (trend = " << trend_ << ")");
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, predicted, 1e-3 * std::fabs(expected));
            meanError.add(std::fabs(trend_ - predicted) / 10.0);
        }
        LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(meanError));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.07);
    }

    LOG_DEBUG(<< "Univariate nearest mode");
    {
        maths::time_series::CTimeSeriesDecompositionStub trend;
        maths::common::CMultimodalPrior prior{univariateMultimodal()};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, prior};

        TMeanAccumulator modes[2];
        TDoubleVec samples, samples_;
        rng.generateNormalSamples(0.0, 4.0, 500, samples);
        rng.generateNormalSamples(10.0, 4.0, 500, samples_);
        modes[0].add(samples);
        modes[1].add(samples_);
        samples.insert(samples.end(), samples_.begin(), samples_.end());
        rng.random_shuffle(samples.begin(), samples.end());

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        maths::common::CModel::TSizeDoublePr1Vec empty;
        double predicted[]{model.predict(time, empty, {-2.0})[0],
                           model.predict(time, empty, {12.0})[0]};

        LOG_DEBUG(<< "expected(0) = " << maths::common::CBasicStatistics::mean(modes[0])
                  << " actual(0) = " << predicted[0]);
        LOG_DEBUG(<< "expected(1) = " << maths::common::CBasicStatistics::mean(modes[1])
                  << " actual(1) = " << predicted[1]);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            maths::common::CBasicStatistics::mean(modes[0]), predicted[0],
            0.1 * maths::common::CBasicStatistics::mean(modes[0]));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            maths::common::CBasicStatistics::mean(modes[1]), predicted[1],
            0.01 * maths::common::CBasicStatistics::mean(modes[1]));
    }

    LOG_DEBUG(<< "Multivariate Seasonal");
    {
        auto params = modelParams(bucketLength);
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                48.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                48.0 * DECAY_RATE, bucketLength}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecomposition{
                48.0 * DECAY_RATE, bucketLength}}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            maths::time_series::CMultivariateTimeSeriesModel{params, *trends[0], prior}};

        TDoubleVecVec samples;
        TDoubleVec mean{0.0, 2.0, 1.0};
        rng.generateMultivariateNormalSamples(
            mean, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}}, 1000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (auto& sample : samples) {
            for (auto& coordinate : sample) {
                coordinate += 10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi *
                                                     static_cast<double>(time) / 86400.0);
            }
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});

            bool reinitialize{false};
            TDouble10Vec detrended;
            for (std::size_t i = 0; i < sample.size(); ++i) {
                trends[i]->addPoint(
                    time, sample[i], core::CMemoryCircuitBreakerStub::instance(),
                    maths_t::CUnitWeights::UNIT,
                    [&reinitialize](TFloatMeanAccumulatorVec) { reinitialize = true; });
                detrended.push_back(trends[i]->detrend(time, sample[i], 0.0, false));
            }
            if (reinitialize) {
                reinitializeResidualModel(params.learnRate(), trends, prior);
            }
            prior.addSamples({detrended}, maths_t::CUnitWeights::singleUnit<TDouble10Vec>(3));
            prior.propagateForwardsByTime(1.0);

            time += bucketLength;
        }

        TMeanAccumulator meanError;
        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
            maths::common::CMultivariatePrior::TSize10Vec marginalize{1, 2};
            maths::common::CMultivariatePrior::TSizeDoublePr10Vec condition;
            for (std::size_t i = 0; i < mean.size(); ++i) {
                double trend_{mean[i] + 10.0 +
                              10.0 * std::sin(boost::math::double_constants::two_pi *
                                              static_cast<double>(time_) / 86400.0)};
                maths::common::CMultivariatePrior::TUnivariatePriorPtr margin{
                    prior.univariate(marginalize, condition).first};
                double expected{trends[i]->value(time_, 0.0, false).mean() +
                                maths::common::CBasicStatistics::mean(
                                    margin->marginalLikelihoodConfidenceInterval(0.0))};
                double predicted{model.predict(time_)[i]};
                --marginalize[std::min(i, marginalize.size() - 1)];
                LOG_DEBUG(<< "expected = " << expected << " predicted = " << predicted
                          << " (trend = " << trend_ << ")");
                BOOST_REQUIRE_CLOSE_ABSOLUTE(expected, predicted, 1e-3 * expected);
                meanError.add(std::fabs(trend_ - predicted) / 10.0);
            }
        }
        LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(meanError));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.07);
    }

    LOG_DEBUG(<< "Multivariate nearest mode");
    {
        TDecompositionPtr10Vec trends{
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}},
            TDecompositionPtr{new maths::time_series::CTimeSeriesDecompositionStub{}}};
        maths::common::CMultivariateMultimodalPrior<3> prior{multivariateMultimodal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), *trends[0], prior};

        TMeanAccumulator2Vec modes[2]{TMeanAccumulator2Vec(3), TMeanAccumulator2Vec(3)};
        TDoubleVecVec samples;
        {
            TDoubleVec means[]{{0.0, 2.0, 1.0}, {10.0, 15.0, 12.0}};
            TDoubleVecVec covariance{{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}};
            rng.generateMultivariateNormalSamples(means[0], covariance, 500, samples);
            TDoubleVecVec samples_;
            rng.generateMultivariateNormalSamples(means[1], covariance, 500, samples_);
            for (const auto& sample : samples) {
                for (std::size_t i = 0; i < 3; ++i) {
                    modes[0][i].add(sample[i]);
                }
            }
            for (const auto& sample : samples_) {
                for (std::size_t i = 0; i < 3; ++i) {
                    modes[1][i].add(sample[i]);
                }
            }
            samples.insert(samples.end(), samples_.begin(), samples_.end());
            rng.random_shuffle(samples.begin(), samples.end());
        }

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (const auto& sample : samples) {
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        maths::common::CModel::TSizeDoublePr1Vec empty;
        TDouble2Vec expected[]{maths::common::CBasicStatistics::mean(modes[0]),
                               maths::common::CBasicStatistics::mean(modes[1])};
        TDouble2Vec predicted[]{model.predict(time, empty, {0.0, 0.0, 0.0}),
                                model.predict(time, empty, {10.0, 10.0, 10.0})};
        for (std::size_t i = 0; i < 3; ++i) {
            LOG_DEBUG(<< "expected(0) = " << expected[0][i]
                      << " actual(0) = " << predicted[0][i]);
            LOG_DEBUG(<< "expected(1) = " << expected[1][i]
                      << " actual(1) = " << predicted[1][i]);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expected[0][i], predicted[0][i],
                                         std::fabs(0.2 * expected[0][i]));
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expected[1][i], predicted[1][i],
                                         std::fabs(0.01 * expected[1][i]));
        }
    }
}
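// In the probability checks below, the expected probability is reproduced by
// calling probabilityOfLessLikelySamples directly on the model's residual
// model with the detrended value and taking the mean of the returned lower
// and upper bounds; the model's probability must match this exactly, as must
// the tail.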
BOOST_AUTO_TEST_CASE(testProbability) {
    // Test: 1) The calculation matches the expected values given the trend
    //          and decomposition for different calculations, seasonal
    //          confidence intervals, weights and so on.
    //       2) The calculation with and without trend.
    //       3) Manually injected anomalies have low probabilities.

    using TDoubleSizePr = std::pair<double, std::size_t>;

    test::CRandomNumbers rng;

    core_t::TTime bucketLength{600};

    LOG_DEBUG(<< "Univariate");
    {
        maths::time_series::CUnivariateTimeSeriesModel model0{
            modelParams(bucketLength),
            1, // id
            maths::time_series::CTimeSeriesDecompositionStub{},
            univariateNormal(),
            nullptr, // no decay rate control
            nullptr, // no multi-bucket
            false};
        maths::time_series::CUnivariateTimeSeriesModel model1{
            modelParams(bucketLength),
            1, // id
            maths::time_series::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
            univariateNormal(),
            nullptr, // no decay rate control
            nullptr, // no multi-bucket
            false};

        TDoubleVec samples;
        rng.generateNormalSamples(10.0, 4.0, 1000, samples);

        core_t::TTime time{0};
        {
            const TDouble2VecWeightsAryVec weight{
                maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
            for (auto sample : samples) {
                double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                                  static_cast<double>(time) / 86400.0)};
                model0.addSamples(addSampleParams(weight),
                                  {core::make_triple(time, TDouble2Vec{sample}, TAG)});
                model1.addSamples(
                    addSampleParams(weight),
                    {core::make_triple(time, TDouble2Vec{trend + sample}, TAG)});
                time += bucketLength;
            }
        }

        TTime2Vec1Vec time_{{time}};
        TDouble2Vec sample{15.0};

        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
                                                        maths_t::E_OneSidedAbove};
        double confidences[]{0.0, 20.0, 50.0};
        TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        maths_t::setCountVarianceScale(TDouble2Vec{0.9}, weights[0]);
        maths_t::setCountVarianceScale(TDouble2Vec{1.1}, weights[1]);
        maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8}, weights[1]);

        for (auto calculation : calculations) {
            LOG_DEBUG(<< "calculation = " << calculation);
            for (auto confidence : confidences) {
                LOG_DEBUG(<< " confidence = " << confidence);
                for (const auto& weight : weights) {
                    LOG_DEBUG(<< "  weights = " << weight);

                    double expectedProbability[2];
                    maths_t::ETail expectedTail[2];
                    {
                        maths_t::TDoubleWeightsAry weight_(maths_t::CUnitWeights::UNIT);
                        for (std::size_t i = 0; i < weight.size(); ++i) {
                            weight_[i] = weight[i][0];
                        }
                        double lb[2];
                        double ub[2];
                        model0.residualModel().probabilityOfLessLikelySamples(
                            calculation, sample, {weight_}, lb[0], ub[0], expectedTail[0]);
                        model1.residualModel().probabilityOfLessLikelySamples(
                            calculation,
                            {model1.trendModel().detrend(time, sample[0], confidence, false)},
                            {weight_}, lb[1], ub[1], expectedTail[1]);
                        expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                        expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                    }

                    maths::common::SModelProbabilityResult results[2];
                    {
                        maths::common::CModelProbabilityParams params;
                        params.addCalculation(calculation)
                            .seasonalConfidenceInterval(confidence)
                            .addWeights(weight)
                            .useMultibucketFeatures(false);
                        model0.probability(params, time_, {sample}, results[0]);
                        model1.probability(params, time_, {sample}, results[1]);
                    }

                    BOOST_REQUIRE_EQUAL(expectedProbability[0], results[0].s_Probability);
                    BOOST_REQUIRE_EQUAL(expectedTail[0], results[0].s_Tail[0]);
                    BOOST_REQUIRE_EQUAL(expectedProbability[1], results[1].s_Probability);
                    BOOST_REQUIRE_EQUAL(expectedTail[1], results[1].s_Tail[0]);
                }
            }
        }
    }

    LOG_DEBUG(<< "Multivariate");
    {
        maths::time_series::CMultivariateTimeSeriesModel model0{
            modelParams(bucketLength),
            maths::time_series::CTimeSeriesDecompositionStub{},
            multivariateNormal(),
            nullptr,
            nullptr,
            false};
        maths::time_series::CMultivariateTimeSeriesModel model1{
            modelParams(bucketLength),
            maths::time_series::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
            multivariateNormal(),
            nullptr,
            nullptr,
            false};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(
            {10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}},
            1000, samples);

        core_t::TTime time{0};
        {
            TDouble2VecWeightsAryVec weight{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
            for (auto& sample : samples) {
                TDouble2Vec sample_(sample);
                model0.addSamples(addSampleParams(weight),
                                  {core::make_triple(time, sample_, TAG)});
                double trend{5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                                  static_cast<double>(time) / 86400.0)};
                for (auto& component : sample_) {
                    component += trend;
                }
                model1.addSamples(addSampleParams(weight),
                                  {core::make_triple(time, sample_, TAG)});
                time += bucketLength;
            }
        }

        TTime2Vec1Vec time_{{time}};
        TDouble2Vec sample{15.0, 14.0, 16.0};

        maths_t::EProbabilityCalculation calculations[]{maths_t::E_TwoSided,
                                                        maths_t::E_OneSidedAbove};
        double confidences[]{0.0, 20.0, 50.0};
        TDouble2VecWeightsAryVec weights(2, maths_t::CUnitWeights::unit<TDouble2Vec>(3));
        maths_t::setCountVarianceScale(TDouble2Vec{0.9, 0.9, 0.8}, weights[0]);
        maths_t::setCountVarianceScale(TDouble2Vec{1.1, 1.0, 1.2}, weights[1]);
        maths_t::setSeasonalVarianceScale(TDouble2Vec{1.8, 1.7, 1.6}, weights[1]);

        for (auto calculation : calculations) {
            LOG_DEBUG(<< "calculation = " << calculation);
            for (auto confidence : confidences) {
                LOG_DEBUG(<< " confidence = " << confidence);
                for (const auto& weight : weights) {
                    LOG_DEBUG(<< "  weights = " << weight);

                    double expectedProbability[2];
                    TTail10Vec expectedTail[2];
                    {
                        maths_t::TDouble10VecWeightsAry weight_(
                            maths_t::CUnitWeights::unit<TDouble10Vec>(3));
                        for (std::size_t i = 0; i < weight.size(); ++i) {
                            weight_[i] = weight[i];
                        }
                        double lb[2];
                        double ub[2];
                        model0.residualModel().probabilityOfLessLikelySamples(
                            calculation, {TDouble10Vec(sample)}, {weight_}, lb[0],
                            ub[0], expectedTail[0]);
                        TDouble10Vec detrended;
                        for (std::size_t j = 0; j < sample.size(); ++j) {
                            detrended.push_back(model1.trendModel()[j]->detrend(
                                time, sample[j], confidence, false));
                        }
                        model1.residualModel().probabilityOfLessLikelySamples(
                            calculation, {detrended}, {weight_}, lb[1], ub[1],
                            expectedTail[1]);
                        expectedProbability[0] = (lb[0] + ub[0]) / 2.0;
                        expectedProbability[1] = (lb[1] + ub[1]) / 2.0;
                    }

                    maths::common::SModelProbabilityResult results[2];
                    {
                        maths::common::CModelProbabilityParams params;
                        params.addCalculation(calculation)
                            .seasonalConfidenceInterval(confidence)
                            .addWeights(weight)
                            .useMultibucketFeatures(false);
                        model0.probability(params, time_, {sample}, results[0]);
                        model1.probability(params, time_, {sample}, results[1]);
                    }

                    BOOST_REQUIRE_EQUAL(expectedProbability[0], results[0].s_Probability);
                    BOOST_REQUIRE_EQUAL(expectedProbability[1], results[1].s_Probability);
                    for (std::size_t j = 0; j < 3; ++j) {
                        BOOST_REQUIRE_EQUAL(expectedTail[0][j], results[0].s_Tail[j]);
                        BOOST_REQUIRE_EQUAL(expectedTail[1][j], results[1].s_Tail[j]);
                    }
                }
            }
        }
    }

    LOG_DEBUG(<< "Anomalies");
    {
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 1, trend, univariateNormal()};

        TSizeVec anomalies;
        rng.generateUniformSamples(100, 1000, 10, anomalies);
        std::sort(anomalies.begin(), anomalies.end());

        TDoubleVec samples;
        rng.generateNormalSamples(10.0, 4.0, 1000, samples);

        maths::common::CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr> smallest(10);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        std::size_t bucket{0};
        core_t::TTime time{0};
        for (auto sample : samples) {
            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) {
                sample += 18.0;
            }
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            maths::common::SModelProbabilityResult result;
            model.probability(computeProbabilityParams(weights[0]), {{time}},
                              {{sample}}, result);
            smallest.add({result.s_Probability, bucket - 1});
            time += bucketLength;
        }

        TSizeVec anomalies_;
        std::transform(smallest.begin(), smallest.end(), std::back_inserter(anomalies_),
                       [](const TDoubleSizePr& value) { return value.second; });
        std::sort(anomalies_.begin(), anomalies_.end());
        LOG_DEBUG(<< "expected anomalies = " << anomalies);
        LOG_DEBUG(<< "actual anomalies = " << anomalies_);
        BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(anomalies),
                            core::CContainerPrinter::print(anomalies_));
    }
}

BOOST_AUTO_TEST_CASE(testWeights) {
    // Check that the seasonal weight matches the value we expect given
    //   1) the trend and residual model
    //   2) the variation in the input data
    //
    // And that the outlier weight is monotonically decreasing with
    // increasing distance from the expected value.

    core_t::TTime bucketLength{1800};

    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Univariate");
    {
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, prior};

        TDoubleVec samples;
        rng.generateNormalSamples(0.0, 4.0, 1008, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                               static_cast<double>(time) / 86400.0)};
            sample = scale * (1.0 + 0.1 * sample);
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        LOG_DEBUG(<< "Seasonal");
        TMeanAccumulator error;
        TDouble2Vec scale;
        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
            double dataScale{std::pow(
                1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi *
                                     static_cast<double>(time_) / 86400.0),
                2.0)};

            double expectedScale{model.trendModel().varianceScaleWeight(
                time_, model.residualModel().marginalLikelihoodVariance(), 0.0)(1)};
            model.seasonalWeight(0.0, time_, scale);
            LOG_DEBUG(<< "expected weight = " << expectedScale << ", scale = " << scale
                      << " (data weight = " << dataScale << ")");
            BOOST_REQUIRE_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE), scale[0]);
            error.add(std::fabs(scale[0] - dataScale) / dataScale);
        }
        LOG_DEBUG(<< "error = " << maths::common::CBasicStatistics::mean(error));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(error) < 0.26);

        LOG_DEBUG(<< "Outlier reweighting");
        TDouble2Vec prediction(model.predict(time));
        TDouble2VecWeightsAry trendWeights;
        TDouble2VecWeightsAry residualWeights;
        double lastTrendOutlierWeight{1.0};
        double lastResidualOutlierWeight{1.0};
        for (std::size_t i = 0; i < 10; ++i) {
            model.countWeights(time, prediction, 1.0, 1.0, 1.0, 1.0,
                               trendWeights, residualWeights);
            double trendOutlierWeight{maths_t::outlierWeight(trendWeights)[0]};
            double residualOutlierWeight{maths_t::outlierWeight(residualWeights)[0]};
            LOG_DEBUG(<< "trend weight = " << trendOutlierWeight
                      << ", residual weight = " << residualOutlierWeight);
            BOOST_TEST_REQUIRE(trendOutlierWeight <= lastTrendOutlierWeight);
            BOOST_TEST_REQUIRE(residualOutlierWeight <= lastResidualOutlierWeight);
            lastTrendOutlierWeight = trendOutlierWeight;
            lastResidualOutlierWeight = residualOutlierWeight;
            prediction[0] *= 1.1;
        }
    }

    LOG_DEBUG(<< "Multivariate");
    {
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), trend, prior};

        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(
            {10.0, 15.0, 11.0}, {{3.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}},
            1008, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (auto& sample : samples) {
            double scale{10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                               static_cast<double>(time) / 86400.0)};
            for (auto& component : sample) {
                component = scale * (1.0 + 0.1 * component);
            }
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        LOG_DEBUG(<< "Seasonal");
        TMeanAccumulator error;
        TDouble2Vec scale;
        for (core_t::TTime time_ = time; time_ < time + 86400; time_ += 3600) {
            double dataScale{std::pow(
                1.0 + 0.5 * std::sin(boost::math::double_constants::two_pi *
                                     static_cast<double>(time_) / 86400.0),
                2.0)};
            model.seasonalWeight(0.0, time_, scale);
            for (std::size_t i = 0; i < 3; ++i) {
                double expectedScale{model.trendModel()[i]->varianceScaleWeight(
                    time_, model.residualModel().marginalLikelihoodVariances()[i], 0.0)(1)};
                LOG_DEBUG(<< "expected weight = " << expectedScale << ", weight = " << scale
                          << " (data weight = " << dataScale << ")");
                BOOST_REQUIRE_EQUAL(std::max(expectedScale, MINIMUM_SEASONAL_SCALE),
                                    scale[i]);
                error.add(std::fabs(scale[i] - dataScale) / dataScale);
            }
        }
        LOG_DEBUG(<< "error = " << maths::common::CBasicStatistics::mean(error));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(error) < 0.3);

        LOG_DEBUG(<< "Outlier reweighting");
        TDouble2Vec prediction(model.predict(time));
        TDouble2VecWeightsAry trendWeights;
        TDouble2VecWeightsAry residualWeights;
        double lastTrendOutlierWeight{1.0};
        double lastResidualOutlierWeight{1.0};
        for (std::size_t i = 0; i < 10; ++i) {
            model.countWeights(time, prediction, 1.0, 1.0, 1.0, 1.0,
                               trendWeights, residualWeights);
            double trendOutlierWeight{maths_t::outlierWeight(trendWeights)[0]};
            double residualOutlierWeight{maths_t::outlierWeight(residualWeights)[0]};
            LOG_DEBUG(<< "trend weight = " << trendOutlierWeight
                      << ", residual weight = " << residualOutlierWeight);
            BOOST_TEST_REQUIRE(trendOutlierWeight <= lastTrendOutlierWeight);
            BOOST_TEST_REQUIRE(residualOutlierWeight <= lastResidualOutlierWeight);
            lastTrendOutlierWeight = trendOutlierWeight;
            lastResidualOutlierWeight = residualOutlierWeight;
            prediction[0] *= 1.1;
        }
    }
}
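// The expected sizes in the next test are rough budgets assembled from sizeof
// the constituent types plus their dynamic memory usage, with slack factors
// (1.1x univariate, 1.2x multivariate) so the assertions tolerate small
// bookkeeping differences across platforms.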
    core_t::TTime bucketLength{600};

    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Univariate");
    {
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        std::unique_ptr<maths::common::CModel> model =
            std::make_unique<maths::time_series::CUnivariateTimeSeriesModel>(
                modelParams(bucketLength), 0, trend, univariateNormal(), &controllers);

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            sample += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                            static_cast<double>(time) / 86400.0);
            trend.addPoint(time, sample);
            model->addSamples(addSampleParams(weights),
                              {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        std::size_t expectedSize{
            sizeof(maths::time_series::CTimeSeriesDecomposition) + trend.memoryUsage() +
            3 * sizeof(maths::common::CNormalMeanPrecConjugate) +
            sizeof(maths::time_series::CUnivariateTimeSeriesModel::TDecayRateController2Ary) +
            2 * controllers[0].memoryUsage() + 16 * 12 /*Recent samples*/};
        std::size_t size{model->memoryUsage()};
        LOG_DEBUG(<< "size " << size << " expected " << expectedSize);
        BOOST_TEST_REQUIRE(static_cast<double>(size) <
                           1.1 * static_cast<double>(expectedSize));
    }

    LOG_DEBUG(<< "Multivariate");
    {
        TDoubleVec mean{11.0, 10.0, 12.0};
        TDoubleVecVec covariance{{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 1000, samples);

        maths::time_series::CTimeSeriesDecomposition trend[]{
            maths::time_series::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
            maths::time_series::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength},
            maths::time_series::CTimeSeriesDecomposition{24.0 * DECAY_RATE, bucketLength}};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        auto controllers = decayRateControllers(3);
        auto model = std::make_unique<maths::time_series::CMultivariateTimeSeriesModel>(
            modelParams(bucketLength), trend[0], prior, &controllers);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (auto& sample : samples) {
            for (std::size_t i = 0; i < sample.size(); ++i) {
                sample[i] += 10.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                                   static_cast<double>(time) / 86400.0);
                trend[i].addPoint(time, sample[i]);
            }
            model->addSamples(addSampleParams(weights),
                              {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        std::size_t expectedSize{
            3 * sizeof(maths::time_series::CTimeSeriesDecomposition) +
            trend[0].memoryUsage() + trend[1].memoryUsage() + trend[2].memoryUsage() +
            2 * sizeof(maths::common::CMultivariateNormalConjugate<3>) +
            sizeof(maths::time_series::CUnivariateTimeSeriesModel::TDecayRateController2Ary) +
            2 * controllers[0].memoryUsage() + 32 * 12 /*Recent samples*/};
        std::size_t size{model->memoryUsage()};
        LOG_DEBUG(<< "size " << size << " expected " << expectedSize);
        BOOST_TEST_REQUIRE(static_cast<double>(size) <
                           1.2 * static_cast<double>(expectedSize));
    }

    // TODO LOG_DEBUG(<< "Correlates");
}

BOOST_AUTO_TEST_CASE(testPersist) {
    // Test that the restored model's checksum matches the persisted model's.
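    //
    // The round trip below follows the usual persist/restore pattern, roughly
    // (Model standing in for the concrete time series model type):
    //
    //   std::ostringstream state;
    //   core::CJsonStatePersistInserter::persist(
    //       state, std::bind_front(&Model::acceptPersistInserter, &model));
    //   // ...wrap the state in a root object, then rebuild with
    //   // Model{restoreParams, traverser}...
    //
    // Equal checksums are taken as evidence the full state survived the trip.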
    core_t::TTime bucketLength{600};
    maths::common::CModelParams params{modelParams(bucketLength)};

    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Univariate");
    {
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        maths::time_series::CUnivariateTimeSeriesModel origModel{
            params, 1, trend, univariateNormal(), &controllers};

        TDoubleVec samples;
        rng.generateNormalSamples(1.0, 4.0, 1000, samples);

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            origModel.addSamples(addSampleParams(weights),
                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            time += bucketLength;
        }

        std::ostringstream origJson;
        core::CJsonStatePersistInserter::persist(
            origJson,
            std::bind_front(&maths::time_series::CUnivariateTimeSeriesModel::acceptPersistInserter,
                            &origModel));
        LOG_TRACE(<< "model JSON representation:\n" << origJson.str());
        LOG_DEBUG(<< "model JSON size: " << origJson.str().size());

        // Restore the JSON into a new time series model.
        std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"};
        core::CJsonStateRestoreTraverser traverser(origJsonStrm);
        maths::common::SDistributionRestoreParams distributionParams{
            maths_t::E_ContinuousData, DECAY_RATE};
        maths::common::STimeSeriesDecompositionRestoreParams decompositionParams{
            24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::common::SModelRestoreParams restoreParams{
            params, decompositionParams, distributionParams};
        maths::time_series::CUnivariateTimeSeriesModel restoredModel{restoreParams, traverser};

        BOOST_REQUIRE_EQUAL(origModel.checksum(), restoredModel.checksum());
    }

    LOG_DEBUG(<< "Multivariate");
    {
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(
            {11.0, 10.0, 12.0}, {{4.0, 2.9, 0.5}, {2.9, 2.6, 0.1}, {0.5, 0.1, 2.0}},
            1000, samples);

        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        auto controllers = decayRateControllers(3);
        maths::time_series::CMultivariateTimeSeriesModel origModel{
            modelParams(bucketLength), trend, prior, &controllers};

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (const auto& sample : samples) {
            origModel.addSamples(addSampleParams(weights),
                                 {core::make_triple(time, TDouble2Vec(sample), TAG)});
            time += bucketLength;
        }

        std::ostringstream origJson;
        core::CJsonStatePersistInserter::persist(
            origJson,
            std::bind_front(&maths::time_series::CMultivariateTimeSeriesModel::acceptPersistInserter,
                            &origModel));
        LOG_TRACE(<< "model JSON representation:\n" << origJson.str());
        LOG_DEBUG(<< "model JSON size: " << origJson.str().size());

        // Restore the JSON into a new time series model.
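        // As in the univariate case, the persisted state is wrapped in a
        // "topLevel" object so the restore traverser has a named root to
        // descend into.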
        std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"};
        core::CJsonStateRestoreTraverser traverser(origJsonStrm);
        maths::common::SDistributionRestoreParams distributionParams{
            maths_t::E_ContinuousData, DECAY_RATE};
        maths::common::STimeSeriesDecompositionRestoreParams decompositionParams{
            24.0 * DECAY_RATE, bucketLength, distributionParams};
        maths::common::SModelRestoreParams restoreParams{
            params, decompositionParams, distributionParams};
        maths::time_series::CMultivariateTimeSeriesModel restoredModel{restoreParams, traverser};

        BOOST_REQUIRE_EQUAL(origModel.checksum(), restoredModel.checksum());
    }

    // TODO LOG_DEBUG(<< "Correlates");
}

BOOST_AUTO_TEST_CASE(testAddSamplesWithCorrelations) {
    LOG_DEBUG(<< "Correlations no trend");

    core_t::TTime bucketLength{600};

    test::CRandomNumbers rng;

    {
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples({10.0, 15.0}, {{3.0, 2.9}, {2.9, 2.6}},
                                              1000, samples);

        maths::time_series::CTimeSeriesDecomposition trend{DECAY_RATE, bucketLength};
        maths::time_series::CTimeSeriesCorrelations correlations{
            MINIMUM_SIGNIFICANT_CORRELATION, DECAY_RATE};
        maths::common::CNormalMeanPrecConjugate prior{univariateNormal()};
        maths::time_series::CUnivariateTimeSeriesModel model0{
            modelParams(bucketLength), 0, trend, prior, nullptr};
        maths::time_series::CUnivariateTimeSeriesModel model1{
            modelParams(bucketLength), 1, trend, prior, nullptr};
        model0.modelCorrelations(correlations);
        model1.modelCorrelations(correlations);
        CTimeSeriesCorrelateModelAllocator allocator;

        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (auto sample : samples) {
            correlations.refresh(allocator);
            model0.addSamples(addSampleParams(weights),
                              {core::make_triple(time, TDouble2Vec{sample[0]}, TAG)});
            model1.addSamples(addSampleParams(weights),
                              {core::make_triple(time, TDouble2Vec{sample[1]}, TAG)});
            correlations.processSamples();
            time += bucketLength;
        }

        BOOST_TEST_REQUIRE(correlations.correlationModels().size() > 0);
    }

    // TODO LOG_DEBUG(<< "Correlations trend");
    // TODO LOG_DEBUG(<< "Correlations with tags (for population)");
}

BOOST_AUTO_TEST_CASE(testAnomalyModel) {
    // We test that we can find the "odd anomaly out".
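    //
    // We plant 30 single-bucket anomalies at random positions plus one
    // smaller multi-bucket feature shortly before the end of the series,
    // and then check that the buckets of the multi-bucket feature rank
    // among the most anomalous, the intention being that the anomaly model
    // accumulates evidence for it across buckets.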
    using TDoubleSizePr = std::pair<double, std::size_t>;

    test::CRandomNumbers rng;

    std::size_t length{2000};

    LOG_DEBUG(<< "Univariate");
    {
        TSizeVec anomalies;
        rng.generateUniformSamples(0, length, 30, anomalies);
        std::sort(anomalies.begin(), anomalies.end());

        TDoubleVec samples;
        rng.generateNormalSamples(10.0, 2.0, length, samples);

        core_t::TTime bucketLength{600};
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 1, trend, univariateNormal()};

        //std::ofstream file;
        //file.open("results.m");
        //TDoubleVec scores;

        maths::common::CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr> mostAnomalous(10);
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        std::size_t bucket{0};
        core_t::TTime time{0};
        for (auto& sample : samples) {
            if (std::binary_search(anomalies.begin(), anomalies.end(), bucket++)) {
                sample += 12.0;
            }
            if (bucket >= length - 100 && bucket < length - 92) {
                sample += 8.0;
            }
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec{sample}, TAG)});
            maths::common::SModelProbabilityResult result;
            model.probability(computeProbabilityParams(weights[0]), {{time}},
                              {{sample}}, result);
            mostAnomalous.add({std::log(result.s_Probability), bucket});
            //scores.push_back(maths::common::CTools::anomalyScore(probability));
            time += bucketLength;
        }

        mostAnomalous.sort();
        TSizeVec anomalyBuckets;
        TDoubleVec anomalyProbabilities;
        for (const auto& anomaly : mostAnomalous) {
            anomalyBuckets.push_back(anomaly.second);
            anomalyProbabilities.push_back(std::exp(anomaly.first));
        }
        LOG_DEBUG(<< "anomalies = " << anomalyBuckets);
        LOG_DEBUG(<< "probabilities = " << anomalyProbabilities);
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1905) != anomalyBuckets.end());
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1906) != anomalyBuckets.end());
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1907) != anomalyBuckets.end());

        //file << "v = " << samples << ";\n";
        //file << "s = " << scores << ";\n";
        //file << "hold on;";
        //file << "subplot(2,1,1);\n";
        //file << "plot([1:length(v)], v);\n";
        //file << "subplot(2,1,2);\n";
        //file << "plot([1:length(s)], s, 'r');\n";
    }

    LOG_DEBUG(<< "Multivariate");
    {
        TSizeVec anomalies;
        rng.generateUniformSamples(0, length, 30, anomalies);
        std::sort(anomalies.begin(), anomalies.end());

        core_t::TTime bucketLength{600};
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(
            {10.0, 10.0, 10.0}, {{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}},
            length, samples);

        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), trend, prior};

        //std::ofstream file;
        //file.open("results.m");
        //TDoubleVec scores;

        maths::common::CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr> mostAnomalous(10);
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        std::size_t bucket{0};
        for (auto& sample : samples) {
            for (auto& coordinate : sample) {
                if (std::binary_search(anomalies.begin(), anomalies.end(), bucket)) {
                    coordinate += 12.0;
                }
                if (bucket >= length - 100 && bucket < length - 92) {
                    coordinate += 8.0;
                }
            }
            ++bucket;
            model.addSamples(addSampleParams(weights),
                             {core::make_triple(time, TDouble2Vec(sample), TAG)});
            maths::common::SModelProbabilityResult result;
            model.probability(computeProbabilityParams(weights[0]), {{time}},
                              {(sample)}, result);
            mostAnomalous.add({std::log(result.s_Probability), bucket});
            //scores.push_back(maths::common::CTools::anomalyScore(probability));
            time += bucketLength;
        }

        mostAnomalous.sort();
        TSizeVec anomalyBuckets;
        TDoubleVec anomalyProbabilities;
        for (const auto& anomaly : mostAnomalous) {
            anomalyBuckets.push_back(anomaly.second);
            anomalyProbabilities.push_back(std::exp(anomaly.first));
        }
        LOG_DEBUG(<< "anomalies = " << anomalyBuckets);
        LOG_DEBUG(<< "probabilities = " << anomalyProbabilities);
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1906) != anomalyBuckets.end());
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1907) != anomalyBuckets.end());
        BOOST_TEST_REQUIRE(std::find(anomalyBuckets.begin(), anomalyBuckets.end(),
                                     1908) != anomalyBuckets.end());

        //file << "v = [";
        //for (const auto &sample : samples)
        //{
        //    file << sample[0] << "," << sample[1] << "," << sample[2] << "\n";
        //}
        //file << "];\n";
        //file << "s = " << scores << ";\n";
        //file << "hold on;";
        //file << "subplot(4,1,1);\n";
        //file << "plot([1:rows(v)], v(:,1));\n";
        //file << "subplot(4,1,2);\n";
        //file << "plot([1:rows(v)], v(:,2));\n";
        //file << "subplot(4,1,3);\n";
        //file << "plot([1:rows(v)], v(:,3));\n";
        //file << "subplot(4,1,4);\n";
        //file << "plot([1:length(s)], s, 'r');\n";
    }
}

BOOST_AUTO_TEST_CASE(testStepChangeDiscontinuities) {
    // Test reinitialization of the residual model after detecting a step change.
    //
    // Check detection and modelling of step changes in data which are:
    //   1) piecewise constant,
    //   2) saw tooth.

    using TDouble3Vec = core::CSmallVector<double, 3>;
    using TDouble3VecVec = std::vector<TDouble3Vec>;

    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    auto updateModel = [&](core_t::TTime time, double value,
                           maths::time_series::CUnivariateTimeSeriesModel& model) {
        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
                           residualWeights[0]);
        model.addSamples(addSampleParams(1.0, trendWeights, residualWeights),
                         {core::make_triple(time, TDouble2Vec{value}, TAG)});
    };

    test::CRandomNumbers rng;

    LOG_DEBUG(<< "Univariate: Residual Model Reinitialization");
    {
        core_t::TTime bucketLength{600};
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend,
            univariateNormal(DECAY_RATE / 3.0), &controllers};
        CDebugGenerator debug("prior_reinitialization.py");

        core_t::TTime time{0};
        TDoubleVec samples;
        for (auto level : {20.0, 40.0}) {
            for (std::size_t i = 0; i < 200; ++i) {
                updateModel(time, level, model);
                debug.addValueAndPrediction(time, level, model);
                time += bucketLength;
            }
        }

        auto confidenceInterval = model.confidenceInterval(
            time, 50.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        LOG_DEBUG(<< "confidence interval = " << confidenceInterval);
        BOOST_TEST_REQUIRE(std::fabs(confidenceInterval[1][0] - 40.0) < 1.0);
        BOOST_TEST_REQUIRE(confidenceInterval[2][0] - confidenceInterval[0][0] < 4.0);
    }

    LOG_DEBUG(<< "Univariate: Piecewise Constant");
    {
        core_t::TTime bucketLength{1800};
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend,
            univariateNormal(DECAY_RATE / 3.0), &controllers};
        CDebugGenerator debug("piecewise_constant.py");

        // Add some data to the model.
        core_t::TTime time{0};
        TDoubleVec samples;
        double level{20.0};
        for (auto dl : {10.0, 20.0, 25.0, 50.0, 30.0, 40.0, 15.0, 40.0, 25.0}) {
            level += dl;
            rng.generateNormalSamples(level, 2.0,
                                      300 + static_cast<std::size_t>(2.0 * dl), samples);
            for (auto sample : samples) {
                updateModel(time, sample, model);
                debug.addValueAndPrediction(time, sample, model);
                time += bucketLength;
            }
        }
        level += 30.0;
        rng.generateNormalSamples(level, 2.0, 100, samples);
        for (auto sample : samples) {
            updateModel(time, sample, model);
            debug.addValueAndPrediction(time, sample, model);
            time += bucketLength;
        }

        // Generate expected values from the same process.
        TDoubleVec expected;
        rng.generateNormalSamples(level, 2.0, 260, expected);
        for (auto dl : {25.0, 40.0}) {
            level += dl;
            rng.generateNormalSamples(level, 2.0,
                                      300 + static_cast<std::size_t>(2.0 * dl), samples);
            expected.insert(expected.end(), samples.begin(), samples.end());
        }
        std::for_each(expected.begin(), expected.end(),
                      [&debug](double sample) { debug.addValue(sample); });

        // Test forecasting.
        TDouble3VecVec forecast;
        auto pushErrorBar = [&](const maths::common::SErrorBar& errorBar) {
            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted,
                                errorBar.s_UpperBound});
            debug.addForecast(errorBar);
        };
        std::string m;
        model.forecast(0, time, time, time + expected.size() * bucketLength, 90.0,
                       {-1000.0}, {1000.0}, pushErrorBar, m);

        double outOfBounds{0.0};
        for (std::size_t i = 0; i < forecast.size(); ++i) {
            BOOST_REQUIRE_CLOSE_ABSOLUTE(expected[i], forecast[i][1], 0.1 * expected[i]);
            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] ||
                                               expected[i] > forecast[i][2]);
        }
        double percentageOutOfBounds{100.0 * outOfBounds /
                                     static_cast<double>(forecast.size())};
        LOG_DEBUG(<< "% out-of-bounds = " << percentageOutOfBounds);
        BOOST_TEST_REQUIRE(percentageOutOfBounds < 2.0);
    }

    LOG_DEBUG(<< "Univariate: Saw Tooth");
    {
        core_t::TTime bucketLength{1800};
        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        auto controllers = decayRateControllers(1);
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 0, trend, univariateNormal(), &controllers};
        CDebugGenerator debug("saw_tooth.py");

        // Add some data to the model.
        core_t::TTime time{0};
        double value{10.0};
        TDoubleVec noise;
        for (auto slope : {0.08, 0.056, 0.028, 0.044, 0.06, 0.03}) {
            value = 5.0;
            while (value < 95.0) {
                rng.generateNormalSamples(0.0, 2.0, 1, noise);
                updateModel(time, value + noise[0], model);
                debug.addValueAndPrediction(time, value + noise[0], model);
                time += bucketLength;
                value += slope;
            }
        }
        for (auto slope : {0.042}) {
            value = 5.0;
            for (std::size_t i = 0; i < 1500; ++i) {
                rng.generateNormalSamples(0.0, 2.0, 1, noise);
                updateModel(time, value + noise[0], model);
                debug.addValueAndPrediction(time, value + noise[0], model);
                time += bucketLength;
                value += slope;
            }
        }

        // Generate expected values from the same process.
        TDoubleVec expected;
        for (auto slope : {0.05, 0.04}) {
            while (expected.size() < 2000 && value < 95.0) {
                rng.generateNormalSamples(0.0, 2.0, 1, noise);
                expected.push_back(value + noise[0]);
                debug.addValue(value + noise[0]);
                value += slope;
            }
            value = 5.0;
        }

        // Test forecasting.
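        // As for the piecewise constant case, we compare the actuals with the
        // forecast's 90% confidence envelope and require the fraction falling
        // outside the bounds to stay small.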
        TDouble3VecVec forecast;
        auto pushErrorBar = [&](const maths::common::SErrorBar& errorBar) {
            forecast.push_back({errorBar.s_LowerBound, errorBar.s_Predicted,
                                errorBar.s_UpperBound});
            debug.addForecast(errorBar);
        };
        std::string m;
        model.forecast(0, time, time, time + expected.size() * bucketLength, 90.0,
                       {-1000.0}, {1000.0}, pushErrorBar, m);

        double outOfBounds{0.0};
        for (std::size_t i = 0; i < forecast.size(); ++i) {
            outOfBounds += static_cast<double>(expected[i] < forecast[i][0] ||
                                               expected[i] > forecast[i][2]);
        }
        double percentageOutOfBounds{100.0 * outOfBounds /
                                     static_cast<double>(forecast.size())};
        LOG_DEBUG(<< "% out-of-bounds = " << percentageOutOfBounds);
        BOOST_TEST_REQUIRE(percentageOutOfBounds < 6.0);
    }
}

BOOST_AUTO_TEST_CASE(testLargeAnomalyAfterChange) {
    // Check that large outliers occurring directly after a change do not
    // pollute the model.

    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    auto updateModel = [&](core_t::TTime time, double value,
                           maths::time_series::CUnivariateTimeSeriesModel& model) {
        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
                           residualWeights[0]);
        model.addSamples(addSampleParams(1.0, trendWeights, residualWeights),
                         {core::make_triple(time, TDouble2Vec{value}, TAG)});
    };
    auto seasonality = [](core_t::TTime time) {
        return 2.0 * std::sin(boost::math::double_constants::two_pi *
                              static_cast<double>(time) /
                              static_cast<double>(core::constants::DAY));
    };

    test::CRandomNumbers rng;

    core_t::TTime bucketLength{1800};
    maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
    auto controllers = decayRateControllers(1);
    maths::time_series::CUnivariateTimeSeriesModel model{
        modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
    CDebugGenerator debug("piecewise_constant.py");

    // Add some data to the model.
    core_t::TTime time{0};
    TDoubleVec samples;
    double level{20.0};
    for (auto dl : {0.0, 15.0}) {
        level += dl;
        rng.generateNormalSamples(level, 0.5, 300, samples);
        for (auto sample : samples) {
            sample += seasonality(time);
            updateModel(time, sample, model);
            debug.addValueAndPrediction(time, sample, model);
            time += bucketLength;
        }
    }

    // Arrange for a change which will just be detected before injecting an anomaly.
    level += 10.0;
    rng.generateNormalSamples(level, 0.5, 80, samples);
    for (auto sample : samples) {
        sample += seasonality(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }

    // Anomaly.
    for (std::size_t i = 0; i < 3; ++i) {
        updateModel(time, 0.0, model);
        debug.addValueAndPrediction(time, 0.0, model);
        time += bucketLength;
    }

    rng.generateNormalSamples(level, 0.5, 80, samples);
    for (auto sample : samples) {
        sample += seasonality(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
        TDouble2Vec1Vec error{{sample}};
        model.detrend({{time}}, 0.0, error);
        BOOST_TEST_REQUIRE(error[0][0] < 1.5);
    }
}

BOOST_AUTO_TEST_CASE(testLinearScaling) {
    // We test that the predictions are good and the bounds do not blow up
    // after we:
    //   1) linearly scale down a periodic pattern,
    //   2) linearly scale up the same periodic pattern.
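    //
    // After each scaling we keep feeding the model scaled data and check
    // that its 90% confidence interval re-centres on the new level quickly,
    // with a width of at most a few noise standard deviations.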
    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    auto updateModel = [&](core_t::TTime time, double value,
                           maths::time_series::CUnivariateTimeSeriesModel& model) {
        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
                           residualWeights[0]);
        model.addSamples(addSampleParams(1.0, trendWeights, residualWeights),
                         {core::make_triple(time, TDouble2Vec{value}, TAG)});
    };

    test::CRandomNumbers rng;

    double noiseVariance{3.0};
    core_t::TTime bucketLength{600};
    maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
    auto controllers = decayRateControllers(1);
    maths::time_series::CUnivariateTimeSeriesModel model{
        modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
    CDebugGenerator debug;

    core_t::TTime time{0};
    TDoubleVec samples;
    rng.generateNormalSamples(0.0, noiseVariance, 1500, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }

    LOG_DEBUG(<< "scale by 0.3");
    rng.generateNormalSamples(0.0, noiseVariance, 300, samples);
    for (auto sample : samples) {
        sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }
    rng.generateNormalSamples(0.0, noiseVariance, 1500, samples);
    for (auto sample : samples) {
        sample = 0.3 * (12.0 + 10.0 * smoothDaily(time) + sample);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        auto x = model.confidenceInterval(
            time, 90.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        BOOST_TEST_REQUIRE(std::fabs(sample - x[1][0]) < 1.6 * std::sqrt(noiseVariance));
        BOOST_TEST_REQUIRE(std::fabs(x[2][0] - x[0][0]) < 3.5 * std::sqrt(noiseVariance));
        time += bucketLength;
    }

    LOG_DEBUG(<< "scale by 6.6");
    rng.generateNormalSamples(0.0, noiseVariance, 300, samples);
    for (auto sample : samples) {
        sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample;
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }
    rng.generateNormalSamples(0.0, noiseVariance, 400, samples);
    for (auto sample : samples) {
        sample = 2.0 * (12.0 + 10.0 * smoothDaily(time)) + sample;
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        auto x = model.confidenceInterval(
            time, 90.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        BOOST_TEST_REQUIRE(std::fabs(sample - x[1][0]) < 3.6 * std::sqrt(noiseVariance));
        BOOST_TEST_REQUIRE(std::fabs(x[2][0] - x[0][0]) < 4.5 * std::sqrt(noiseVariance));
        time += bucketLength;
    }
}

BOOST_AUTO_TEST_CASE(testDaylightSaving) {
    // Test that we detect daylight saving time shifts.
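    //
    // We shift the underlying daily pattern forward by one hour and expect
    // the trend model to learn a matching time shift; reversing the shift
    // should see the learned shift return to zero, with predictions staying
    // accurate throughout.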
    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    auto updateModel = [&](core_t::TTime time, double value,
                           maths::time_series::CUnivariateTimeSeriesModel& model) {
        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
                           residualWeights[0]);
        model.addSamples(addSampleParams(1.0, trendWeights, residualWeights),
                         {core::make_triple(time, TDouble2Vec{value}, TAG)});
    };

    test::CRandomNumbers rng;

    core_t::TTime hour{core::constants::HOUR};
    double noiseVariance{0.36};
    core_t::TTime bucketLength{600};
    maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
    auto controllers = decayRateControllers(1);
    maths::time_series::CUnivariateTimeSeriesModel model{
        modelParams(bucketLength), 0, trend, univariateNormal(DECAY_RATE / 3.0), &controllers};
    CDebugGenerator debug;

    core_t::TTime time{0};
    TDoubleVec samples;
    rng.generateNormalSamples(0.0, noiseVariance, 2000, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }

    LOG_DEBUG(<< "shift by 3600s");
    rng.generateNormalSamples(0.0, noiseVariance, 300, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time + hour);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }
    rng.generateNormalSamples(0.0, noiseVariance, 1500, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time + hour);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        BOOST_REQUIRE_EQUAL(hour, model.trendModel().timeShift());
        auto x = model.confidenceInterval(
            time, 90.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        BOOST_TEST_REQUIRE(std::fabs(sample - x[1][0]) < 4.2 * std::sqrt(noiseVariance));
        BOOST_TEST_REQUIRE(std::fabs(x[2][0] - x[0][0]) < 3.6 * std::sqrt(noiseVariance));
        time += bucketLength;
    }

    LOG_DEBUG(<< "shift by -3600s");
    rng.generateNormalSamples(0.0, noiseVariance, 300, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        time += bucketLength;
    }
    rng.generateNormalSamples(0.0, noiseVariance, 400, samples);
    for (auto sample : samples) {
        sample += 12.0 + 10.0 * smoothDaily(time);
        updateModel(time, sample, model);
        debug.addValueAndPrediction(time, sample, model);
        BOOST_REQUIRE_EQUAL(core_t::TTime(0), model.trendModel().timeShift());
        auto x = model.confidenceInterval(
            time, 90.0, maths_t::CUnitWeights::unit<TDouble2Vec>(1));
        BOOST_TEST_REQUIRE(std::fabs(sample - x[1][0]) < 3.5 * std::sqrt(noiseVariance));
        BOOST_TEST_REQUIRE(std::fabs(x[2][0] - x[0][0]) < 3.8 * std::sqrt(noiseVariance));
        time += bucketLength;
    }
}

BOOST_AUTO_TEST_CASE(testNonNegative) {
    // Test that, after a non-negative time series drops to zero, we always
    // predict high probability for zero values when using the one-sided
    // above probability calculation.
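    //
    // The series decays towards zero and then drops to a level which clamps
    // at zero. Since the model is told the data are non-negative and we use
    // the E_OneSidedAbove calculation, zeros should not look anomalous, so
    // the minimum probability we see after the drop should remain large.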
    TDouble2VecWeightsAryVec trendWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    TDouble2VecWeightsAryVec residualWeights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    auto updateModel = [&](core_t::TTime time, double value,
                           maths::time_series::CUnivariateTimeSeriesModel& model) {
        model.countWeights(time, {value}, 1.0, 1.0, 0.0, 1.0, trendWeights[0],
                           residualWeights[0]);
        auto params = addSampleParams(1.0, trendWeights, residualWeights);
        params.isNonNegative(true);
        model.addSamples(params, {core::make_triple(time, TDouble2Vec{value}, TAG)});
    };

    test::CRandomNumbers rng;

    core_t::TTime day{core::constants::DAY};
    core_t::TTime week{core::constants::WEEK};
    auto trend = [&](core_t::TTime time) {
        return (time > 10 * day
                    ? -2.0
                    : std::max(15.0 - 0.5 * static_cast<double>(time) /
                                          static_cast<double>(day),
                               1.0)) +
               std::sin(boost::math::double_constants::two_pi *
                        static_cast<double>(time) / static_cast<double>(day));
    };

    core_t::TTime bucketLength{600};
    maths::time_series::CTimeSeriesDecomposition trendModel{24.0 * DECAY_RATE, bucketLength};
    auto controllers = decayRateControllers(1);
    maths::time_series::CUnivariateTimeSeriesModel timeSeriesModel{
        modelParams(bucketLength), 0, trendModel,
        univariateNormal(DECAY_RATE / 3.0), &controllers};
    CDebugGenerator debug;

    core_t::TTime time{0};
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 0.1, 3 * week / bucketLength, noise);

    TDouble2VecWeightsAry probabilityCalculationWeights{
        maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
    double pMinAfterDrop{1.0};

    for (auto sample : noise) {
        sample = std::max(trend(time) + sample, 0.0);
        updateModel(time, sample, timeSeriesModel);
        if (time > 10 * day) {
            maths::common::CModelProbabilityParams params;
            params.addCalculation(maths_t::E_OneSidedAbove)
                .seasonalConfidenceInterval(50.0)
                .addWeights(probabilityCalculationWeights);
            maths::common::SModelProbabilityResult result;
            timeSeriesModel.probability(params, {{time}}, {{sample}}, result);
            pMinAfterDrop = std::min(pMinAfterDrop, result.s_Probability);
        }
        debug.addValueAndPrediction(time, sample, timeSeriesModel);
        time += bucketLength;
    }

    LOG_DEBUG(<< "pMinAfterDrop = " << pMinAfterDrop);
    BOOST_TEST_REQUIRE(pMinAfterDrop > 0.3);
}

BOOST_AUTO_TEST_CASE(testSkipAnomalyModelUpdate) {
    // We test that, when the parameters dictate that the anomaly model update
    // should be skipped, the probabilities keep getting smaller despite
    // seeing many anomalies.
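    //
    // For the anomalous buckets we set initialCountWeight(0.0) on the
    // probability calculation parameters, which instructs the model to skip
    // updating its anomaly model, and we never add those samples. With no
    // feedback, a run of identical anomalies should produce non-increasing
    // probabilities, which is what we assert below.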
    test::CRandomNumbers rng;

    std::size_t length{2000};
    core_t::TTime bucketLength{600};

    LOG_DEBUG(<< "Univariate");
    {
        TDoubleVec samples;
        rng.generateNormalSamples(10.0, 2.0, length, samples);

        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::time_series::CUnivariateTimeSeriesModel model{
            modelParams(bucketLength), 1, trend, univariateNormal()};

        TDoubleVec probabilities;
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(1)};
        core_t::TTime time{0};
        for (std::size_t bucket = 0; bucket < samples.size(); ++bucket) {
            auto sample = samples[bucket];
            auto currentComputeProbabilityParams = computeProbabilityParams(weights[0]);
            maths::common::SModelProbabilityResult result;
            // We create anomalies in 10 consecutive buckets.
            if (bucket >= 1700 && bucket < 1710) {
                sample = 100.0;
                currentComputeProbabilityParams.initialCountWeight(0.0);
                model.probability(currentComputeProbabilityParams, {{time}},
                                  {{sample}}, result);
                probabilities.push_back(result.s_Probability);
            } else {
                model.addSamples(addSampleParams(weights),
                                 {core::make_triple(time, TDouble2Vec{sample}, TAG)});
                model.probability(currentComputeProbabilityParams, {{time}},
                                  {{sample}}, result);
            }
            time += bucketLength;
        }

        LOG_DEBUG(<< "probabilities = " << probabilities);

        // Assert that the probabilities are decreasing.
        BOOST_TEST_REQUIRE(probabilities[0] < 0.00001);
        BOOST_TEST_REQUIRE(std::is_sorted(probabilities.rbegin(), probabilities.rend()));
    }

    LOG_DEBUG(<< "Multivariate");
    {
        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(
            {10.0, 10.0, 10.0}, {{4.0, 0.9, 0.5}, {0.9, 2.6, 0.1}, {0.5, 0.1, 3.0}},
            length, samples);

        maths::time_series::CTimeSeriesDecomposition trend{24.0 * DECAY_RATE, bucketLength};
        maths::common::CMultivariateNormalConjugate<3> prior{multivariateNormal()};
        maths::time_series::CMultivariateTimeSeriesModel model{
            modelParams(bucketLength), trend, prior};

        TDoubleVec probabilities;
        TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit<TDouble2Vec>(3)};
        core_t::TTime time{0};
        for (std::size_t bucket = 0; bucket < samples.size(); ++bucket) {
            auto& sample = samples[bucket];
            auto currentComputeProbabilityParams = computeProbabilityParams(weights[0]);
            maths::common::SModelProbabilityResult result;
            if (bucket >= 1700 && bucket < 1710) {
                for (auto& coordinate : sample) {
                    coordinate += 100.0;
                }
                currentComputeProbabilityParams.initialCountWeight(0.0);
                model.probability(currentComputeProbabilityParams, {{time}},
                                  {(sample)}, result);
                probabilities.push_back(result.s_Probability);
            } else {
                model.addSamples(addSampleParams(weights),
                                 {core::make_triple(time, TDouble2Vec(sample), TAG)});
                model.probability(currentComputeProbabilityParams, {{time}},
                                  {(sample)}, result);
            }
            time += bucketLength;
        }

        LOG_DEBUG(<< "probabilities = " << probabilities);

        // Assert that the probabilities are decreasing.
        BOOST_TEST_REQUIRE(probabilities[0] < 0.00001);
        BOOST_TEST_REQUIRE(std::is_sorted(probabilities.rbegin(), probabilities.rend()));
    }
}

BOOST_AUTO_TEST_SUITE_END()