// lib/maths/time_series/unittest/CTimeSeriesDecompositionTest.cc

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the following additional limitation. Functionality enabled by the
 * files subject to the Elastic License 2.0 may only be used in production when
 * invoked by an Elasticsearch process with a license key installed that permits
 * use of machine learning features. You may not use this file except in
 * compliance with the Elastic License 2.0 and the foregoing additional
 * limitation.
 */

#include <core/CJsonStatePersistInserter.h>
#include <core/CJsonStateRestoreTraverser.h>
#include <core/CLogger.h>
#include <core/CMemoryCircuitBreaker.h>
#include <core/CTimezone.h>
#include <core/Constants.h>

#include <maths/common/CBasicStatistics.h>
#include <maths/common/CIntegerTools.h>
#include <maths/common/CLinearAlgebraFwd.h>
#include <maths/common/CMathsFuncs.h>
#include <maths/common/CNormalMeanPrecConjugate.h>
#include <maths/common/CRestoreParams.h>
#include <maths/common/MathsTypes.h>

#include <maths/time_series/CDecayRateController.h>
#include <maths/time_series/CTimeSeriesDecomposition.h>
#include <maths/time_series/CTimeSeriesDecompositionDetail.h>
#include <maths/time_series/CTimeSeriesTestForSeasonality.h>

#include <test/BoostTestCloseAbsolute.h>
#include <test/CRandomNumbers.h>
#include <test/CTimeSeriesTestData.h>

#include "TestUtils.h"

#include <boost/math/constants/constants.hpp>
#include <boost/test/unit_test.hpp>

#include <fstream>
#include <utility>
#include <vector>

BOOST_AUTO_TEST_SUITE(CTimeSeriesDecompositionTest)

using namespace ml;

namespace {

// Convenience aliases shared by the test cases in this suite.
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleVec = std::vector<double>;
using TDoubleVecVec = std::vector<TDoubleVec>;
using TDouble1Vec = core::CSmallVector<double, 1>;
using TTimeVec = std::vector<core_t::TTime>;
using TTimeDoublePr = std::pair<core_t::TTime, double>;
using TTimeDoublePrVec = std::vector<TTimeDoublePr>;
using TSeasonalComponentVec = maths_t::TSeasonalComponentVec;
using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
using TFloatMeanAccumulatorVec = std::vector<
    maths::common::CBasicStatistics::SSampleMean<maths::common::CFloatStorage>::TAccumulator>;

//! \brief Records (time, value) points and (time, prediction, error) triples
//! as a test runs and, when ENABLED is true, writes them out as a matplotlib
//! script on destruction so a decomposition's fit can be inspected visually.
//! With ENABLED false (the default) every call is a no-op.
class CDebugGenerator {
public:
    //! Compile-time switch: flip to true locally to get plot output.
    static const bool ENABLED{false};

public:
    explicit CDebugGenerator(std::string file = "results.py")
        : m_File{std::move(file)} {}

    //! Dump the collected series as a python/matplotlib script.
    ~CDebugGenerator() {
        if (ENABLED) {
            std::ofstream file_;
            file_.open(m_File);
            // CScopePrintContainers makes the std:: containers stream as
            // python-style lists for the duration of this scope.
            auto file = (file_ << core::CScopePrintContainers{});
            file << "import matplotlib.pyplot as plt;\n";
            file << "import numpy as np;\n";
            file << "t = " << m_ValueTimes << ";\n";
            file << "f = " << m_Values << ";\n";
            file << "te = " << m_PredictionTimes << ";\n";
            file << "fe = " << m_Predictions << ";\n";
            file << "r = " << m_Errors << ";\n";
            file << "plt.figure(1);\n";
            file << "plt.clf();\n";
            file << "plt.plot(t, f);\n";
            file << "plt.plot(te, fe, 'r');\n";
            file << "plt.xlim(t[0], t[-1]);\n";
            file << "plt.ylim(min(np.min(f),np.min(fe)), max(np.max(f),np.max(fe)));\n";
            file << "plt.show();\n";
            file << "plt.figure(2);\n";
            file << "plt.clf();\n";
            file << "plt.plot(te, r, 'k');\n";
            file << "plt.xlim(t[0], t[-1]);\n";
            file << "plt.ylim(np.min(r), np.max(r));\n";
            file << "plt.show();\n";
        }
    }

    //! Record a raw (time, value) data point.
    void addValue(core_t::TTime time, double value) {
        if (ENABLED) {
            m_ValueTimes.push_back(time);
            m_Values.push_back(value);
        }
    }

    //! Record a model prediction and its residual error at \p time.
    void addPrediction(core_t::TTime time, double prediction, double error) {
        if (ENABLED) {
            m_PredictionTimes.push_back(time);
            m_Predictions.push_back(prediction);
            m_Errors.push_back(error);
        }
    }

private:
    std::string m_File;
    TTimeVec m_ValueTimes;
    TDoubleVec m_Values;
    TTimeVec m_PredictionTimes;
    TDoubleVec m_Predictions;
    TDoubleVec m_Errors;
};

// Bucket lengths / periods used throughout the tests (seconds).
const core_t::TTime ONE_MIN{60};
const core_t::TTime FIVE_MINS{300};
const core_t::TTime TEN_MINS{600};
const core_t::TTime HALF_HOUR{core::constants::HOUR / 2};
const core_t::TTime HOUR{core::constants::HOUR};
const core_t::TTime DAY{core::constants::DAY};
const core_t::TTime WEEK{core::constants::WEEK};
const core_t::TTime YEAR{core::constants::YEAR};
}

//! \brief White-box helper which reaches into a decomposition's private state
//! (it is presumably befriended by the classes it touches) to corrupt the
//! first seasonal component's bucketing with a NaN.
class CNanInjector {
public:
    // insert a NaN into a seasonal component bucket
    void injectNan(maths::time_series::CTimeSeriesDecomposition& decomposition,
                   size_t bucketIndex) {
        firstRegressionStatistic(seasonalComponent(decomposition), bucketIndex) =
            std::numeric_limits<double>::quiet_NaN();
    }

private:
    // helper methods to get access to the state of a seasonal component

    // return the regression statistics from the provided seasonal component
    static maths::common::CFloatStorage&
    firstRegressionStatistic(maths::time_series::CSeasonalComponent& component,
                             size_t bucketIndex) {
        return maths::common::CBasicStatistics::moment<0>(
            component.m_Bucketing.m_Buckets[bucketIndex].s_Regression.m_S)(0);
    }

    // return the first seasonal component from the provided decomposition
    static maths::time_series::CSeasonalComponent&
    seasonalComponent(maths::time_series::CTimeSeriesDecomposition& decomposition) {
        return decomposition.m_Components.m_Seasonal->m_Components[0];
    }
};

//! \brief Pins the timezone to GMT for the duration of each test so that
//! day/week boundaries are deterministic, restoring the default afterwards.
class CTestFixture {
public:
    CTestFixture() { core::CTimezone::instance().setTimezone("GMT"); }

    ~CTestFixture() { core::CTimezone::instance().setTimezone(""); }
};

//! \brief A memory circuit breaker whose allow/deny decision the test can
//! toggle, used to simulate the hard_limit state.
class CConfigurableMemoryCircuitBreaker : public core::CMemoryCircuitBreaker {
public:
    //! Constructor
    explicit CConfigurableMemoryCircuitBreaker(bool allowAllocations)
        : m_AllowAllocations(allowAllocations) {}

    //! In hard_limit mode we don't allow any new allocations.
    bool areAllocationsAllowed() const override { return m_AllowAllocations; }

    //! Set hard_limit mode.
void areAllocationsAllowed(bool allowAllocations) {
        m_AllowAllocations = allowAllocations;
    }

private:
    bool m_AllowAllocations;
};

//! \brief Exercises CTimeSeriesDecompositionDetail::CComponents directly,
//! in particular the add/remove bookkeeping of seasonal components under a
//! memory circuit breaker.
class CComponentsTest : public CTestFixture {
public:
    using TComponents = ml::maths::time_series::CTimeSeriesDecompositionDetail::CComponents;
    using TSeasonal = TComponents::CSeasonal;
    using TFloatMeanAccumulatorVec =
        ml::maths::time_series::CTimeSeriesDecompositionTypes::TFloatMeanAccumulatorVec;
    using TSeasonalDecomposition = ml::maths::time_series::CSeasonalDecomposition;
    using TSeasonalComponent = ml::maths::time_series::CSeasonalDecomposition::TSeasonalComponent;

public:
    static void testAddSeasonalComponents() {
        // Test that in the hard_limit state we still can add new seasonal components if
        // at the same time we remove old seasonal components of the same total size or larger.

        // Initialise CTimeSeriesDecompositionDetails::CComponents
        double decayRate{0.01};
        core_t::TTime bucketLength{HALF_HOUR};
        std::size_t seasonalComponentSize{4};
        TComponents components{decayRate, bucketLength, seasonalComponentSize};
        components.m_Seasonal = std::make_unique<TSeasonal>();
        // Attach no-op change callbacks for the lifetime of this scope.
        TComponents::CScopeAttachComponentChangeCallback attach{
            components, [](TFloatMeanAccumulatorVec) {}, [](const std::string&) {}};
        {
            // initialise CSeasonalDecomposition
            maths::time_series::CSeasonalDecomposition seasonalDecompositionComponents;
            core_t::TTime startTime = 0;
            maths::time_series::CNewTrendSummary::TFloatMeanAccumulatorVec initialValues;
            maths::time_series::CNewTrendSummary trendComponent{
                startTime, bucketLength, initialValues};

            // No component to remove so far
            maths::time_series::CSeasonalDecomposition::TBoolVec componentsToRemove{};
            seasonalDecompositionComponents.add(trendComponent);
            seasonalDecompositionComponents.add(componentsToRemove);

            // Create the first seasonal component and add it to the decomposition
            TSeasonalDecomposition::TSeasonalComponent firstSeasonalComponent;
            TSeasonalDecomposition::TPeriodDescriptor periodDescriptor{
                TSeasonalDecomposition::TPeriodDescriptor::E_Day};
            TSeasonalDecomposition::TOptionalTime startOfWeekTime;
            TSeasonalDecomposition::TFloatMeanAccumulatorVec seasonalValues;
            seasonalDecompositionComponents.add(
                "Test component 1", firstSeasonalComponent, 10.0, periodDescriptor,
                10.0, 10.0, 10.0, startOfWeekTime, seasonalValues);

            // Allocations allowed: the add should succeed unconditionally.
            CConfigurableMemoryCircuitBreaker allocator{true};

            // add seasonal components
            components.addSeasonalComponents(seasonalDecompositionComponents, allocator);
            BOOST_REQUIRE_EQUAL(1, components.seasonal().size());
            LOG_DEBUG(<< "First add seasonal components finished");
        }
        {
            // initialise CSeasonalDecomposition
            maths::time_series::CSeasonalDecomposition seasonalDecompositionComponents;
            core_t::TTime startTime = 0;
            maths::time_series::CNewTrendSummary::TFloatMeanAccumulatorVec initialValues;
            maths::time_series::CNewTrendSummary trendComponent{
                startTime, bucketLength, initialValues};
            seasonalDecompositionComponents.add(trendComponent);

            // Mark the first seasonal component for removal
            maths::time_series::CSeasonalDecomposition::TBoolVec componentsToRemove{true};
            seasonalDecompositionComponents.add(componentsToRemove);

            // Create the second seasonal component and add it to the decomposition
            TSeasonalDecomposition::TSeasonalComponent secondSeasonalComponent;
            TSeasonalDecomposition::TPeriodDescriptor periodDescriptor{
                TSeasonalDecomposition::TPeriodDescriptor::E_Week};
            TSeasonalDecomposition::TOptionalTime startOfWeekTime;
            TSeasonalDecomposition::TFloatMeanAccumulatorVec seasonalValues;
            seasonalDecompositionComponents.add("Test component 2", secondSeasonalComponent,
                                                0, periodDescriptor, 0.0, 0.0, 1.0,
                                                startOfWeekTime, seasonalValues);

            // Allocations forbidden (hard_limit): the add should still work
            // because an old component of at least the same size is removed.
            CConfigurableMemoryCircuitBreaker allocator{false};

            // make sure that when adding the second seasonal component we remove the first one
            auto oldLastSeasonalComponent = components.seasonal().back().checksum();
            components.addSeasonalComponents(seasonalDecompositionComponents, allocator);
            auto newLastSeasonalComponent = components.seasonal().back().checksum();
            BOOST_REQUIRE_EQUAL(1, components.seasonal().size());
            BOOST_TEST_REQUIRE(oldLastSeasonalComponent != newLastSeasonalComponent);
        }
    }
};

BOOST_FIXTURE_TEST_CASE(testSuperpositionOfSines, CTestFixture) {
    // Test mixture of two sine waves.

    // Synthesise 50 weeks of a weekly sine modulating a daily sine.
    TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 50 * WEEK + 1; time += HALF_HOUR) {
        double weekly = 1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi *
                                                   static_cast<double>(time) /
                                                   static_cast<double>(WEEK));
        double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi *
                                            static_cast<double>(time) /
                                            static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(weekly * daily);
    }

    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 400.0, times.size(), noise);

    core_t::TTime lastWeek = 0;
    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    for (std::size_t i = 0; i < times.size(); ++i) {
        core_t::TTime time = times[i];
        double value = trend[i] + noise[i];

        decomposition.addPoint(time, value);
        debug.addValue(time, value);

        // At each week boundary measure the fit over the week just ended.
        if (time >= lastWeek + WEEK) {
            LOG_TRACE(<< "Processing week");

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                auto prediction = decomposition.value(t, 70.0, false);
                double residual = std::fabs(trend[t / HALF_HOUR] - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(trend[t / HALF_HOUR]);
                maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
                // Error outside the 70% confidence interval only.
                percentileError += std::max(std::max(prediction(0) - trend[t / HALF_HOUR],
                                                     trend[t / HALF_HOUR] - prediction(1)),
                                            0.0);
                debug.addPrediction(t, prediction.mean(), residual);
            }

            LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
            LOG_TRACE(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
            LOG_TRACE(<< "70% error = " << percentileError / sumValue);

            // Only assert after the decomposition has had time to learn.
            if (time >= 2 * WEEK) {
                BOOST_TEST_REQUIRE(sumResidual < 0.03 * sumValue);
                BOOST_TEST_REQUIRE(maxResidual < 0.03 * maxValue);
                BOOST_TEST_REQUIRE(percentileError < 0.01 * sumValue);
                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;
            }

            lastWeek += WEEK;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
    BOOST_TEST_REQUIRE(totalSumResidual < 0.011 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.013 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.01 * totalSumValue);
}

BOOST_FIXTURE_TEST_CASE(testDistortedPeriodicProblemCase, CTestFixture) {
    // Test accuracy on real data set which caused issues historically.
TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
    BOOST_TEST_REQUIRE(test::CTimeSeriesTestData::parse(
        "testfiles/distorted_periodic.csv", timeseries, startTime, endTime,
        "^([0-9]+), ([0-9\\.]+)"));
    BOOST_TEST_REQUIRE(!timeseries.empty());

    LOG_DEBUG(<< "timeseries = "
              << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10)
              << " ...");

    core_t::TTime lastWeek = startTime;
    const core_t::TTime bucketLength = HOUR;
    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, bucketLength);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    for (std::size_t i = 0; i < timeseries.size(); ++i) {
        core_t::TTime time;
        double value;
        std::tie(time, value) = timeseries[i];

        decomposition.addPoint(time, value);
        debug.addValue(time, value);

        // Evaluate the fit at each week boundary and at the end of the data.
        if (time >= lastWeek + WEEK || i == timeseries.size() - 1) {
            LOG_TRACE(<< "Processing week");

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (core_t::TTime t = lastWeek;
                 t < lastWeek + WEEK &&
                 static_cast<std::size_t>(t / HOUR) < timeseries.size();
                 t += HOUR) {
                double actual = timeseries[t / HOUR].second;
                auto prediction = decomposition.value(t, 70.0, false);
                double residual = std::fabs(actual - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(actual);
                maxValue = std::max(maxValue, std::fabs(actual));
                // Error outside the 70% confidence interval only.
                percentileError += std::max(
                    std::max(prediction(0) - actual, actual - prediction(1)), 0.0);
                debug.addPrediction(t, prediction.mean(), residual);
            }

            LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
            LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
            LOG_DEBUG(<< "70% error = " << percentileError / sumValue);

            // Only assert once the decomposition has had time to learn.
            if (time >= 2 * WEEK) {
                BOOST_TEST_REQUIRE(sumResidual < 0.27 * sumValue);
                BOOST_TEST_REQUIRE(maxResidual < 0.54 * maxValue);
                BOOST_TEST_REQUIRE(percentileError < 0.18 * sumValue);
                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;
            }

            lastWeek += WEEK;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
    BOOST_TEST_REQUIRE(totalSumResidual < 0.17 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.23 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.10 * totalSumValue);
}

BOOST_FIXTURE_TEST_CASE(testMinimizeLongComponents, CTestFixture) {
    // Test we make longer components as smooth as possible.

    // A daily sine whose amplitude is modulated per day of the week: the
    // weekly component should end up carrying only slow modulation.
    double weights[]{1.0, 0.1, 1.0, 1.0, 0.1, 1.0, 1.0};

    TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 50 * WEEK; time += HALF_HOUR) {
        double weight = weights[(time / DAY) % 7];
        double daily = 100.0 * std::sin(boost::math::double_constants::two_pi *
                                        static_cast<double>(time) /
                                        static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(weight * daily);
    }

    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 16.0, times.size(), noise);

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;
    double meanSlope = 0.0;
    double refinements = 0.0;

    core_t::TTime lastWeek = 0;
    for (std::size_t i = 0; i < times.size(); ++i) {
        core_t::TTime time = times[i];
        double value = trend[i] + noise[i];

        decomposition.addPoint(time, value);
        debug.addValue(time, value);

        if (time >= lastWeek + WEEK) {
            LOG_TRACE(<< "Processing week");

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                auto prediction = decomposition.value(t, 70.0, false);
                double residual = std::fabs(trend[t / HALF_HOUR] - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(trend[t / HALF_HOUR]);
                maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
                percentileError += std::max(std::max(prediction(0) - trend[t / HALF_HOUR],
                                                     trend[t / HALF_HOUR] - prediction(1)),
                                            0.0);
                debug.addPrediction(t, prediction.mean(), residual);
            }

            LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
            LOG_TRACE(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
            LOG_TRACE(<< "70% error = " << percentileError / sumValue);

            if (time >= 2 * WEEK) {
                BOOST_TEST_REQUIRE(sumResidual < 0.12 * sumValue);
                BOOST_TEST_REQUIRE(maxResidual < 0.27 * maxValue);
                BOOST_TEST_REQUIRE(percentileError < 0.04 * sumValue);

                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;

                // The weekly component should stay smooth, i.e. have a small
                // mean absolute spline slope.
                for (const auto& component : decomposition.seasonalComponents()) {
                    if (component.initialized() && component.time().period() == WEEK) {
                        double slope = component.valueSpline().absSlope();
                        meanSlope += slope;
                        LOG_TRACE(<< "weekly |slope| = " << slope);
                        BOOST_TEST_REQUIRE(slope < 0.004);
                        refinements += 1.0;
                    }
                }
            }

            lastWeek += WEEK;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
    BOOST_TEST_REQUIRE(totalSumResidual < 0.04 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.11 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.01 * totalSumValue);

    meanSlope /= refinements;
    LOG_DEBUG(<< "mean weekly |slope| = " << meanSlope);
    BOOST_TEST_REQUIRE(meanSlope < 0.002);
}

BOOST_FIXTURE_TEST_CASE(testWeekend, CTestFixture) {
    // Test weekday/weekend modulation of daily seasonality.

    // Low amplitude on the first two days of each week (the "weekend").
    double weights[]{0.1, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0};

    // Run with two start offsets so the weekend doesn't always align with
    // time zero.
    for (auto offset : {0 * DAY, 5 * DAY}) {
        LOG_DEBUG(<< "offset = " << offset);

        TTimeVec times;
        TDoubleVec trend;
        for (core_t::TTime time = 0; time < 100 * WEEK; time += HALF_HOUR) {
            double weight = weights[(time / DAY) % 7];
            double daily = 100.0 * std::sin(boost::math::double_constants::two_pi *
                                            static_cast<double>(time) /
                                            static_cast<double>(DAY));
            times.push_back(time + offset);
            trend.push_back(weight * daily);
        }

        test::CRandomNumbers rng;
        TDoubleVec noise;
        rng.generateNormalSamples(0.0, 20.0, times.size(), noise);

        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
        CDebugGenerator debug;

        double totalSumResidual = 0.0;
        double totalMaxResidual = 0.0;
        double totalSumValue = 0.0;
        double totalMaxValue = 0.0;
        double totalPercentileError = 0.0;

        core_t::TTime lastWeek = offset;
        for (std::size_t i = 0; i < times.size(); ++i) {
            core_t::TTime time = times[i];
            double value = trend[i] + noise[i];

            decomposition.addPoint(time, value);
            debug.addValue(time, value);

            if (time >= lastWeek + WEEK) {
                LOG_TRACE(<< "Processing week");

                double sumResidual = 0.0;
                double maxResidual = 0.0;
                double sumValue = 0.0;
                double maxValue = 0.0;
                double percentileError = 0.0;

                for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                    auto prediction = decomposition.value(t, 70.0, false);
                    double actual = trend[(t - offset) / HALF_HOUR];
                    double residual = std::fabs(actual - prediction.mean());
                    sumResidual += residual;
                    maxResidual = std::max(maxResidual, residual);
                    sumValue += std::fabs(actual);
                    maxValue = std::max(maxValue, std::fabs(actual));
                    percentileError += std::max(
                        std::max(prediction(0) - actual, actual - prediction(1)), 0.0);
                    debug.addPrediction(t, prediction.mean(), residual);
                }

                LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
                LOG_TRACE(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
                LOG_TRACE(<< "70% error = " << percentileError / sumValue);

                if (time >= 3 * WEEK) {
                    BOOST_TEST_REQUIRE(sumResidual < 0.07 * sumValue);
                    BOOST_TEST_REQUIRE(maxResidual < 0.17 * maxValue);
                    BOOST_TEST_REQUIRE(percentileError < 0.03 * sumValue);
                    totalSumResidual += sumResidual;
                    totalMaxResidual += maxResidual;
                    totalSumValue += sumValue;
                    totalMaxValue += maxValue;
                    totalPercentileError += percentileError;
                }

                lastWeek += WEEK;
            }
        }

        LOG_DEBUG(<< "total 'sum residual' / 'sum value' = "
                  << totalSumResidual / totalSumValue);
        LOG_DEBUG(<< "total 'max residual' / 'max value' = "
                  << totalMaxResidual / totalMaxValue);
        LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
        BOOST_TEST_REQUIRE(totalSumResidual < 0.021 * totalSumValue);
        BOOST_TEST_REQUIRE(totalMaxResidual < 0.033 * totalMaxValue);
        BOOST_TEST_REQUIRE(totalPercentileError < 0.01 * totalSumValue);
    }
}

BOOST_FIXTURE_TEST_CASE(testNanHandling, CTestFixture) {
    // Test flushing data which contains NaNs.

    // Ten weeks of a plain daily sine plus noise.
    TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
        double daily = 120.0 + 100.0 * std::sin(boost::math::double_constants::two_pi *
                                                static_cast<double>(time) /
                                                static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(daily);
    }

    TDoubleVec noise;
    test::CRandomNumbers rng;
    rng.generateNormalSamples(0.0, 16.0, times.size(), noise);

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);

    int componentsModifiedBefore{0};

    // Run through half of the periodic data.
std::size_t i = 0;
    for (; i < times.size() / 2; ++i) {
        decomposition.addPoint(times[i], trend[i] + noise[i],
                               core::CMemoryCircuitBreakerStub::instance(),
                               maths_t::CUnitWeights::UNIT,
                               [&componentsModifiedBefore](TFloatMeanAccumulatorVec) {
                                   ++componentsModifiedBefore;
                               });
    }
    BOOST_REQUIRE_EQUAL(2, componentsModifiedBefore);

    // Inject a NaN into one of the seasonal components.
    CNanInjector nanInjector;
    nanInjector.injectNan(decomposition, 0);

    int componentsModifiedAfter{0};

    // Run through the 2nd half of the periodic data set.
    for (++i; i < times.size(); ++i) {
        core_t::TTime time{times[i]};
        decomposition.addPoint(time, trend[i] + noise[i],
                               core::CMemoryCircuitBreakerStub::instance(),
                               maths_t::CUnitWeights::UNIT,
                               [&componentsModifiedAfter](TFloatMeanAccumulatorVec) {
                                   ++componentsModifiedAfter;
                               });
        // Despite the injected NaN the predicted interval must stay finite.
        auto value = decomposition.value(time, 0.0, false);
        BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value(0)));
        BOOST_TEST_REQUIRE(maths::common::CMathsFuncs::isFinite(value(1)));
    }

    // The call to 'addPoint' that results in the removal of the component
    // containing the NaN value also triggers an immediate re-detection of
    // a daily seasonal component. Hence we only expect it to report that the
    // components have been modified just the once even though two modification
    // events have occurred.
    BOOST_REQUIRE_EQUAL(2, componentsModifiedAfter);

    // Check that only the daily component has been initialized.
    const TSeasonalComponentVec& components = decomposition.seasonalComponents();
    BOOST_REQUIRE_EQUAL(1, components.size());
    BOOST_REQUIRE_EQUAL(DAY, components[0].time().period());
    BOOST_TEST_REQUIRE(components[0].initialized());
}

BOOST_FIXTURE_TEST_CASE(testSinglePeriodicity, CTestFixture) {
    // Test modelling of a single seasonal component.
TTimeVec times;
    TDoubleVec trend;
    for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) {
        double daily = 100.0 + 100.0 * std::sin(boost::math::double_constants::two_pi *
                                                static_cast<double>(time) /
                                                static_cast<double>(DAY));
        times.push_back(time);
        trend.push_back(daily);
    }

    // Noise with a non-zero mean: predictions should track trend + noiseMean.
    const double noiseMean = 20.0;
    const double noiseVariance = 16.0;
    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(noiseMean, noiseVariance, times.size(), noise);

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    core_t::TTime lastWeek = 0;
    for (std::size_t i = 0; i < times.size(); ++i) {
        core_t::TTime time = times[i];
        double value = trend[i] + noise[i];

        decomposition.addPoint(time, value);
        debug.addValue(time, value);

        if (time >= lastWeek + WEEK) {
            LOG_TRACE(<< "Processing week");

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HALF_HOUR) {
                auto prediction = decomposition.value(t, 70.0, false);
                double residual =
                    std::fabs(trend[t / HALF_HOUR] + noiseMean - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(trend[t / HALF_HOUR]);
                maxValue = std::max(maxValue, std::fabs(trend[t / HALF_HOUR]));
                percentileError += std::max(
                    std::max(prediction(0) - (trend[t / HALF_HOUR] + noiseMean),
                             (trend[t / HALF_HOUR] + noiseMean) - prediction(1)),
                    0.0);
                debug.addPrediction(t, prediction.mean(), residual);
            }

            LOG_TRACE(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
            LOG_TRACE(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
            LOG_TRACE(<< "70% error = " << percentileError / sumValue);

            if (time >= 1 * WEEK) {
                BOOST_TEST_REQUIRE(sumResidual < 0.025 * sumValue);
                BOOST_TEST_REQUIRE(maxResidual < 0.045 * maxValue);
                BOOST_TEST_REQUIRE(percentileError < 0.02 * sumValue);

                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;

                // Check that only the daily component has been initialized.
                const TSeasonalComponentVec& components =
                    decomposition.seasonalComponents();
                BOOST_REQUIRE_EQUAL(1, components.size());
                BOOST_REQUIRE_EQUAL(DAY, components[0].time().period());
                BOOST_TEST_REQUIRE(components[0].initialized());
            }

            lastWeek += WEEK;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
    BOOST_TEST_REQUIRE(totalSumResidual < 0.014 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.021 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.01 * totalSumValue);

    // Check that only the daily component has been initialized.
    const TSeasonalComponentVec& components = decomposition.seasonalComponents();
    BOOST_REQUIRE_EQUAL(1, components.size());
    BOOST_REQUIRE_EQUAL(DAY, components[0].time().period());
    BOOST_TEST_REQUIRE(components[0].initialized());
}

BOOST_FIXTURE_TEST_CASE(testSeasonalOnset, CTestFixture) {
    // Test a signal which only becomes seasonal after some time.
// Hourly profile over a day and day-of-week multipliers for the seasonal
    // signal which switches on after ten weeks.
    const double daily[]{0.0,  0.0,  0.0,  0.0,  5.0,  5.0,  5.0,  40.0,
                         40.0, 40.0, 30.0, 30.0, 35.0, 35.0, 40.0, 50.0,
                         60.0, 80.0, 80.0, 10.0, 5.0,  0.0,  0.0,  0.0};
    const double weekly[]{0.1, 0.1, 1.2, 1.0, 1.0, 0.9, 1.5};

    TTimeVec times;
    TDoubleVec trend;
    // Zero signal for the first ten weeks, then daily pattern modulated by
    // the weekly multipliers.
    for (core_t::TTime time = 0; time < 150 * WEEK + 1; time += HOUR) {
        double value = 0.0;
        if (time > 10 * WEEK) {
            value += daily[(time % DAY) / HOUR];
            value *= weekly[(time % WEEK) / DAY];
        }
        times.push_back(time);
        trend.push_back(value);
    }

    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 4.0, times.size(), noise);

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HOUR);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;
    core_t::TTime lastWeek = 0;
    for (std::size_t i = 0; i < times.size(); ++i) {
        core_t::TTime time = times[i];
        double value = trend[i] + noise[i];
        decomposition.addPoint(time, value);
        debug.addValue(time, value);

        if (time >= lastWeek + WEEK) {
            LOG_TRACE(<< "Processing week");
            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;
            for (core_t::TTime t = lastWeek; t < lastWeek + WEEK; t += HOUR) {
                auto prediction = decomposition.value(t, 70.0, false);
                double residual = std::fabs(trend[t / HOUR] - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(trend[t / HOUR]);
                maxValue = std::max(maxValue, std::fabs(trend[t / HOUR]));
                percentileError += std::max(std::max(prediction(0) - trend[t / HOUR],
                                                     trend[t / HOUR] - prediction(1)),
                                            0.0);
                debug.addPrediction(t, prediction.mean(), residual);
            }
            // Ternaries guard against 0/0 while the signal is still zero.
            LOG_TRACE(<< "'sum residual' / 'sum value' = "
                      << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
            LOG_TRACE(<< "'max residual' / 'max value' = "
                      << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));
            LOG_TRACE(<< "70% error = "
                      << (percentileError == 0.0 ? 0.0 : percentileError / sumValue));
            totalSumResidual += sumResidual;
            totalMaxResidual += maxResidual;
            totalSumValue += sumValue;
            totalMaxValue += maxValue;
            totalPercentileError += percentileError;

            const TSeasonalComponentVec& components = decomposition.seasonalComponents();
            if (time > 16 * WEEK) {
                // Check that there are at least two components.
                BOOST_TEST_REQUIRE(components.size() >= 2);
                BOOST_TEST_REQUIRE(components[0].initialized());
                BOOST_TEST_REQUIRE(components[1].initialized());
            } else if (time > 11 * WEEK) {
                // Check that there is at least one component.
                BOOST_TEST_REQUIRE(components.size() >= 1);
                BOOST_TEST_REQUIRE(components[0].initialized());
            } else {
                // Check that there are no components.
                BOOST_TEST_REQUIRE(components.empty());
            }
            lastWeek += WEEK;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);
    BOOST_TEST_REQUIRE(totalSumResidual < 0.10 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.11 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.05 * totalSumValue);
}

BOOST_FIXTURE_TEST_CASE(testVarianceScale, CTestFixture) {

    // Test that variance scales are correctly computed.
test::CRandomNumbers rng;

    // Case 1: constant signal with a one-hour variance spike each day.
    LOG_DEBUG(<< "Variance Spike");
    {
        core_t::TTime time = 0;
        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS);

        for (std::size_t i = 0; i < 50; ++i) {
            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
                double value = 1.0;
                double variance = 1.0;
                // Between 01:00 and 02:00 the level and variance both spike.
                if (t >= 3600 && t < 7200) {
                    value = 5.0;
                    variance = 10.0;
                }
                TDoubleVec noise;
                rng.generateNormalSamples(value, variance, 1, noise);
                decomposition.addPoint(time + t, noise[0]);
            }
            time += DAY;
        }

        // 23 hours at variance 1 and 1 hour at variance 10.
        double meanVariance = (1.0 * 23.0 + 10.0 * 1.0) / 24.0;
        time -= DAY;
        TMeanAccumulator error;
        TMeanAccumulator percentileError;
        TMeanAccumulator meanScale;
        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
            double variance = 1.0;
            if (t >= 3600 && t < 7200) {
                variance = 10.0;
            }
            double expectedScale = variance / meanVariance;
            auto interval = decomposition.varianceScaleWeight(time + t, meanVariance, 70.0);
            LOG_TRACE(<< "time = " << t << ", expectedScale = " << expectedScale
                      << ", scale = " << interval);
            double scale = interval.mean();
            error.add(std::fabs(scale - expectedScale));
            meanScale.add(scale);
            percentileError.add(std::max(
                std::max(interval(0) - expectedScale, expectedScale - interval(1)), 0.0));
        }

        LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(error));
        LOG_DEBUG(<< "mean 70% error = "
                  << maths::common::CBasicStatistics::mean(percentileError));
        LOG_DEBUG(<< "mean scale = " << maths::common::CBasicStatistics::mean(meanScale));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(error) < 0.4);
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(percentileError) < 0.1);
        // The scale should average to one over a full period.
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.0, maths::common::CBasicStatistics::mean(meanScale), 0.02);
    }

    // Case 2: daily sinusoid with the same variance spike.
    LOG_DEBUG(<< "Smoothly Varying Variance");
    {
        core_t::TTime time = 0;
        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS);
        for (std::size_t i = 0; i < 50; ++i) {
            for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
                double value = 5.0 * std::sin(boost::math::double_constants::two_pi *
                                              static_cast<double>(t) /
                                              static_cast<double>(DAY));
                double variance = 1.0;
                if (t >= 3600 && t < 7200) {
                    variance = 10.0;
                }
                TDoubleVec noise;
                rng.generateNormalSamples(0.0, variance, 1, noise);
                decomposition.addPoint(time + t, value + noise[0]);
            }
            time += DAY;
        }

        double meanVariance = (1.0 * 23.0 + 10.0 * 1.0) / 24.0;
        time -= DAY;
        TMeanAccumulator error;
        TMeanAccumulator percentileError;
        TMeanAccumulator meanScale;
        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
            double variance = 1.0;
            if (t >= 3600 && t < 7200) {
                variance = 10.0;
            }
            double expectedScale = variance / meanVariance;
            auto interval = decomposition.varianceScaleWeight(time + t, meanVariance, 70.0);
            LOG_TRACE(<< "time = " << t << ", expectedScale = " << expectedScale
                      << ", scale = " << interval);
            double scale = interval.mean();
            error.add(std::fabs(scale - expectedScale));
            meanScale.add(scale);
            percentileError.add(std::max(
                std::max(interval(0) - expectedScale, expectedScale - interval(1)), 0.0));
        }

        LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(error));
        LOG_DEBUG(<< "mean 70% error = "
                  << maths::common::CBasicStatistics::mean(percentileError));
        LOG_DEBUG(<< "mean scale = " << maths::common::CBasicStatistics::mean(meanScale));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(error) < 0.3);
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(percentileError) < 0.15);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.0, maths::common::CBasicStatistics::mean(meanScale), 0.02);
    }

    // Case 3: slow mean reverting trend plus daily periodicity; the mean
    // variance scale should still be close to one.
    LOG_DEBUG(<< "Long Term Trend");
    {
        const core_t::TTime length = 120 * DAY;

        TTimeVec times;
        TDoubleVec trend;
        for (core_t::TTime time = 0; time < length; time += HALF_HOUR) {
            times.push_back(time);
            double x = static_cast<double>(time);
            trend.push_back(150.0 +
                            100.0 * std::sin(boost::math::double_constants::two_pi * x /
                                             static_cast<double>(240 * DAY) /
                                             (1.0 - x / static_cast<double>(2 * length))) +
                            10.0 * std::sin(boost::math::double_constants::two_pi * x /
                                            static_cast<double>(DAY)));
        }

        TDoubleVec noise;
        rng.generateNormalSamples(0.0, 4.0, times.size(), noise);

        maths::time_series::CTimeSeriesDecomposition decomposition(0.048, HALF_HOUR);
        for (std::size_t i = 0; i < times.size(); ++i) {
            decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]);
        }

        TMeanAccumulator meanScale;
        double meanVariance = decomposition.meanVariance();
        for (core_t::TTime t = 0; t < DAY; t += TEN_MINS) {
            auto interval =
                decomposition.varianceScaleWeight(times.back() + t, meanVariance, 70.0);
            LOG_TRACE(<< "time = " << t << ", scale = " << interval);
            meanScale.add(interval.mean());
        }
        LOG_DEBUG(<< "mean scale = " << maths::common::CBasicStatistics::mean(meanScale));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.0, maths::common::CBasicStatistics::mean(meanScale), 0.06);
    }
}

BOOST_FIXTURE_TEST_CASE(testSpikeyDataProblemCase, CTestFixture) {

    // Test accuracy on real data set which caused issues historically.

    TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
    BOOST_TEST_REQUIRE(test::CTimeSeriesTestData::parse(
        "testfiles/spikey_data.csv", timeseries, startTime, endTime,
        "^([0-9]+),([0-9\\.]+)"));
    BOOST_TEST_REQUIRE(!timeseries.empty());

    LOG_DEBUG(<< "timeseries = "
              << core::CContainerPrinter::print(timeseries.begin(),
                                                timeseries.begin() + 10)
              << " ...");

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS);
    maths::common::CNormalMeanPrecConjugate model =
        maths::common::CNormalMeanPrecConjugate::nonInformativePrior(
            maths_t::E_ContinuousData, 0.01);
    CDebugGenerator debug;

    core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK;
    TTimeDoublePrVec lastWeekTimeseries;
    for (std::size_t i = 0; i < timeseries.size(); ++i) {
        core_t::TTime time = timeseries[i].first;
        double value = timeseries[i].second;

        if (time > lastWeek + WEEK) {
            LOG_TRACE(<< "Processing week");

            double
sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; double percentileError = 0.0; for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) { auto prediction = decomposition.value(lastWeekTimeseries[j].first, 70.0, false); double residual = std::fabs(lastWeekTimeseries[j].second - prediction.mean()); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(lastWeekTimeseries[j].second); maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second)); percentileError += std::max(std::max(prediction(0) - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction(1)), 0.0); debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); LOG_TRACE(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); LOG_TRACE(<< "70% error = " << percentileError / sumValue); if (time >= startTime + WEEK) { totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; totalMaxValue += maxValue; totalPercentileError += percentileError; } lastWeekTimeseries.clear(); lastWeek += WEEK; } if (time > lastWeek) { lastWeekTimeseries.push_back(timeseries[i]); } decomposition.addPoint( time, value, core::CMemoryCircuitBreakerStub::instance(), maths_t::CUnitWeights::UNIT, [&model](TFloatMeanAccumulatorVec residuals) { model.setToNonInformative(0.0, 0.01); for (const auto& residual : residuals) { if (maths::common::CBasicStatistics::count(residual) > 0.0) { model.addSamples({maths::common::CBasicStatistics::mean(residual)}, maths_t::CUnitWeights::SINGLE_UNIT); } } }); model.addSamples({decomposition.detrend(time, value, 70.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT); debug.addValue(time, value); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 
'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); BOOST_TEST_REQUIRE(totalSumResidual < 0.17 * totalSumValue); BOOST_TEST_REQUIRE(totalMaxResidual < 0.25 * totalMaxValue); BOOST_TEST_REQUIRE(totalPercentileError < 0.12 * totalSumValue); double pMinScaled = 1.0; double pMinUnscaled = 1.0; for (std::size_t i = 0; timeseries[i].first < startTime + DAY; ++i) { core_t::TTime time = timeseries[i].first; double value = timeseries[i].second; double variance = model.marginalLikelihoodVariance(); double lb; double ub; maths_t::ETail tail; model.probabilityOfLessLikelySamples( maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0, false)}, {maths_t::seasonalVarianceScaleWeight(std::max( decomposition.varianceScaleWeight(time, variance, 70.0)(1), 0.25))}, lb, ub, tail); double pScaled = (lb + ub) / 2.0; pMinScaled = std::min(pMinScaled, pScaled); model.probabilityOfLessLikelySamples( maths_t::E_TwoSided, {decomposition.detrend(time, value, 0.0, false)}, maths_t::CUnitWeights::SINGLE_UNIT, lb, ub, tail); double pUnscaled = (lb + ub) / 2.0; pMinUnscaled = std::min(pMinUnscaled, pUnscaled); } LOG_DEBUG(<< "pMinScaled = " << pMinScaled); LOG_DEBUG(<< "pMinUnscaled = " << pMinUnscaled); BOOST_TEST_REQUIRE(pMinScaled > 1e3 * pMinUnscaled); } BOOST_FIXTURE_TEST_CASE(testVeryLargeValuesProblemCase, CTestFixture) { // Test accuracy on real data set which caused issues historically. 
TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
    BOOST_TEST_REQUIRE(test::CTimeSeriesTestData::parse(
        "testfiles/diurnal.csv", timeseries, startTime, endTime,
        "^([0-9]+),([0-9\\.]+)"));
    BOOST_TEST_REQUIRE(!timeseries.empty());

    LOG_DEBUG(<< "timeseries = "
              << core::CContainerPrinter::print(timeseries.begin(),
                                                timeseries.begin() + 10)
              << " ...");

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS);
    CDebugGenerator debug;

    core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK;
    TTimeDoublePrVec lastWeekTimeseries;
    for (std::size_t i = 0; i < timeseries.size(); ++i) {
        core_t::TTime time = timeseries[i].first;
        double value = timeseries[i].second;

        // Score the buffered week of values before folding in the next one.
        if (time > lastWeek + WEEK) {
            LOG_DEBUG(<< "Processing week at " << time);

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) {
                auto prediction =
                    decomposition.value(lastWeekTimeseries[j].first, 70.0, false);
                double residual =
                    std::fabs(lastWeekTimeseries[j].second - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(lastWeekTimeseries[j].second);
                maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second));
                percentileError +=
                    std::max(std::max(prediction(0) - lastWeekTimeseries[j].second,
                                      lastWeekTimeseries[j].second - prediction(1)),
                             0.0);
                debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual);
            }

            LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue);
            LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue);
            LOG_DEBUG(<< "70% error = " << percentileError / sumValue);

            // Skip the first two (learning) weeks in the aggregate score.
            if (time >= startTime + 2 * WEEK) {
                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;
            }

            lastWeekTimeseries.clear();
            lastWeek += WEEK;
        }
        if (time > lastWeek) {
            lastWeekTimeseries.push_back(timeseries[i]);
        }

        decomposition.addPoint(time, value);
        debug.addValue(time, value);
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);

    BOOST_TEST_REQUIRE(totalSumResidual < 0.34 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.74 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.24 * totalSumValue);

    // The variance scale should average to one over the following week.
    TMeanAccumulator scale;
    double variance = decomposition.meanVariance();
    core_t::TTime time = maths::common::CIntegerTools::floor(endTime, DAY);
    for (core_t::TTime t = time; t < time + WEEK; t += TEN_MINS) {
        scale.add(decomposition.varianceScaleWeight(t, variance, 70.0).mean());
    }
    LOG_DEBUG(<< "scale = " << maths::common::CBasicStatistics::mean(scale));
    BOOST_REQUIRE_CLOSE_ABSOLUTE(1.0, maths::common::CBasicStatistics::mean(scale), 0.1);
}

BOOST_FIXTURE_TEST_CASE(testMixedSmoothAndSpikeyDataProblemCase, CTestFixture) {

    // Test accuracy on real data set which caused issues historically.
TTimeDoublePrVec timeseries;
    core_t::TTime startTime;
    core_t::TTime endTime;
    BOOST_TEST_REQUIRE(test::CTimeSeriesTestData::parse(
        "testfiles/thirty_minute_samples.csv", timeseries, startTime, endTime,
        test::CTimeSeriesTestData::CSV_ISO8601_REGEX,
        test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT));
    BOOST_TEST_REQUIRE(!timeseries.empty());

    LOG_DEBUG(<< "timeseries = "
              << core::CContainerPrinter::print(timeseries.begin(),
                                                timeseries.begin() + 10)
              << " ...");

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    double totalPercentileError = 0.0;

    maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
    CDebugGenerator debug;

    core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK;
    TTimeDoublePrVec lastWeekTimeseries;
    for (std::size_t i = 0; i < timeseries.size(); ++i) {
        core_t::TTime time = timeseries[i].first;
        double value = timeseries[i].second;

        // Score the buffered week of values before folding in the next one.
        if (time > lastWeek + WEEK) {
            LOG_DEBUG(<< "Processing week at " << time);

            double sumResidual = 0.0;
            double maxResidual = 0.0;
            double sumValue = 0.0;
            double maxValue = 0.0;
            double percentileError = 0.0;

            for (std::size_t j = 0; j < lastWeekTimeseries.size(); ++j) {
                auto prediction =
                    decomposition.value(lastWeekTimeseries[j].first, 70.0, false);
                double residual =
                    std::fabs(lastWeekTimeseries[j].second - prediction.mean());
                sumResidual += residual;
                maxResidual = std::max(maxResidual, residual);
                sumValue += std::fabs(lastWeekTimeseries[j].second);
                maxValue = std::max(maxValue, std::fabs(lastWeekTimeseries[j].second));
                percentileError +=
                    std::max(std::max(prediction(0) - lastWeekTimeseries[j].second,
                                      lastWeekTimeseries[j].second - prediction(1)),
                             0.0);
                debug.addPrediction(lastWeekTimeseries[j].first, prediction.mean(), residual);
            }

            // Ternaries guard against 0/0 for an all-zero week.
            LOG_DEBUG(<< "'sum residual' / 'sum value' = "
                      << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
            LOG_DEBUG(<< "'max residual' / 'max value' = "
                      << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));
            LOG_DEBUG(<< "70% error = " << percentileError / sumValue);

            // Skip the first two (learning) weeks in the aggregate score.
            if (time >= startTime + 2 * WEEK) {
                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;
                totalPercentileError += percentileError;
            }

            lastWeekTimeseries.clear();
            lastWeek += WEEK;
        }
        if (time > lastWeek) {
            lastWeekTimeseries.push_back(timeseries[i]);
        }

        decomposition.addPoint(time, value);
        debug.addValue(time, value);
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue);

    BOOST_TEST_REQUIRE(totalSumResidual < 0.20 * totalSumValue);
    BOOST_TEST_REQUIRE(totalMaxResidual < 0.47 * totalMaxValue);
    BOOST_TEST_REQUIRE(totalPercentileError < 0.09 * totalSumValue);
}

BOOST_FIXTURE_TEST_CASE(testDiurnalPeriodicityWithMissingValues, CTestFixture) {

    // Test the accuracy of the modeling when there are periodically missing
    // values.
test::CRandomNumbers rng;

    // Daily pattern where zero entries are treated as missing (not added).
    LOG_DEBUG(<< "Daily Periodic");
    {
        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR);
        CDebugGenerator debug("daily.py");

        TMeanAccumulator mape;
        core_t::TTime time = 0;
        for (std::size_t t = 0; t < 50; ++t) {
            for (auto value :
                 {0.0,  0.0,  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                  20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0,
                  10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) {
                // Zero values are skipped, i.e. this is a gap in the data.
                if (value > 0.0) {
                    TDoubleVec noise;
                    rng.generateNormalSamples(10.0, 2.0, 1, noise);
                    value += noise[0];
                    decomposition.addPoint(time, value);
                    debug.addValue(time, value);
                    double prediction = decomposition.value(time, 0.0, false).mean();
                    if (decomposition.initialized()) {
                        mape.add(std::fabs((value - prediction) / value));
                    }
                    debug.addPrediction(time, prediction, value - prediction);
                }
                time += HALF_HOUR;
            }
        }

        LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.11);
    }

    // Weekly pattern, again with zeros treated as missing.
    LOG_DEBUG(<< "Weekly Periodic");
    {
        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HOUR);
        CDebugGenerator debug("weekly.py");

        TMeanAccumulator mape;
        core_t::TTime time = 0;
        for (std::size_t t = 0; t < 10; ++t) {
            for (auto value :
                 {0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  10.0, 10.0, 8.0,  4.0,  3.0,  1.0,  1.0, 3.0, 0.0, 0.0,
                  0.0,  0.0,  20.0, 18.0, 10.0, 4.0,  4.0,  4.0, 4.0, 5.0, 6.0,
                  8.0,  9.0,  9.0,  0.0,  0.0,  0.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0,  0.0,  0.0,  20.0, 18.0, 10.0, 4.0, 4.0, 4.0,
                  4.0,  5.0,  6.0,  8.0,  9.0,  9.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, 20.0, 18.0, 10.0,
                  4.0,  4.0,  4.0,  4.0,  5.0,  6.0,  8.0,  9.0, 9.0, 20.0, 18.0,
                  10.0, 4.0,  4.0,  4.0,  4.0,  5.0,  6.0,  8.0, 9.0, 9.0, 10.0,
                  10.0, 8.0,  4.0,  3.0,  1.0,  1.0,  3.0,  0.0, 0.0, 0.0, 0.0,
                  20.0, 18.0, 10.0, 4.0,  4.0,  4.0,  4.0,  5.0, 6.0, 8.0, 9.0,
                  9.0,  10.0, 10.0, 8.0,  4.0,  3.0,  1.0,  1.0, 3.0, 0.0, 0.0,
                  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, 0.0, 0.0, 0.0,
                  0.0,  0.0,  0.0}) {
                if (value > 0.0) {
                    TDoubleVec noise;
                    rng.generateNormalSamples(10.0, 2.0, 1, noise);
                    value += noise[0];
                    decomposition.addPoint(time, value);
                    debug.addValue(time, value);
                    double prediction = decomposition.value(time, 0.0, false).mean();
                    if (decomposition.initialized()) {
                        mape.add(std::fabs((value - prediction) / value));
                    }
                    debug.addPrediction(time, prediction, value - prediction);
                }
                time += HOUR;
            }
        }

        LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape));
        BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.12);
    }
}

BOOST_FIXTURE_TEST_CASE(testLongTermTrend, CTestFixture) {

    // Test a simple linear ramp and non-periodic saw tooth series.

    const core_t::TTime length = 120 * DAY;

    TTimeVec times;
    TDoubleVec trend;

    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 25.0, length / HALF_HOUR, noise);

    LOG_DEBUG(<< "Linear Ramp");
    {
        for (core_t::TTime time = 0; time < length; time += HALF_HOUR) {
            times.push_back(time);
            trend.push_back(5.0 + static_cast<double>(time) / static_cast<double>(DAY));
        }

        maths::time_series::CTimeSeriesDecomposition decomposition(0.024, HALF_HOUR);
        CDebugGenerator debug("ramp.py");

        double totalSumResidual = 0.0;
        double totalMaxResidual = 0.0;
        double totalSumValue = 0.0;
        double totalMaxValue = 0.0;
        core_t::TTime lastDay = times[0];

        for (std::size_t i = 0; i < times.size(); ++i) {
            decomposition.addPoint(times[i], trend[i] + noise[i]);
            debug.addValue(times[i], trend[i] + noise[i]);

            if (times[i] > lastDay + DAY) {
                LOG_TRACE(<< "Processing day " << times[i] / DAY);

                if (decomposition.initialized()) {
                    double sumResidual = 0.0;
                    double maxResidual = 0.0;
                    double sumValue = 0.0;
                    double maxValue = 0.0;
                    // Score the last day (48 half-hour buckets).
                    for (std::size_t j = i - 48; j < i; ++j) {
                        auto prediction = decomposition.value(times[j], 70.0, false);
                        double residual
= std::fabs(trend[j] - prediction.mean());
                        sumResidual += residual;
                        maxResidual = std::max(maxResidual, residual);
                        sumValue += std::fabs(trend[j]);
                        maxValue = std::max(maxValue, std::fabs(trend[j]));
                        debug.addPrediction(times[j], prediction.mean(), residual);
                    }

                    LOG_TRACE(<< "'sum residual' / 'sum value' = "
                              << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
                    LOG_TRACE(<< "'max residual' / 'max value' = "
                              << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));

                    totalSumResidual += sumResidual;
                    totalMaxResidual += maxResidual;
                    totalSumValue += sumValue;
                    totalMaxValue += maxValue;

                    BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.05);
                    BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.05);
                }
                lastDay += DAY;
            }
        }

        LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
        LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
        BOOST_TEST_REQUIRE(totalSumResidual / totalSumValue < 0.01);
        BOOST_TEST_REQUIRE(totalMaxResidual / totalMaxValue < 0.01);
    }

    // Ramps of varying length which reset to zero at irregular times, so the
    // series is not periodic.
    LOG_DEBUG(<< "Saw Tooth Not Periodic");
    {
        core_t::TTime drops[]{0,        30 * DAY, 50 * DAY,  60 * DAY,
                              85 * DAY, 100 * DAY, 115 * DAY, 120 * DAY};

        times.clear();
        trend.clear();
        {
            std::size_t i = 1;
            // Advance the drop index once time passes the next drop point.
            for (core_t::TTime time = 0; time < length;
                 time += HALF_HOUR, (time > drops[i] ? ++i : i)) {
                times.push_back(time);
                trend.push_back(25.0 * static_cast<double>(time - drops[i - 1]) /
                                static_cast<double>(drops[i] - drops[i - 1] + 1));
            }
        }

        maths::time_series::CTimeSeriesDecomposition decomposition(0.024, HALF_HOUR);
        CDebugGenerator debug("saw_tooth.py");

        double totalSumResidual = 0.0;
        double totalMaxResidual = 0.0;
        double totalSumValue = 0.0;
        double totalMaxValue = 0.0;
        core_t::TTime lastDay = times[0];

        for (std::size_t i = 0; i < times.size(); ++i) {
            decomposition.addPoint(
                times[i], trend[i] + 0.3 * noise[i],
                core::CMemoryCircuitBreakerStub::instance(),
                maths_t::countWeight(decomposition.countWeight(times[i])));
            debug.addValue(times[i], trend[i] + 0.3 * noise[i]);

            if (times[i] > lastDay + DAY) {
                LOG_TRACE(<< "Processing day " << times[i] / DAY);

                if (decomposition.initialized()) {
                    double sumResidual = 0.0;
                    double maxResidual = 0.0;
                    double sumValue = 0.0;
                    double maxValue = 0.0;
                    for (std::size_t j = i - 48; j < i; ++j) {
                        auto prediction = decomposition.value(times[j], 70.0, false);
                        double residual = std::fabs(trend[j] - prediction.mean());
                        sumResidual += residual;
                        maxResidual = std::max(maxResidual, residual);
                        sumValue += std::fabs(trend[j]);
                        maxValue = std::max(maxValue, std::fabs(trend[j]));
                        debug.addPrediction(times[j], prediction.mean(), residual);
                    }

                    LOG_TRACE(<< "'sum residual' / 'sum value' = "
                              << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
                    LOG_TRACE(<< "'max residual' / 'max value' = "
                              << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));

                    totalSumResidual += sumResidual;
                    totalMaxResidual += maxResidual;
                    totalSumValue += sumValue;
                    totalMaxValue += maxValue;
                }
                lastDay += DAY;
            }
        }

        LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
        LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
        BOOST_TEST_REQUIRE(totalSumResidual / totalSumValue < 0.19);
        BOOST_TEST_REQUIRE(totalMaxResidual / totalMaxValue < 0.20);
    }
}

BOOST_FIXTURE_TEST_CASE(testLongTermTrendAndPeriodicity, CTestFixture) {

    // Test a long term mean reverting component plus daily periodic component.

    TTimeVec times;
    TDoubleVec trend;
    const core_t::TTime length = 120 * DAY;
    for (core_t::TTime time = 0; time < length; time += HALF_HOUR) {
        times.push_back(time);
        double x = static_cast<double>(time);
        trend.push_back(150.0 +
                        100.0 * std::sin(boost::math::double_constants::two_pi * x /
                                         static_cast<double>(240 * DAY) /
                                         (1.0 - x / static_cast<double>(2 * length))) +
                        10.0 * std::sin(boost::math::double_constants::two_pi * x /
                                        static_cast<double>(DAY)));
    }

    test::CRandomNumbers rng;
    TDoubleVec noise;
    rng.generateNormalSamples(0.0, 4.0, times.size(), noise);

    maths::time_series::CTimeSeriesDecomposition decomposition(0.072, HALF_HOUR);
    CDebugGenerator debug;

    double totalSumResidual = 0.0;
    double totalMaxResidual = 0.0;
    double totalSumValue = 0.0;
    double totalMaxValue = 0.0;
    core_t::TTime lastDay = times[0];

    for (std::size_t i = 0; i < times.size(); ++i) {
        decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]);
        debug.addValue(times[i], trend[i] + 0.3 * noise[i]);

        if (times[i] > lastDay + DAY) {
            LOG_TRACE(<< "Processing day " << times[i] / DAY);

            if (decomposition.initialized()) {
                double sumResidual = 0.0;
                double maxResidual = 0.0;
                double sumValue = 0.0;
                double maxValue = 0.0;
                // Score the last day (48 half-hour buckets).
                for (std::size_t j = i - 48; j < i; ++j) {
                    auto prediction = decomposition.value(times[j], 70.0, false);
                    double residual = std::fabs(trend[j] - prediction.mean());
                    sumResidual +=
residual;
                    maxResidual = std::max(maxResidual, residual);
                    sumValue += std::fabs(trend[j]);
                    maxValue = std::max(maxValue, std::fabs(trend[j]));
                    debug.addPrediction(times[j], prediction.mean(), residual);
                }

                LOG_TRACE(<< "'sum residual' / 'sum value' = "
                          << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
                LOG_TRACE(<< "'max residual' / 'max value' = "
                          << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue));

                totalSumResidual += sumResidual;
                totalMaxResidual += maxResidual;
                totalSumValue += sumValue;
                totalMaxValue += maxValue;

                // Loose per-day bounds; the aggregate bounds below are tight.
                BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.45);
                BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.45);
            }
            lastDay += DAY;
        }
    }

    LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue);
    LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue);
    BOOST_TEST_REQUIRE(totalSumResidual / totalSumValue < 0.04);
    BOOST_TEST_REQUIRE(totalMaxResidual / totalMaxValue < 0.04);
}

BOOST_FIXTURE_TEST_CASE(testNonDiurnal, CTestFixture) {

    // Test the accuracy of the modeling of some non-daily or weekly
    // seasonal components.

    test::CRandomNumbers rng;

    // An hourly periodic pattern, both starting immediately and after a
    // 28 day quiet period (trend is zero padded for the quiet period).
    LOG_DEBUG(<< "Hourly");
    for (auto pad : {0 * DAY, 28 * DAY}) {

        TDoubleVec periodicPattern{10.0, 1.0, 0.5, 0.5, 1.0, 5.0,
                                   2.0,  1.0, 0.5, 0.5, 1.0, 6.0};

        TDoubleVec trend(pad / FIVE_MINS, 0.0);
        TTimeVec times;
        for (core_t::TTime time = 0; time < pad + 21 * DAY; time += FIVE_MINS) {
            times.push_back(time);
            trend.push_back(periodicPattern[(time / FIVE_MINS) % 12]);
        }

        TDoubleVec noise;
        rng.generateNormalSamples(0.0, 1.0, trend.size(), noise);

        core_t::TTime startTesting{pad + 28 * HOUR};
        maths::time_series::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS);
        CDebugGenerator debug("hourly." + core::CStringUtils::typeToString(pad) + ".py");

        double totalSumResidual = 0.0;
        double totalMaxResidual = 0.0;
        double totalSumValue = 0.0;
        double totalMaxValue = 0.0;
        core_t::TTime lastHour = times[0] + 3 * DAY;

        for (std::size_t i = 0; i < times.size(); ++i) {
            decomposition.addPoint(times[i], trend[i] + noise[i]);
            debug.addValue(times[i], trend[i] + noise[i]);

            if (times[i] > lastHour + HOUR) {
                LOG_TRACE(<< "Processing hour " << times[i] / HOUR);

                if (times[i] > startTesting) {
                    double sumResidual = 0.0;
                    double maxResidual = 0.0;
                    double sumValue = 0.0;
                    double maxValue = 0.0;
                    // Score the last hour (12 five-minute buckets).
                    for (std::size_t j = i - 12; j < i; ++j) {
                        double prediction =
                            decomposition.value(times[j], 70.0, false).mean();
                        double residual = std::fabs(trend[j] - prediction);
                        sumResidual += residual;
                        maxResidual = std::max(maxResidual, residual);
                        sumValue += std::fabs(trend[j]);
                        maxValue = std::max(maxValue, std::fabs(trend[j]));
                        debug.addPrediction(times[j], prediction, residual);
                    }

                    LOG_TRACE(<< "'sum residual' / 'sum value' = "
                              << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue));
                    LOG_TRACE(<< "'max residual' / 'max value' = "
                              << (maxResidual == 0.0 ?
0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; totalMaxValue += maxValue; BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.60); BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.60); } lastHour += HOUR; } } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); BOOST_TEST_REQUIRE(totalSumResidual / totalSumValue < 0.17); BOOST_TEST_REQUIRE(totalMaxResidual / totalMaxValue < 0.14); } LOG_DEBUG(<< "Two daily"); { const core_t::TTime length = 20 * DAY; TDoubleVec periodicPattern{10.0, 8.0, 5.5, 2.5, 2.0, 5.0, 2.0, 1.0, 1.5, 3.5, 4.0, 7.0}; TTimeVec times; TDoubleVec trend; for (core_t::TTime time = 0; time < length; time += TEN_MINS) { times.push_back(time); trend.push_back(periodicPattern[(time / 4 / HOUR) % 12]); } TDoubleVec noise; rng.generateNormalSamples(0.0, 2.0, times.size(), noise); core_t::TTime startTesting{14 * DAY}; maths::time_series::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS); CDebugGenerator debug("two_day.py"); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; double totalSumValue = 0.0; double totalMaxValue = 0.0; core_t::TTime lastTwoDay = times[0] + 3 * DAY; for (std::size_t i = 0; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + noise[i]); debug.addValue(times[i], trend[i] + noise[i]); if (times[i] > lastTwoDay + 2 * DAY) { LOG_TRACE(<< "Processing two days " << times[i] / 2 * DAY); if (times[i] > startTesting) { double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; for (std::size_t j = i - 288; j < i; ++j) { double prediction = decomposition.value(times[j], 70.0, false).mean(); double residual = std::fabs(trend[j] - prediction); sumResidual += residual; maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, 
std::fabs(trend[j])); debug.addPrediction(times[j], prediction, residual); } LOG_TRACE(<< "'sum residual' / 'sum value' = " << (sumResidual == 0.0 ? 0.0 : sumResidual / sumValue)); LOG_TRACE(<< "'max residual' / 'max value' = " << (maxResidual == 0.0 ? 0.0 : maxResidual / maxValue)); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; totalMaxValue += maxValue; BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.17); BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.24); } lastTwoDay += 2 * DAY; } } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); BOOST_TEST_REQUIRE(totalSumResidual / totalSumValue < 0.09); BOOST_TEST_REQUIRE(totalMaxResidual / totalMaxValue < 0.20); } } BOOST_FIXTURE_TEST_CASE(testPrecession, CTestFixture) { // Test the case the period is not a multiple of the bucket length. test::CRandomNumbers rng; TDoubleVec noise{0.0}; TDoubleVec delta{0.0}; double period = 3600.0; for (std::size_t t = 0; t < 5; ++t) { rng.generateUniformSamples(0, 120, 1, delta); delta[0] = std::floor(delta[0]); LOG_DEBUG(<< "period = " << 3600.0 + delta[0]); maths::time_series::CTimeSeriesDecomposition decomposition(0.048, FIVE_MINS); CDebugGenerator debug{"period_" + std::to_string(period + delta[0]) + ".py"}; double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; for (core_t::TTime time = 0; time < WEEK; time += FIVE_MINS) { double trend = 2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(period + delta[0])); rng.generateNormalSamples(0.0, 0.1, 1, noise); decomposition.addPoint(time, trend + noise[0]); if (decomposition.initialized()) { double prediction = decomposition.value(time, 0.0, false).mean(); double residual = decomposition.detrend(time, trend, 0.0, false, FIVE_MINS); sumResidual += std::fabs(residual); 
maxResidual = std::max(maxResidual, std::fabs(residual)); sumValue += std::fabs(trend); maxValue = std::max(maxValue, std::fabs(trend)); debug.addValue(time, trend); debug.addPrediction(time, prediction, trend - prediction); } } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue); BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.01); BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.15); } } BOOST_FIXTURE_TEST_CASE(testRandomShifts, CTestFixture) { // Test small sporadic random time shifts. test::CRandomNumbers rng; TDoubleVec noise{0.0}; double period = 3600.0; double shift = 0.0; TDoubleVec u01{0.0}; auto trend = [&] { auto period_ = static_cast<core_t::TTime>(period); TDoubleVec periodicPattern{10.0, 1.0, 0.5, 0.5, 1.0, 1.0, 2.0, 1.0, 0.5, 0.5, 1.0, 6.0}; return [periodicPattern, period_, &shift](core_t::TTime time) { auto offset = (time + static_cast<core_t::TTime>(shift)) % period_; auto i = (12 * offset) / period_; auto j = (i + 1) % 12; return maths::common::CTools::linearlyInterpolate( 0.0, 1.0, periodicPattern[i], periodicPattern[j], static_cast<double>(offset % (period_ / 12))); }; }(); maths::time_series::CTimeSeriesDecomposition decomposition(0.048, FIVE_MINS); CDebugGenerator debug{"shifting.py"}; double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; for (core_t::TTime time = 0; time < 3 * WEEK; time += FIVE_MINS) { rng.generateNormalSamples(0.0, 0.1, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); if (decomposition.initialized()) { double prediction = decomposition.value(time, 0.0, false).mean(); double residual = decomposition.detrend(time, trend(time), 0.0, false, FIVE_MINS); sumResidual += residual; maxResidual = std::max(maxResidual, std::fabs(residual)); sumValue += std::fabs(trend(time)); maxValue = std::max(maxValue, std::fabs(trend(time))); debug.addValue(time, trend(time)); 
debug.addPrediction(time, prediction, trend(time) - prediction); } rng.generateUniformSamples(0.0, 1.0, 1, u01); if (u01[0] < 0.001) { TDoubleVec shift_; rng.generateUniformSamples(0, static_cast<double>(FIVE_MINS), 1, shift_); shift += std::floor(shift_[0] + 0.5); } } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); LOG_DEBUG(<< "'max residual' / 'max value' = " << maxResidual / maxValue); BOOST_TEST_REQUIRE(sumResidual / sumValue < 0.025); BOOST_TEST_REQUIRE(maxResidual / maxValue < 0.4); } BOOST_FIXTURE_TEST_CASE(testYearly, CTestFixture) { // Test a yearly seasonal component. test::CRandomNumbers rng; maths::time_series::CTimeSeriesDecomposition decomposition(0.012, 6 * HOUR); maths::time_series::CDecayRateController controller( maths::time_series::CDecayRateController::E_PredictionBias | maths::time_series::CDecayRateController::E_PredictionErrorIncrease, 1); CDebugGenerator debug; TDoubleVec noise; core_t::TTime time = 2 * HOUR; for (/**/; time < 4 * YEAR; time += 2 * HOUR) { double trend = 15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) + 7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); rng.generateNormalSamples(0.0, 1.0, 1, noise); decomposition.addPoint(time, trend + noise[0]); if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{decomposition.detrend(time, trend, 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, 2 * HOUR, 1.0, 0.0005)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } } // Predict over one year and check we get reasonable accuracy. 
double maxRelativeError{0.0}; TMeanAccumulator mape; for (/**/; time < 5 * YEAR; time += 2 * HOUR) { double trend = 15.0 * (2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(YEAR))) + 7.5 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); double prediction = decomposition.value(time, 0.0, false).mean(); double relativeError = std::fabs((prediction - trend) / trend); LOG_TRACE(<< "relative error = " << relativeError); maxRelativeError = std::max(maxRelativeError, relativeError); mape.add(relativeError); debug.addValue(time, trend); debug.addPrediction(time, prediction, trend - prediction); } LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape)); LOG_DEBUG(<< "max relative error = " << maxRelativeError); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.022); BOOST_TEST_REQUIRE(maxRelativeError < 0.08); } BOOST_FIXTURE_TEST_CASE(testWithOutliers, CTestFixture) { // Test smooth periodic signal polluted with pepper and salt outliers. 
using TSizeVec = std::vector<std::size_t>; test::CRandomNumbers rng; TDoubleVec noise; TSizeVec outliers; TDoubleVec spikeOrTroughSelector; core_t::TTime buckets{WEEK / TEN_MINS}; std::size_t numberOutliers{static_cast<std::size_t>(0.1 * static_cast<double>(buckets))}; rng.generateUniformSamples(0, buckets, numberOutliers, outliers); rng.generateUniformSamples(0, 1.0, numberOutliers, spikeOrTroughSelector); rng.generateNormalSamples(0.0, 9.0, buckets, noise); std::sort(outliers.begin(), outliers.end()); auto trend = [](core_t::TTime time) { return 25.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); }; maths::time_series::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS); CDebugGenerator debug; for (core_t::TTime time = 0; time < WEEK; time += TEN_MINS) { std::size_t bucket(time / TEN_MINS); auto outlier = std::lower_bound(outliers.begin(), outliers.end(), bucket); double value = outlier != outliers.end() && *outlier == bucket ? (spikeOrTroughSelector[outlier - outliers.begin()] > 0.5 ? 
0.0 : 50.0) : trend(time); bool newComponents{false}; decomposition.addPoint( time, value, core::CMemoryCircuitBreakerStub::instance(), maths_t::CUnitWeights::UNIT, [&newComponents](TFloatMeanAccumulatorVec) { newComponents = true; }); if (newComponents) { TMeanAccumulator mape; for (core_t::TTime endTime = time + DAY; time < endTime; time += TEN_MINS) { double prediction = decomposition.value(time, 0.0, false).mean(); mape.add(std::fabs(prediction - trend(time)) / trend(time)); debug.addValue(time, value); debug.addPrediction(time, prediction, trend(time) - prediction); } LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.03); break; } debug.addValue(time, value); } } BOOST_FIXTURE_TEST_CASE(testCalendar, CTestFixture) { // Test that we significantly reduce the error on the last Friday of each // month after estimating the appropriate component. TTimeVec months{2505600, // Fri 30th Jan 4924800, // Fri 27th Feb 7344000, // Fri 27th Mar 9763200, // Fri 24th Apr 12787200, // Fri 29th May 15206400, // Fri 26th Jun 18230400, // Fri 31st Jul 18316800}; core_t::TTime end = months.back(); TDoubleVec errors{5.0, 15.0, 35.0, 32.0, 25.0, 36.0, 22.0, 12.0, 3.0}; auto trend = [&months, &errors](core_t::TTime t) { double result = 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY)); auto i = std::lower_bound(months.begin(), months.end(), t - DAY); if (t >= *i + 7200 && t < *i + 7200 + static_cast<core_t::TTime>(errors.size()) * HALF_HOUR) { result += errors[(t - (*i + 7200)) / HALF_HOUR]; } return result; }; test::CRandomNumbers rng; maths::time_series::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); CDebugGenerator debug; TDoubleVec noise; std::size_t count{0}; for (core_t::TTime time = 0; time < end; time += HALF_HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); 
debug.addValue(time, trend(time) + noise[0]); if (time - DAY == *std::lower_bound(months.begin(), months.end(), time - DAY)) { LOG_DEBUG(<< "time = " << time); std::size_t largeErrorCount = 0; for (core_t::TTime time_ = time - DAY; time_ < time; time_ += TEN_MINS) { double prediction = decomposition.value(time_, 0.0, false).mean(); double variance = 4.0 * decomposition.varianceScaleWeight(time_, 4.0, 0.0).mean(); double actual = trend(time_); if (std::fabs(prediction - actual) / std::sqrt(variance) > 3.0) { LOG_TRACE(<< " prediction = " << prediction); LOG_TRACE(<< " variance = " << variance); LOG_TRACE(<< " trend = " << trend(time_)); ++largeErrorCount; } debug.addPrediction(time_, prediction, actual - prediction); } LOG_DEBUG(<< "large error count = " << largeErrorCount); if (++count <= 5) { BOOST_TEST_REQUIRE(largeErrorCount > 15); } if (count >= 6) { BOOST_TEST_REQUIRE(largeErrorCount <= 1); } } } // Check that we can detect the calendar component. BOOST_REQUIRE_EQUAL(false, decomposition.calendarComponents().empty()); } BOOST_FIXTURE_TEST_CASE(testConditionOfTrend, CTestFixture) { // Test numerical stability of the trend model over very long time spans. 
auto trend = [](core_t::TTime time) { return std::pow(static_cast<double>(time) / static_cast<double>(WEEK), 2.0); }; const core_t::TTime bucketLength{6 * HOUR}; test::CRandomNumbers rng; maths::time_series::CTimeSeriesDecomposition decomposition(0.0005, bucketLength); CDebugGenerator debug; TDoubleVec noise; TMeanAccumulator mape; for (core_t::TTime time = 0; time < 9 * YEAR; time += 6 * HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); debug.addValue(time, trend(time) + noise[0]); if (time > 10 * WEEK) { double prediction{decomposition.value(time, 0.0, false).mean()}; double error{decomposition.detrend(time, trend(time), 0.0, false)}; debug.addPrediction(time, prediction, error); mape.add(std::fabs(error) / trend(time)); BOOST_TEST_REQUIRE(std::fabs(error) / trend(time) < 0.01); } } LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 1e-4); } BOOST_FIXTURE_TEST_CASE(testComponentLifecycle, CTestFixture) { // Test we adapt to changing seasonality adding and removing components // as necessary. test::CRandomNumbers rng; auto trend = [](core_t::TTime time) { return 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)) + 3.0 * (time > 4 * WEEK ? std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(HOUR)) : 0.0) - 3.0 * (time > 9 * WEEK ? std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(HOUR)) : 0.0) + 8.0 * (time > 16 * WEEK ? std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 4.0 / static_cast<double>(DAY)) : 0.0) - 8.0 * (time > 21 * WEEK ? 
std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / 4.0 / static_cast<double>(DAY)) : 0.0); }; maths::time_series::CTimeSeriesDecomposition decomposition(0.012, FIVE_MINS); maths::time_series::CDecayRateController controller( maths::time_series::CDecayRateController::E_PredictionBias | maths::time_series::CDecayRateController::E_PredictionErrorIncrease, 1); CDebugGenerator debug; TMeanAccumulatorVec mapes(4); TDoubleVec noise; for (core_t::TTime time = 0; time < 35 * WEEK; time += FIVE_MINS) { rng.generateNormalSamples(0.0, 1.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); debug.addValue(time, trend(time) + noise[0]); if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } double prediction = decomposition.value(time, 0.0, false).mean(); if (time > 24 * WEEK) { mapes[3].add(std::fabs(prediction - trend(time)) / trend(time)); } else if (time > 18 * WEEK && time < 21 * WEEK) { mapes[2].add(std::fabs(prediction - trend(time)) / trend(time)); } else if (time > 11 * WEEK && time < 14 * WEEK) { mapes[1].add(std::fabs(prediction - trend(time)) / trend(time)); } else if (time > 6 * WEEK && time < 9 * WEEK) { mapes[0].add(std::fabs(prediction - trend(time)) / trend(time)); } debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); } TDoubleVec bounds{0.013, 0.013, 0.15, 0.02}; for (std::size_t i = 0; i < 4; ++i) { double mape{maths::common::CBasicStatistics::mean(mapes[i])}; LOG_DEBUG(<< "MAPE = " << mape); BOOST_TEST_REQUIRE(mape < bounds[i]); } } BOOST_FIXTURE_TEST_CASE(testStability, CTestFixture) { auto trend = [](core_t::TTime time) { return 2000.0 + (time < 10 * WEEK ? 
100.0 * weekends(time) : -2000.0); }; maths::time_series::CTimeSeriesDecomposition decomposition(0.012, HALF_HOUR); maths::time_series::CDecayRateController controller( maths::time_series::CDecayRateController::E_PredictionBias | maths::time_series::CDecayRateController::E_PredictionErrorIncrease, 1); CDebugGenerator debug; for (core_t::TTime time = 0; time < 2 * YEAR; time += HALF_HOUR) { decomposition.addPoint(time, trend(time)); debug.addValue(time, trend(time)); if (decomposition.initialized()) { TDouble1Vec mean{decomposition.meanValue(time)}; TDouble1Vec predictionError{decomposition.detrend(time, trend(time), 0.0, false)}; double multiplier{controller.multiplier(mean, {predictionError}, HALF_HOUR, 1.0, 0.0005)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) - prediction); if (time > 20 * WEEK) { BOOST_TEST_REQUIRE(std::fabs(trend(time) - prediction) < 5.0); } } } BOOST_FIXTURE_TEST_CASE(testRemoveSeasonal, CTestFixture) { // Check we correctly remove all seasonal components. test::CRandomNumbers rng; auto trend = [](core_t::TTime time) { return 20.0 + 10.0 * (time <= 4 * WEEK ? 
std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)) : 0.0); }; for (auto noiseVariance : {1.0, 0.0}) { maths::time_series::CTimeSeriesDecomposition decomposition(0.012, FIVE_MINS); maths::time_series::CDecayRateController controller{ maths::time_series::CDecayRateController::E_PredictionBias | maths::time_series::CDecayRateController::E_PredictionErrorIncrease, 1}; CDebugGenerator debug; TDoubleVec noise; for (core_t::TTime time = 0; time < 20 * WEEK; time += FIVE_MINS) { if (noiseVariance > 0.0) { rng.generateNormalSamples(0.0, 1.0, 1, noise); } else { noise.assign(1, 0.0); } decomposition.addPoint(time, trend(time) + noise[0]); debug.addValue(time, trend(time) + noise[0]); if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier( prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); } // We shouldn't have any components left at this point. BOOST_REQUIRE_EQUAL(0, decomposition.seasonalComponents().size()); } } BOOST_FIXTURE_TEST_CASE(testFastAndSlowSeasonality, CTestFixture) { // Test we have good modelling of the fast component after detecting a slow // periodic component. 
test::CRandomNumbers rng; auto trend = [] { TDoubleVec fast{0.0, 7.0, 10.0, 7.0, 0.0, 0.0}; return [=](core_t::TTime time) { return 2.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)) + fast[static_cast<std::size_t>(time / FIVE_MINS) % fast.size()]; }; }(); maths::time_series::CTimeSeriesDecomposition decomposition(0.012, ONE_MIN); maths::time_series::CDecayRateController controller{ maths::time_series::CDecayRateController::E_PredictionBias | maths::time_series::CDecayRateController::E_PredictionErrorIncrease, 1}; CDebugGenerator debug; TMeanAccumulator mape; TDoubleVec noise; for (core_t::TTime time = 0; time < WEEK; time += ONE_MIN) { rng.generateNormalSamples(0.0, 0.2, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); debug.addValue(time, trend(time) + noise[0]); if (decomposition.initialized()) { TDouble1Vec prediction{decomposition.meanValue(time)}; TDouble1Vec predictionError{ decomposition.detrend(time, trend(time) + noise[0], 0.0, false)}; double multiplier{controller.multiplier(prediction, {predictionError}, FIVE_MINS, 1.0, 0.0001)}; decomposition.decayRate(multiplier * decomposition.decayRate()); } double prediction{decomposition.value(time, 0.0, false).mean()}; debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); if (time > 4 * DAY) { double error{decomposition.detrend(time, trend(time), 0.0, false, FIVE_MINS)}; double relativeError{std::fabs(error / trend(time))}; BOOST_TEST_REQUIRE(relativeError < 0.25); mape.add(relativeError); } } LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.01); // We should be modelling both seasonalities. BOOST_TEST_REQUIRE(2, decomposition.seasonalComponents().size()); } BOOST_FIXTURE_TEST_CASE(testNonNegative, CTestFixture) { // Test if we specify the time series is non-negative then we never predict // a negative value for it. 
test::CRandomNumbers rng; auto trend = [](core_t::TTime time) { return std::max(15.0 - 0.5 * static_cast<double>(time) / static_cast<double>(DAY), 1.0) + std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); }; maths::time_series::CTimeSeriesDecomposition decomposition(0.012, FIVE_MINS); CDebugGenerator debug; TMeanAccumulator mape; TDoubleVec noise; for (core_t::TTime time = 0; time < 6 * WEEK; time += FIVE_MINS) { rng.generateNormalSamples(0.0, 0.1, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); debug.addValue(time, trend(time) + noise[0]); auto prediction = decomposition.value(time, 0.0, true); BOOST_TEST_REQUIRE(prediction(0) >= 0.0); BOOST_TEST_REQUIRE(prediction(1) >= 0.0); debug.addPrediction(time, prediction.mean(), trend(time) + noise[0] - prediction.mean()); if (time > 4 * DAY && trend(time) > 1.0) { double error{decomposition.detrend(time, trend(time), 0.0, true, FIVE_MINS)}; double relativeError{std::fabs(error / trend(time))}; BOOST_TEST_REQUIRE(relativeError < 0.8); mape.add(relativeError); } } LOG_DEBUG(<< "MAPE = " << maths::common::CBasicStatistics::mean(mape)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(mape) < 0.1); } BOOST_FIXTURE_TEST_CASE(testSwap, CTestFixture) { // Test that swapping preserves checksums. 
const double decayRate = 0.01; const core_t::TTime bucketLength = HALF_HOUR; TTimeVec times; TDoubleVec trend1; TDoubleVec trend2; for (core_t::TTime time = 0; time <= 10 * WEEK; time += HALF_HOUR) { double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); times.push_back(time); trend1.push_back(daily); trend2.push_back(2.0 * daily); } test::CRandomNumbers rng; TDoubleVec noise; rng.generateNormalSamples(20.0, 16.0, 2 * times.size(), noise); maths::time_series::CTimeSeriesDecomposition decomposition1(decayRate, bucketLength); maths::time_series::CTimeSeriesDecomposition decomposition2(2.0 * decayRate, 2 * bucketLength); for (std::size_t i = 0; i < times.size(); i += 2) { decomposition1.addPoint(times[i], trend1[i] + noise[i]); decomposition2.addPoint(times[i], trend2[i] + noise[i + 1]); } std::uint64_t checksum1 = decomposition1.checksum(); std::uint64_t checksum2 = decomposition2.checksum(); LOG_DEBUG(<< "checksum1 = " << checksum1 << ", checksum2 = " << checksum2); decomposition1.swap(decomposition2); BOOST_REQUIRE_EQUAL(checksum1, decomposition2.checksum()); BOOST_REQUIRE_EQUAL(checksum2, decomposition1.checksum()); } BOOST_FIXTURE_TEST_CASE(testPersist, CTestFixture) { // Check that serialization is idempotent. 
const double decayRate = 0.01; const core_t::TTime bucketLength = HALF_HOUR; TTimeVec times; TDoubleVec trend; for (core_t::TTime time = 0; time <= 10 * WEEK; time += HALF_HOUR) { double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY)); times.push_back(time); trend.push_back(daily); } test::CRandomNumbers rng; TDoubleVec noise; rng.generateNormalSamples(20.0, 16.0, times.size(), noise); maths::time_series::CTimeSeriesDecomposition origDecomposition(decayRate, bucketLength); for (std::size_t i = 0; i < times.size(); ++i) { origDecomposition.addPoint(times[i], trend[i] + noise[i]); } std::ostringstream origJson; core::CJsonStatePersistInserter::persist( origJson, std::bind_front(&maths::time_series::CTimeSeriesDecomposition::acceptPersistInserter, &origDecomposition)); LOG_TRACE(<< "Decomposition Json representation:\n" << origJson.str()); // Restore the Json into a new decomposition std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"}; core::CJsonStateRestoreTraverser traverser(origJsonStrm); maths::common::STimeSeriesDecompositionRestoreParams params{ decayRate + 0.1, bucketLength, maths::common::SDistributionRestoreParams{maths_t::E_ContinuousData, decayRate + 0.1}}; maths::time_series::CTimeSeriesDecomposition restoredDecomposition(params, traverser); std::ostringstream newJson; core::CJsonStatePersistInserter::persist( newJson, std::bind_front(&maths::time_series::CTimeSeriesDecomposition::acceptPersistInserter, &restoredDecomposition)); BOOST_REQUIRE_EQUAL(origJson.str(), newJson.str()); } BOOST_FIXTURE_TEST_CASE(testNoAllocationsAllowed, CTestFixture) { // Test that when CTimeSeriesDecompositionAllocator::areAllocationsAllowed returns false, // the call of addPoint() does not lead to creation of new seasonal or calendar components // in the decomposition. 
TTimeVec months{2505600, // Fri 30th Jan 4924800, // Fri 27th Feb 7344000, // Fri 27th Mar 9763200, // Fri 24th Apr 12787200, // Fri 29th May 15206400, // Fri 26th Jun 18230400, // Fri 31st Jul 18316800}; // Sat 1st Aug core_t::TTime end = months.back(); TDoubleVec errors{5.0, 15.0, 35.0, 32.0, 25.0, 36.0, 22.0, 12.0, 3.0}; double decayRate{0.01}; auto trend = [](core_t::TTime t) { double weekly = 1200.0 + 1000.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(WEEK)); double daily = 5.0 + 5.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(t) / static_cast<double>(DAY)); double result = weekly + daily; return result; }; test::CRandomNumbers rng; { CConfigurableMemoryCircuitBreaker allocator{false}; maths::time_series::CTimeSeriesDecomposition decomposition(decayRate, HALF_HOUR); TDoubleVec noise; for (core_t::TTime time = 0; time < end; time += HALF_HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0], allocator); } // Check that we don't have any seasonal components. BOOST_REQUIRE_EQUAL(true, decomposition.seasonalComponents().empty()); // Check that we don't have any calendar components. BOOST_REQUIRE_EQUAL(true, decomposition.calendarComponents().empty()); } } BOOST_FIXTURE_TEST_CASE(testAddSeasonalComponentsNoAllocations, CComponentsTest) { // Test that in the hard_limit state we still can add new seasonal components if // at the same time we remove old seasonal components of the same total size or larger. this->testAddSeasonalComponents(); } BOOST_AUTO_TEST_SUITE_END()