lib/maths/time_series/unittest/CSignalTest.cc (1,101 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #include <boost/test/unit_test_suite.hpp> #include <core/CLogger.h> #include <core/CoreTypes.h> #include <maths/common/CBasicStatistics.h> #include <maths/common/CBasicStatisticsPersist.h> #include <maths/common/CIntegerTools.h> #include <maths/time_series/CSignal.h> #include <test/BoostTestCloseAbsolute.h> #include <test/CRandomNumbers.h> #include "TestUtils.h" #include <boost/math/constants/constants.hpp> #include <boost/test/unit_test.hpp> #include <string> BOOST_AUTO_TEST_SUITE(CSignalTest) using namespace ml; namespace { using TDoubleVec = std::vector<double>; using TDoubleVecVec = std::vector<TDoubleVec>; using TSizeVec = std::vector<std::size_t>; using TSizeVecVec = std::vector<TSizeVec>; using TPredictorVec = std::vector<maths::time_series::CSignal::TPredictor>; using TPredictorVecVec = std::vector<TPredictorVec>; using TMeanVarAccumulator = maths::common::CBasicStatistics::SSampleMeanVar<double>::TAccumulator; std::string print(const maths::time_series::CSignal::TComplexVec& f) { std::ostringstream result; for (std::size_t i = 0; i < f.size(); ++i) { LOG_DEBUG(<< f[i].real() << " + " << f[i].imag() << 'i'); } return result.str(); } void bruteForceDft(maths::time_series::CSignal::TComplexVec& f, double sign) { maths::time_series::CSignal::TComplexVec result( f.size(), maths::time_series::CSignal::TComplex(0.0, 0.0)); for (std::size_t k = 0; k < f.size(); ++k) { for (std::size_t n = 0; n < f.size(); ++n) { double t{-sign * boost::math::double_constants::two_pi * static_cast<double>(k * n) / static_cast<double>(f.size())}; result[k] += maths::time_series::CSignal::TComplex(std::cos(t), std::sin(t)) * f[n]; } if (sign < 0.0) { result[k] /= static_cast<double>(f.size()); } } f.swap(result); } maths::time_series::CSignal::TSeasonalComponentVec seasonalComponentSummary(TSizeVec periods) { maths::time_series::CSignal::TSeasonalComponentVec result; result.reserve(periods.size()); for (auto period : periods) { result.emplace_back(period, 0, period, std::pair<std::size_t, std::size_t>{0, period}); } return result; } } BOOST_AUTO_TEST_CASE(testFFTVersusOctave) { // Test versus values calculated using octave fft. double x[][20] = { {2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, 4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40}, {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37, -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}}; maths::time_series::CSignal::TComplexVec fx; for (std::size_t i = 0; i < 20; ++i) { fx.emplace_back(x[0][i], x[1][i]); } LOG_DEBUG(<< "*** Power of 2 Length ***"); { double expected[][2]{// length 2 {4007.1, -341.9}, {1103.5, 9289.4}, // length 4 {8867.5, -3114.0}, {-772.18, 8235.2}, {-2825.7, 10425.0}, {4951.6, 2349.1}, // length 8 {2813.8, -3565.1}, {-2652.4, -1739.5}, {-1790.1, 6488.9}, {4933.6, 6326.1}, {-7833.9, 15118.0}, {344.29, 6447.2}, {10819.0, -9439.9}, {13809.0, 16155.0}, // length 16 {14359.0, -5871.2}, {1176.5, -3143.9}, {636.25, -1666.2}, {-2819.0, 8259.4}, {-12844.0, 9601.7}, {-2292.3, -5598.5}, {11737.0, 4809.3}, {-2499.2, -143.95}, {-10045.0, 6570.2}, {27277.0, 10002.0}, {870.01, 16083.0}, {21695.0, 19192.0}, {1601.9, 3220.9}, {-7675.7, 5483.5}, {-1921.5, 31949.0}, {1629.0, -27167.0}}; for (std::size_t i = 0, l = 2; l < fx.size(); i += l, l <<= 1) { LOG_DEBUG(<< "Testing length " << l); maths::time_series::CSignal::TComplexVec actual(fx.begin(), fx.begin() + l); maths::time_series::CSignal::fft(actual); LOG_DEBUG(<< print(actual)); double error{0.0}; for (std::size_t j = 0; j < l; ++j) { error += std::abs(actual[j] - maths::time_series::CSignal::TComplex( expected[i + j][0], expected[i + j][1])); } error /= static_cast<double>(l); LOG_DEBUG(<< "error = " << error); BOOST_TEST_REQUIRE(error < 0.2); } } LOG_DEBUG(<< "*** Arbitrary Length ***"); { double expected[][2]{ {18042.0, 755.0}, {961.0, 5635.6}, {-5261.8, 7542.2}, {-12814.0, 2250.2}, {-8248.5, 6620.5}, {-21626.0, 3570.6}, {6551.5, -12732.0}, {6009.5, 10622.0}, {9954.0, -1224.2}, {-2871.5, 7073.6}, {-14409.0, 10939.0}, {13682.0, 25304.0}, {-10468.0, -6338.5}, {6506.0, 6283.3}, {32665.0, 5127.7}, {3190.7, 4323.4}, {-6988.7, -3865.0}, {-3881.4, 4360.8}, {46434.0, 20556.0}, {-6319.6, -7329.0}}; maths::time_series::CSignal::TComplexVec actual(fx.begin(), fx.end()); maths::time_series::CSignal::fft(actual); double error{0.0}; for (std::size_t j = 0; j < actual.size(); ++j) { error += std::abs(actual[j] - maths::time_series::CSignal::TComplex( expected[j][0], expected[j][1])); } error /= static_cast<double>(actual.size()); LOG_DEBUG(<< "error = " << error); BOOST_TEST_REQUIRE(error < 0.2); } } BOOST_AUTO_TEST_CASE(testIFFTVersusOctave) { // Test versus values calculated using octave ifft. double x[][20]{ {2555.33, 1451.79, 465.60, 4394.83, -1553.24, -2772.07, -3977.73, 2249.31, -2006.04, 3540.84, 4271.63, 4648.81, -727.90, 2285.24, 3129.56, -3596.79, -1968.66, 3795.18, 1627.84, 228.40}, {4473.77, -4815.63, -818.38, -1953.72, -2323.39, -3007.25, 4444.24, 435.21, 3613.32, 3471.37, -1735.72, 2560.82, -2383.29, -2370.23, -4921.04, -541.25, 1516.69, -2028.42, 3981.02, 3156.88}}; maths::time_series::CSignal::TComplexVec fx; for (std::size_t i = 0; i < 20; ++i) { fx.emplace_back(x[0][i], x[1][i]); } LOG_DEBUG(<< "*** Powers of 2 Length ***"); { double expected[][2]{// length 2 {2003.56, -170.93}, {551.77, 4644.70}, // length 4 {2216.89, -778.49}, {1237.91, 587.28}, {-706.42, 2606.19}, {-193.04, 2058.80}, {351.73, -445.64}, // length 8 {1726.09, 2019.35}, {1352.32, -1179.99}, {43.04, 805.89}, {-979.24, 1889.70}, {616.70, 790.77}, {-223.77, 811.12}, {-331.55, -217.44}, {897.45, -366.95}, // length 16 {101.81, -1697.92}, {-120.10, 1996.81}, {-479.73, 342.72}, {100.12, 201.31}, {1355.94, 1199.49}, {54.38, 1005.18}, {1704.78, 625.13}, {-627.80, 410.64}, {-156.20, -9.00}, {733.56, 300.58}, {-143.27, -349.91}, {-802.73, 600.10}, {-176.19, 516.21}, {39.77, -104.14}, {73.53, -196.49}}; for (std::size_t i = 0, l = 2; l < fx.size(); i += l, l <<= 1) { LOG_DEBUG(<< "Testing length " << l); maths::time_series::CSignal::TComplexVec actual(fx.begin(), fx.begin() + l); maths::time_series::CSignal::ifft(actual); LOG_DEBUG(<< print(actual)); double error{0.0}; for (std::size_t j = 0; j < l; ++j) { error += std::abs(actual[j] - maths::time_series::CSignal::TComplex( expected[i + j][0], expected[i + j][1])); } error /= static_cast<double>(l); LOG_DEBUG(<< "error = " << error); BOOST_TEST_REQUIRE(error < 0.01); } } } BOOST_AUTO_TEST_CASE(testFFTRandomized) { // Test on randomized input versus brute force. test::CRandomNumbers rng; TDoubleVec components; rng.generateUniformSamples(-100000.0, 100000.0, 20000, components); TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); for (std::size_t i = 0, j = 0; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::time_series::CSignal::TComplexVec expected; for (std::size_t k = 0; k < lengths[i]; ++k) { expected.emplace_back(components[j + 2 * k], components[j + 2 * k + 1]); } maths::time_series::CSignal::TComplexVec actual(expected); bruteForceDft(expected, +1.0); maths::time_series::CSignal::fft(actual); double error{0.0}; for (std::size_t k = 0; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG(<< "length = " << lengths[i] << ", error = " << error); } BOOST_TEST_REQUIRE(error < 1e-5); } } BOOST_AUTO_TEST_CASE(testIFFTRandomized) { // Test on randomized input versus brute force. test::CRandomNumbers rng; TDoubleVec components; rng.generateUniformSamples(-100000.0, 100000.0, 20000, components); TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); for (std::size_t i = 0, j = 0; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::time_series::CSignal::TComplexVec expected; for (std::size_t k = 0; k < lengths[i]; ++k) { expected.emplace_back(components[j + 2 * k], components[j + 2 * k + 1]); } maths::time_series::CSignal::TComplexVec actual(expected); bruteForceDft(expected, -1.0); maths::time_series::CSignal::ifft(actual); double error = 0.0; for (std::size_t k = 0; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG(<< "length = " << lengths[i] << ", error = " << error); } BOOST_TEST_REQUIRE(error < 1e-5); } } BOOST_AUTO_TEST_CASE(testFFTIFFTIdempotency) { // Test on randomized input that x = F(F^-1(x)). test::CRandomNumbers rng; TDoubleVec components; rng.generateUniformSamples(-100000.0, 100000.0, 20000, components); TSizeVec lengths; rng.generateUniformSamples(2, 100, 1000, lengths); for (std::size_t i = 0, j = 0; i < lengths.size() && j + 2 * lengths[i] < components.size(); ++i, j += 2 * lengths[i]) { maths::time_series::CSignal::TComplexVec expected; for (std::size_t k = 0; k < lengths[i]; ++k) { expected.emplace_back(components[j + 2 * k], components[j + 2 * k + 1]); } maths::time_series::CSignal::TComplexVec actual(expected); maths::time_series::CSignal::fft(actual); maths::time_series::CSignal::ifft(actual); double error = 0.0; for (std::size_t k = 0; k < actual.size(); ++k) { error += std::abs(actual[k] - expected[k]); } if (i % 5 == 0 || error >= 1e-5) { LOG_DEBUG(<< "length = " << lengths[i] << ", error = " << error); } BOOST_TEST_REQUIRE(error < 1e-5); } } BOOST_AUTO_TEST_CASE(testCyclicAutocorrelations) { // Test the cyclic autocorrelation matches the autocorrelation calculated with FFT. test::CRandomNumbers rng; TSizeVec sizes; rng.generateUniformSamples(10, 30, 100, sizes); for (std::size_t t = 0; t < sizes.size(); ++t) { TDoubleVec values_; rng.generateUniformSamples(-10.0, 10.0, sizes[t], values_); maths::time_series::CSignal::TFloatMeanAccumulatorVec values(sizes[t]); for (std::size_t i = 0; i < values_.size(); ++i) { values[i].add(values_[i]); } TDoubleVec expected; for (std::size_t offset = 1; offset < values.size(); ++offset) { expected.push_back(maths::time_series::CSignal::cyclicAutocorrelation( maths::time_series::CSignal::seasonalComponentSummary(offset), values)); } TDoubleVec actual; maths::time_series::CSignal::autocorrelations(values, actual); if (t % 10 == 0) { LOG_DEBUG(<< "expected = " << expected); LOG_DEBUG(<< "actual = " << actual); } BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expected), core::CContainerPrinter::print(actual)); } } BOOST_AUTO_TEST_CASE(testSeasonalComponentSummary) { using TSizeSizePrVec = std::vector<std::pair<std::size_t, std::size_t>>; TSizeSizePrVec expectedWindows; { maths::time_series::CSignal::SSeasonalComponentSummary period{10, 0, 10, {0, 10}}; BOOST_REQUIRE_EQUAL(10, period.period()); BOOST_REQUIRE_EQUAL(false, period.windowed()); BOOST_REQUIRE(period == period); BOOST_REQUIRE((period < period) == false); for (std::size_t i = 0; i < 100; ++i) { BOOST_REQUIRE_EQUAL(true, period.contains(i)); BOOST_REQUIRE_EQUAL(i % 10, period.offset(i)); expectedWindows.assign(1, {0, i}); BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedWindows), core::CContainerPrinter::print(period.windows(i))); } } { maths::time_series::CSignal::SSeasonalComponentSummary period{10, 5, 15, {0, 5}}; BOOST_REQUIRE_EQUAL(5, period.period()); BOOST_REQUIRE_EQUAL(true, period.windowed()); BOOST_REQUIRE(period == period); BOOST_REQUIRE((period < period) == false); TSizeSizePrVec windows{{5, 10}, {20, 25}, {35, 40}, {50, 55}}; for (std::size_t i = 0; i < 50; ++i) { std::size_t expectedOffset{[&] { for (const auto& window : windows) { if (i >= window.first && i < window.second) { return i - window.first; } } return std::size_t{5}; }()}; BOOST_REQUIRE_EQUAL(expectedOffset < 5, period.contains(i)); if (expectedOffset < 5) { BOOST_REQUIRE_EQUAL(expectedOffset, period.offset(i)); } BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print( windows.begin(), windows.begin() + (i + 14) / 15), core::CContainerPrinter::print(period.windows(i))); } } } BOOST_AUTO_TEST_CASE(testCountNotMissing) { maths::time_series::CSignal::TFloatMeanAccumulatorVec values; BOOST_REQUIRE_EQUAL(0, maths::time_series::CSignal::countNotMissing(values)); values.resize(5); BOOST_REQUIRE_EQUAL(0, maths::time_series::CSignal::countNotMissing(values)); for (std::size_t i = 0; i < values.size(); ++i) { values[i].add(1.0); BOOST_REQUIRE_EQUAL(i + 1, maths::time_series::CSignal::countNotMissing(values)); } } BOOST_AUTO_TEST_CASE(testRestrictTo) { maths::time_series::CSignal::TFloatMeanAccumulatorVec values(200); for (std::size_t i = 0; i < 200; ++i) { values[i].add(static_cast<double>(i)); } maths::time_series::CSignal::TSizeSizePr2Vec windows; maths::time_series::CSignal::TFloatMeanAccumulatorVec restricted; // Exactly half the values are in windows. restricted = values; windows = {{10, 20}, {50, 120}, {190, 210}}; maths::time_series::CSignal::restrictTo(windows, restricted); BOOST_REQUIRE_EQUAL(100, restricted.size()); std::size_t j{0}; for (const auto& window : windows) { for (std::size_t i = window.first; i < window.second; ++i, ++j) { BOOST_REQUIRE_EQUAL(static_cast<double>(i % 200), maths::common::CBasicStatistics::mean(restricted[j])); } } test::CRandomNumbers rng; std::size_t tests{0}; TSizeVec endpoints; for (std::size_t test = 0; test < 1000; ++test) { rng.generateUniformSamples(100, 300, 10, endpoints); std::sort(endpoints.begin(), endpoints.end()); endpoints.erase(std::unique(endpoints.begin(), endpoints.end()), endpoints.end()); if (endpoints.size() % 2 == 0) { std::size_t size{0}; windows.resize(endpoints.size() / 2); for (std::size_t i = 0; i < endpoints.size(); i += 2) { size += endpoints[i + 1] - endpoints[i]; windows[i / 2] = std::make_pair(endpoints[i], endpoints[i + 1]); } LOG_TRACE(<< "windows = " << windows); restricted = values; maths::time_series::CSignal::restrictTo(windows, restricted); j = 0; BOOST_REQUIRE_EQUAL(size, restricted.size()); for (const auto& window : windows) { for (std::size_t i = window.first; i < window.second; ++i, ++j) { BOOST_REQUIRE_EQUAL( static_cast<double>(i % 200), maths::common::CBasicStatistics::mean(restricted[j])); } } ++tests; } } BOOST_TEST_REQUIRE(tests > 0); } BOOST_AUTO_TEST_CASE(testRemoveExtremeOutliers) { test::CRandomNumbers rng; double fraction{0.05}; TDoubleVec value; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; // Edge cases don't fallover. maths::time_series::CSignal::removeExtremeOutliers(fraction, values); values.resize(20); maths::time_series::CSignal::removeExtremeOutliers(fraction, values); maths::time_series::CSignal::removeExtremeOutliers(-0.1, values); maths::time_series::CSignal::removeExtremeOutliers(1.1, values); TSizeVec outlier; for (std::size_t i = 0; i < 100; ++i) { values.clear(); values.resize(50); for (std::size_t j = 0; j < 50; ++j) { rng.generateUniformSamples(499.0, 501.0, 1, value); values[j].add(value[0]); } rng.generateUniformSamples(0, 50, 1, outlier); values[outlier[0]].add(i % 2 == 0 ? 0 : 1000); maths::time_series::CSignal::removeExtremeOutliers(fraction, values); BOOST_REQUIRE_EQUAL( 0.0, maths::common::CBasicStatistics::count(values[outlier[0]])); BOOST_TEST_REQUIRE( std::count_if(values.begin(), values.end(), [](const auto& value_) { return maths::common::CBasicStatistics::count(value_) == 0.0; }) <= static_cast<std::size_t>( std::ceil(fraction * static_cast<double>(values.size())))); } } BOOST_AUTO_TEST_CASE(testReweightOutliers) { // Check that we pickout pepper and salt outliers for a variety of components. TPredictorVec components{ [](std::size_t) { return 10.0; }, [](std::size_t index) { return static_cast<double>(index) / 5.0; }, [](std::size_t index) { return 10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(index) / 50.0); }}; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; TDoubleVec noise; TDoubleVec u01; for (std::size_t test = 0; test < 100; ++test) { const auto& component = components[test % components.size()]; values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateUniformSamples(0.0, 1.0, values.size(), u01); rng.generateNormalSamples(0.0, 4.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { if (u01[i] < 0.02) { values[i].add(-10.0); } else if (u01[i] > 0.98) { values[i].add(30.0); } else { values[i].add(component(i) + noise[i]); } } maths::time_series::CSignal::reweightOutliers(component, 0.1, values); for (std::size_t i = 0; i < values.size(); ++i) { if (u01[i] < 0.02 || u01[i] > 0.98) { BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::count(values[i]) < 1.0); } } } } BOOST_AUTO_TEST_CASE(testFitSingleSeasonalComponent) { // Test the accuracy with which we estimate one seasonal component. TSizeVec periods{5, 20, 10}; TPredictorVec expectedComponents{ [](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0}; return values[i % 5]; }, [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0); }, [](std::size_t i) { return i % 10 == 3 ? 10.0 : 0.0; }}; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; for (std::size_t test = 0; test < 100; ++test) { auto expected = expectedComponents[test % expectedComponents.size()]; std::size_t period{periods[test % periods.size()]}; values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 4.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { values[i].add(expected(i) + noise[i]); } maths::time_series::CSignal::fitSeasonalComponents( seasonalComponentSummary({period}), values, actuals); BOOST_REQUIRE_EQUAL(1, actuals.size()); BOOST_REQUIRE_EQUAL(period, actuals[0].size()); TMeanVarAccumulator meanError; double sigma{std::sqrt(4.0 / (static_cast<double>(values.size()) / period))}; for (std::size_t i = 0; i < actuals[0].size(); ++i) { BOOST_REQUIRE_CLOSE_ABSOLUTE( expected(i), maths::common::CBasicStatistics::mean(actuals[0][i]), 4.0 * sigma); meanError(std::fabs(expected(i) - maths::common::CBasicStatistics::mean(actuals[0][i])) / sigma); } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 1.5); } } BOOST_AUTO_TEST_CASE(testFitMultipleSeasonalComponents) { // Test the accuracy with which we estimate a mixture of two seasonal components. TPredictorVecVec expectedComponents{ {[](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0}; return values[i % 5]; }, [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 12.0); }}, {[](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0}; return values[i % 5]; }, [](std::size_t i) { return i % 10 == 3 ? 10.0 : 0.0; }}, {[](std::size_t i) { return i % 10 == 3 ? 10.0 : 0.0; }, [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 15.0); }}}; test::CRandomNumbers rng; TSizeVec lengths{72, 43, 95}; TSizeVecVec periods{{5, 12}, {5, 10}, {10, 15}}; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; TMeanVarAccumulator overallMeanError; for (std::size_t test = 0; test < 100; ++test) { const auto& expected = expectedComponents[test % expectedComponents.size()]; const auto& period = periods[test % periods.size()]; values.assign(lengths[test % lengths.size()], maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 4.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { double sum{0.0}; for (std::size_t j = 0; j < expected.size(); ++j) { sum += expected[j](i); } values[i].add(sum + noise[i]); } maths::time_series::CSignal::fitSeasonalComponents( seasonalComponentSummary(period), values, actuals); BOOST_REQUIRE_EQUAL(period.size(), actuals.size()); for (std::size_t i = 0; i < period.size(); ++i) { BOOST_REQUIRE_EQUAL(period[i], actuals[i].size()); } TMeanVarAccumulator meanError; for (std::size_t i = 0; i < actuals.size(); ++i) { double sigma{std::sqrt(4.0 / (static_cast<double>(values.size()) / static_cast<double>(period[i])))}; for (std::size_t j = 0; j < actuals[i].size(); ++j) { meanError(std::fabs(expected[i](j) - maths::common::CBasicStatistics::mean(actuals[i][j])) / sigma); } } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 2.5); overallMeanError += meanError; // We can only really guaranty individual component values up to additive // constants which sum to zero. Therefore, we check their sum is close to // the expected value w.r.t. the standard deviation of the noise. double sigma{std::sqrt(4.0 / (static_cast<double>(values.size()) / static_cast<double>(*std::max_element( period.begin(), period.end()))))}; for (std::size_t j = 0; j < period[0] * period[1]; ++j) { double expectedSum{0.0}; double actualSum{0.0}; for (std::size_t i = 0; i < actuals.size(); ++i) { expectedSum += expected[i](j); actualSum += maths::common::CBasicStatistics::mean(actuals[i][j % period[i]]); } BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedSum, actualSum, 5.0 * sigma); } } LOG_DEBUG(<< "Overall mean error = " << maths::common::CBasicStatistics::mean(overallMeanError)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(overallMeanError) < 1.25); } BOOST_AUTO_TEST_CASE(testFitTradingDaySeasonalComponents) { // The idea of this test is to test we correctly identify weekday/weekend // modulation of a daily seasonality. We randomize over the start offset // of the weekend to simulate different sliding window starts and check // we always partition where we should. test::CRandomNumbers rng; TDoubleVecVec amplitude{{0.2, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0}, {0.2, 0.4, 1.3, 1.2, 1.0, 1.0, 1.4}}; maths::time_series::CSignal::TSeasonalComponentVec periods; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TSizeVec offset; for (std::size_t test = 0; test < 10; ++test) { rng.generateUniformSamples(0, 168, 1, offset); auto expectedComponent = [&](std::size_t i) { i = (168 + i - offset[0]) % 168; return 10.0 * amplitude[test % 2][(i % 168) / 24] * std::sin(boost::math::double_constants::pi * static_cast<double>(i % 24) / 24.0); }; if (test % 2 == 0) { periods.assign({{24, offset[0], 168, {0, 48}}, {24, offset[0], 168, {48, 168}}}); } else { periods.assign({{24, offset[0], 168, {0, 48}}, {24, offset[0], 168, {48, 168}}, {168, offset[0], 168, {0, 48}}, {168, offset[0], 168, {48, 168}}}); } values.assign(336, maths::time_series::CSignal::TFloatMeanAccumulator{}); for (std::size_t i = 0; i < values.size(); ++i) { values[i].add(expectedComponent(i)); } maths::time_series::CSignal::fitSeasonalComponents(periods, values, actuals); for (std::size_t i = 0; i < values.size(); ++i) { double prediction{0.0}; for (std::size_t j = 0; j < periods.size(); ++j) { if (periods[j].contains(i)) { prediction += maths::common::CBasicStatistics::mean( actuals[j][periods[j].offset(i)]); } } BOOST_REQUIRE_CLOSE(maths::common::CBasicStatistics::mean(values[i]), prediction, 1e-4); } } } BOOST_AUTO_TEST_CASE(testFitSingleSeasonalComponentRobust) { // Test the improvement we get using the robust approach with pepper and salt // outliers for a single seasonal component. std::size_t period{10}; auto expected = [](std::size_t i) { double values[]{10.0, 11.0, 7.0, 5.0, 6.0, 15.0, 18.0, 19.0, 17.0, 14.0}; return values[i % 10]; }; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; maths::time_series::CSignal::TMeanAccumulatorVecVec actualsRobust; TDoubleVec noise; TDoubleVec u01; TMeanVarAccumulator overallImprovement; for (std::size_t test = 0; test < 100; ++test) { values.assign(50, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 1.0, values.size(), noise); rng.generateUniformSamples(0.0, 1.0, values.size(), u01); for (std::size_t i = 0; i < noise.size(); ++i) { if (u01[i] < 0.05) { values[i].add(0.0); } else if (u01[i] > 0.95) { values[i].add(30.0); } else { values[i].add(expected(i) + noise[i]); } } maths::time_series::CSignal::fitSeasonalComponents( seasonalComponentSummary({period}), values, actuals); maths::time_series::CSignal::fitSeasonalComponentsRobust( seasonalComponentSummary({period}), 0.1, values, actualsRobust); BOOST_REQUIRE_EQUAL(1, actuals.size()); BOOST_REQUIRE_EQUAL(period, actuals[0].size()); BOOST_REQUIRE_EQUAL(1, actualsRobust.size()); BOOST_REQUIRE_EQUAL(period, actualsRobust[0].size()); TMeanVarAccumulator meanError; TMeanVarAccumulator meanErrorRobust; double sigma{1.0 / std::sqrt(static_cast<double>(values.size()) / static_cast<double>(actualsRobust[0].size()))}; for (std::size_t i = 0; i < period; ++i) { meanError.add(std::fabs(maths::common::CBasicStatistics::mean(actuals[0][i]) - expected(i)) / sigma); meanErrorRobust.add( std::fabs(maths::common::CBasicStatistics::mean(actualsRobust[0][i]) - expected(i)) / sigma); } overallImprovement.add(maths::common::CBasicStatistics::mean(meanError) - maths::common::CBasicStatistics::mean(meanErrorRobust)); } LOG_DEBUG(<< "Overall improvement = " << maths::common::CBasicStatistics::mean(overallImprovement)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(overallImprovement) > 1.75); } BOOST_AUTO_TEST_CASE(testFitMultipleSeasonalComponentsRobust) { // Test the improvement we get using the robust approach with pepper and salt // outliers for a mixture of two seasonal components. TSizeVec period{10, 15}; TPredictorVec expected{ [](std::size_t i) { double values[]{10.0, 11.0, 7.0, 5.0, 6.0, 15.0, 18.0, 19.0, 17.0, 14.0}; return values[i % 10]; }, [](std::size_t i) { return 10.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 15.0); }}; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; maths::time_series::CSignal::TMeanAccumulatorVecVec actualsRobust; TDoubleVec noise; TDoubleVec u01; TMeanVarAccumulator overallImprovement; for (std::size_t test = 0; test < 100; ++test) { values.assign(175, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 1.0, values.size(), noise); rng.generateUniformSamples(0.0, 1.0, values.size(), u01); for (std::size_t i = 0; i < noise.size(); ++i) { if (u01[i] < 0.05) { values[i].add(0.0); } else if (u01[i] > 0.95) { values[i].add(40.0); } else { double sum{0.0}; for (const auto& component : expected) { sum += component(i); } values[i].add(sum + noise[i]); } } maths::time_series::CSignal::fitSeasonalComponents( seasonalComponentSummary(period), values, actuals); maths::time_series::CSignal::fitSeasonalComponentsRobust( seasonalComponentSummary(period), 0.1, values, actualsRobust); BOOST_REQUIRE_EQUAL(period.size(), actuals.size()); BOOST_REQUIRE_EQUAL(period.size(), actualsRobust.size()); for (std::size_t i = 0; i < period.size(); ++i) { BOOST_REQUIRE_EQUAL(period[i], actuals[i].size()); BOOST_REQUIRE_EQUAL(period[i], actualsRobust[i].size()); } TMeanVarAccumulator meanError; TMeanVarAccumulator meanErrorRobust; double sigma{1.0 / std::sqrt(static_cast<double>(values.size()) / 15.0)}; for (std::size_t j = 0; j < 60; ++j) { double expectedSum{0.0}; double actualSum{0.0}; double actualRobustSum{0.0}; for (std::size_t i = 0; i < period.size(); ++i) { expectedSum += expected[i](j); actualSum += maths::common::CBasicStatistics::mean(actuals[i][j % period[i]]); actualRobustSum += maths::common::CBasicStatistics::mean( actualsRobust[i][j % period[i]]); } meanError.add(std::fabs(actualSum - expectedSum) / sigma); meanErrorRobust.add(std::fabs(actualRobustSum - expectedSum) / sigma); } overallImprovement.add(maths::common::CBasicStatistics::mean(meanError) - maths::common::CBasicStatistics::mean(meanErrorRobust)); } LOG_DEBUG(<< "Overall improvement = " << maths::common::CBasicStatistics::mean(overallImprovement)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(overallImprovement) > 4.0); } BOOST_AUTO_TEST_CASE(testSingleComponentSeasonalDecomposition) { // Test that we reliably find a single seasonal component. TSizeVec periods{7, 20, 10}; TPredictorVec expectedComponents{ [](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0, 17.0, 14.0}; return values[i % 7]; }, [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0); }, [](std::size_t i) { return i % 10 == 3 ? 15.0 : 0.0; }}; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; for (std::size_t test = 0; test < 100; ++test) { auto expected = expectedComponents[test % expectedComponents.size()]; std::size_t period{periods[test % periods.size()]}; values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 2.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { values[i].add(expected(i) + noise[i]); } auto decomposition = maths::time_series::CSignal::seasonalDecomposition( values, 0.1, {24, 7 * 24, 365 * 24}); // We can detect additional components but must detect the real // component first. BOOST_TEST_REQUIRE(decomposition.size() >= 1); decomposition.resize(1); BOOST_REQUIRE_EQUAL(period, decomposition[0].period()); } } BOOST_AUTO_TEST_CASE(testMultipleSeasonalDecomposition) { // Test that we reliably find a mixture of two seasonal component. TSizeVecVec periods{{7, 12}, {20, 13}, {7, 10}}; TPredictorVecVec expectedComponents{ {[](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0, 17.0, 14.0}; return values[i % 7]; }, [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 12.0); }}, {[](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 20.0); }, [](std::size_t i) { return 7.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 13.0); }}, {[](std::size_t i) { double values[]{10.0, 5.0, 6.0, 15.0, 18.0, 17.0, 14.0}; return values[i % 7]; }, [](std::size_t i) { return i % 10 == 3 ? 15.0 : 0.0; }}}; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; TMeanVarAccumulator meanError; for (std::size_t test = 0; test < 100; ++test) { auto expected = expectedComponents[test % expectedComponents.size()]; auto period = periods[test % periods.size()]; values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 2.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { double sum{0.0}; for (const auto& component : expected) { sum += component(i); } values[i].add(sum + noise[i]); } auto decomposition = maths::time_series::CSignal::seasonalDecomposition( values, 0.1, {24, 7 * 24, 365 * 24}); // We can detect additional components but must detect the two real // components first. BOOST_TEST_REQUIRE(decomposition.size() >= 2); decomposition.resize(2); std::sort(period.begin(), period.end()); std::sort(decomposition.begin(), decomposition.end()); for (std::size_t i = 0; i < 2; ++i) { meanError.add(std::fabs( static_cast<double>(decomposition[i].period() - period[i]))); } } LOG_DEBUG(<< "mean error = " << maths::common::CBasicStatistics::mean(meanError)); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.01); } BOOST_AUTO_TEST_CASE(testMultipleDiurnalSeasonalDecomposition) { // Test seasonal decomposition with weekdays/weekend. test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; TSizeVec offset; TDoubleVecVec amplitudes{{0.3, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0}, {0.3, 0.3, 1.3, 1.0, 1.0, 1.3, 1.2}, {0.3, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0}}; for (std::size_t test = 0; test < 100; ++test) { rng.generateUniformSamples(0, 168, 1, offset); const auto& amplitude = amplitudes[test % amplitudes.size()]; std::string expectedDecomposition{ "[24/" + std::to_string(offset[0]) + "/168/(0, 48)," + " 24/" + std::to_string(offset[0]) + "/168/(48, 168)," + " 168/" + std::to_string(offset[0]) + "/168/(0, 48)," + " 168/" + std::to_string(offset[0]) + "/168/(48, 168)]"}; auto component = [&](std::size_t i) { i = (168 + i - offset[0]) % 168; return 20.0 * amplitude[i / 24] * (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 24.0)); }; values.assign(336, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0.0, 1.0, values.size(), noise); for (std::size_t i = 0; i < noise.size(); ++i) { values[i].add(component(i) + noise[i]); } auto decomposition = maths::time_series::CSignal::seasonalDecomposition( values, 0.0, {24, 7 * 24, 365 * 24}, {}, 1e-6); BOOST_REQUIRE_EQUAL(expectedDecomposition, core::CContainerPrinter::print(decomposition)); } } BOOST_AUTO_TEST_CASE(testTradingDayDecomposition) { // Test decomposing into weekdays/weekend with and without and override. test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; TSizeVec offset; TDoubleVecVec modulations{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, {0.3, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0}, {0.3, 0.3, 1.3, 1.0, 1.0, 1.3, 1.2}, {0.3, 0.1, 1.0, 1.0, 1.0, 1.0, 1.0}}; for (std::size_t test = 0; test < 100; ++test) { rng.generateUniformSamples(0, 168, 1, offset); const auto& modulation = modulations[test % modulations.size()]; std::string expectedDecomposition{ "[24/" + std::to_string(offset[0]) + "/168/(0, 48)," + " 24/" + std::to_string(offset[0]) + "/168/(48, 168)," + " 168/" + std::to_string(offset[0]) + "/168/(0, 48)," + " 168/" + std::to_string(offset[0]) + "/168/(48, 168)]"}; auto component = [&](std::size_t i) { i = (168 + i - offset[0]) % 168; return 20.0 * modulation[i / 24] * (1.0 + std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 24.0)); }; rng.generateNormalSamples(0.0, 1.0, values.size(), noise); for (auto startOfWeekOverride : {maths::time_series::CSignal::TOptionalSize{}, maths::time_series::CSignal::TOptionalSize{offset[0]}}) { values.assign(336, maths::time_series::CSignal::TFloatMeanAccumulator{}); for (std::size_t i = 0; i < noise.size(); ++i) { values[i].add(component(i) + noise[i]); } auto decomposition = maths::time_series::CSignal::tradingDayDecomposition( values, 0.0, 168, startOfWeekOverride); if (test % modulations.size() == 0) { BOOST_REQUIRE(decomposition.empty()); } else { BOOST_REQUIRE_EQUAL(expectedDecomposition, core::CContainerPrinter::print(decomposition)); } } } } BOOST_AUTO_TEST_CASE(testMeanNumberRepeatedValues) { // Test we correctly count the mean number of repeated values. test::CRandomNumbers rng; std::size_t period{20}; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; // Edge cases: all missing and no missing. values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); BOOST_REQUIRE_EQUAL( 0.0, maths::time_series::CSignal::meanNumberRepeatedValues( values, maths::time_series::CSignal::seasonalComponentSummary(period))); for (auto& value : values) { value.add(1.0); } BOOST_REQUIRE_CLOSE( 100.0 / static_cast<double>(period), maths::time_series::CSignal::meanNumberRepeatedValues( values, maths::time_series::CSignal::seasonalComponentSummary(period)), 1e-4); TDoubleVec repeats; TDoubleVec u01; for (double fraction : {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}) { for (const auto& time : {maths::time_series::CSignal::SSeasonalComponentSummary{ period, 0, period, {0, period}}, maths::time_series::CSignal::SSeasonalComponentSummary{ period, 5, 2 * period, {0, period}}}) { values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); repeats.assign(period, 0.0); rng.generateUniformSamples(0.0, 1.0, values.size(), u01); for (std::size_t i = 0; i < values.size(); ++i) { if (u01[i] < fraction) { if (time.contains(i)) { repeats[i % period] += 1.0; } values[i].add(1.0); } } TMeanVarAccumulator expectedMeanRepeats; for (auto repeat : repeats) { if (repeat > 0.0) { expectedMeanRepeats.add(repeat); } } BOOST_REQUIRE_CLOSE( maths::common::CBasicStatistics::mean(expectedMeanRepeats), maths::time_series::CSignal::meanNumberRepeatedValues(values, time), 1e-4); } } } BOOST_AUTO_TEST_CASE(testResidualVariance) { // Test we get the residual variance we expect. test::CRandomNumbers rng; std::size_t period{20}; auto component = [=](std::size_t index) { return 10.0 * std::sin(boost::math::double_constants::pi * static_cast<double>(index % period) / static_cast<double>(period)); }; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; TDoubleVec noise; maths::time_series::CSignal::TMeanAccumulatorVecVec actualComponent(1); for (std::size_t test = 0; test < 10; ++test) { values.assign(100, maths::time_series::CSignal::TFloatMeanAccumulator{}); actualComponent[0].assign(20, maths::time_series::CSignal::TMeanAccumulator{}); TMeanVarAccumulator moments; rng.generateNormalSamples(0.0, 1.0, values.size(), noise); for (std::size_t i = 0; i < values.size(); ++i) { values[i].add(component(i) + noise[i]); actualComponent[0][i % period].add(component(i)); moments.add(noise[i]); } double residualVariance{maths::time_series::CSignal::residualVariance( values, {maths::time_series::CSignal::seasonalComponentSummary(20)}, actualComponent)}; BOOST_REQUIRE_CLOSE(maths::common::CBasicStatistics::maximumLikelihoodVariance(moments), residualVariance, 1e-4); } } BOOST_AUTO_TEST_CASE(testSelectComponentSize) { // Test that the selected component decreases monotonically with increasing // noise. This is a straight variance/bias tradeoff. auto component = [](std::size_t i) { return 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(i) / 24.0); }; test::CRandomNumbers rng; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; maths::time_series::CSignal::TMeanAccumulatorVecVec actuals; TDoubleVec noise; maths::time_series::CSignal::TMeanAccumulatorVec sizes(5); for (std::size_t test = 0; test < 50; ++test) { for (std::size_t i = 0; i < 5; ++i) { values.assign(168, maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateNormalSamples(0, 4.0 * static_cast<double>(i), 168, noise); for (std::size_t j = 0; j < noise.size(); ++j) { values[j].add(component(j) + noise[j]); } std::size_t size{maths::time_series::CSignal::selectComponentSize(values, 24)}; sizes[i].add(size); } } LOG_DEBUG(<< "sizes = " << sizes); for (std::size_t i = 1; i < sizes.size(); ++i) { BOOST_TEST_REQUIRE(sizes[i] < sizes[i - 1]); } } BOOST_AUTO_TEST_CASE(testSmoothResample) { // Test that smoothing: // 1. Doesn't introduce a bias. // 2. Doesn't change the total count. // 3. Produces the correct residual error variance. // 4. Properly handles missing values. test::CRandomNumbers rng; TDoubleVec u01; maths::time_series::CSignal::TMeanAccumulatorVec values(144); TDoubleVec noise; rng.generateNormalSamples(0.0, 1.0, values.size(), noise); for (core_t::TTime time = 0; time < 86400; time += 600) { values[time / 600].add(10.0 * smoothDaily(time) + noise[time / 600]); } auto resampled = maths::time_series::CSignal::smoothResample(values.size(), values); BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(values), core::CContainerPrinter::print(resampled)); auto resampled0 = maths::time_series::CSignal::smoothResample(0, values); auto resampled1 = maths::time_series::CSignal::smoothResample(1, values); BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(resampled1), core::CContainerPrinter::print(resampled0)); TMeanVarAccumulator averageVariance; for (std::size_t test = 0; test < 50; ++test) { values.assign(values.size(), maths::time_series::CSignal::TFloatMeanAccumulator{}); rng.generateUniformSamples(0.0, 1.0, values.size(), u01); double fractionMissing{static_cast<double>(test) / 100.0}; for (core_t::TTime time = 0; time < 86400; time += 600) { if (u01[time / 600] > fractionMissing) { values[time / 600].add(10.0 * smoothDaily(time) + noise[time / 600]); } } for (std::size_t reduction = 2; reduction <= 6; ++reduction) { resampled = maths::time_series::CSignal::smoothResample( values.size() / reduction, values); BOOST_REQUIRE_EQUAL(values.size(), resampled.size()); TMeanVarAccumulator errorMoments; double totalCount{0.0}; double totalResampledCount{0.0}; for (std::size_t i = 0; i < values.size(); ++i) { errorMoments.add(maths::common::CBasicStatistics::mean(values[i]) - maths::common::CBasicStatistics::mean(resampled[i])); totalCount += maths::common::CBasicStatistics::count(values[i]); totalResampledCount += maths::common::CBasicStatistics::count(resampled[i]); } BOOST_REQUIRE_CLOSE(totalCount, totalResampledCount, 0.1 /*%*/); BOOST_TEST_REQUIRE( std::fabs(maths::common::CBasicStatistics::mean(errorMoments)) < 0.05); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::variance(errorMoments) < 1.2); averageVariance.add(maths::common::CBasicStatistics::variance(errorMoments)); } } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(averageVariance) > 0.4); BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(averageVariance) < 1.1); } BOOST_AUTO_TEST_CASE(testBucketPredictor) { TPredictorVec predictors{smoothDaily, spikeyDaily}; core_t::TTime zero{0}; core_t::TTime startTime{86400}; maths::time_series::CSignal::TFloatMeanAccumulatorVec values; for (auto predictor : predictors) { for (core_t::TTime bucketLength : {3600, 7200, 14400, 43200}) { for (core_t::TTime sampleInterval : {300, 600, 1800, 3600}) { for (core_t::TTime offset : {zero, sampleInterval / 2}) { values.assign(604800 / bucketLength, maths::time_series::CSignal::TFloatMeanAccumulator{}); for (core_t::TTime time = startTime; time < startTime + 604800; time += sampleInterval) { values[(time - startTime) / bucketLength].add(predictor(time + offset)); } auto bucketPredictor = maths::time_series::CSignal::bucketPredictor( predictor, startTime, bucketLength, maths::common::CIntegerTools::floor(bucketLength - 1, sampleInterval) / 2 + offset, sampleInterval); for (std::size_t i = 0; i < values.size(); ++i) { BOOST_REQUIRE_CLOSE_ABSOLUTE( double{maths::common::CBasicStatistics::mean(values[i])}, bucketPredictor(bucketLength * static_cast<core_t::TTime>(i)), 1e-3); } } } } } } BOOST_AUTO_TEST_SUITE_END()