lib/maths/time_series/unittest/CTimeSeriesTestForSeasonalityTest.cc (1,224 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #include <core/CLogger.h> #include <core/Constants.h> #include <core/CoreTypes.h> #include <maths/common/CBasicStatistics.h> #include <maths/common/CIntegerTools.h> #include <maths/common/COrderings.h> #include <maths/common/MathsTypes.h> #include <maths/time_series/CSeasonalTime.h> #include <maths/time_series/CTimeSeriesTestForSeasonality.h> #include <test/BoostTestCloseAbsolute.h> #include <test/CRandomNumbers.h> #include <test/CTimeSeriesTestData.h> #include "TestUtils.h" #include <boost/math/constants/constants.hpp> #include <boost/test/unit_test.hpp> #include <cmath> #include <vector> BOOST_AUTO_TEST_SUITE(CTimeSeriesTestForSeasonalityTest) using namespace ml; using namespace handy_typedefs; namespace { using TDoubleVec = std::vector<double>; using TSizeVec = std::vector<std::size_t>; using TTimeVec = std::vector<core_t::TTime>; using TTimeDoublePr = std::pair<core_t::TTime, double>; using TTimeDoublePrVec = std::vector<TTimeDoublePr>; using TStrVec = std::vector<std::string>; using TStrVecVec = std::vector<TStrVec>; using TFloatMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<maths::common::CFloatStorage>::TAccumulator; using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>; using TDiurnalTimeVec = std::vector<maths::time_series::CDiurnalTime>; using TDiurnalTimeVecVec = std::vector<TDiurnalTimeVec>; using TLinearModel = std::function<double(core_t::TTime)>; using TLinearModelVec = std::vector<TLinearModel>; void generateNoise(std::size_t type, test::CRandomNumbers& rng, core_t::TTime window, core_t::TTime interval, TDoubleVec& noise) { switch (type % 3) { case 0: rng.generateNormalSamples(0.0, 1.0, window / interval, noise); break; case 1: rng.generateGammaSamples(1.0, 1.0, window / interval, noise); break; case 2: rng.generateLogNormalSamples(0.2, 0.3, window / interval, noise); break; } } const core_t::TTime FIVE_MINS{core::constants::HOUR / 12}; const core_t::TTime HALF_HOUR{core::constants::HOUR / 2}; const core_t::TTime HOUR{core::constants::HOUR}; const core_t::TTime DAY{core::constants::DAY}; const core_t::TTime WEEK{core::constants::WEEK}; } BOOST_AUTO_TEST_CASE(testSyntheticNoSeasonality) { // Test FP % for a variety of synthetic time series with not seasonality. TGeneratorVec generators{constant, ramp, markov}; core_t::TTime startTime{604800}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; TSizeVec repeats; double FP{0.0}; double TN{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (auto window : {WEEK, 2 * WEEK, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); for (auto bucketLength : {HALF_HOUR, HOUR}) { LOG_TRACE(<< "bucket length = " << bucketLength); generateNoise(test, rng, window, bucketLength, noise); rng.generateUniformSamples(0, generators.size(), 1, index); rng.generateUniformSamples(3, 20, 1, repeats); LOG_TRACE(<< "generator = " << index[0]); const auto& generator = generators[index[0]]; values.assign(window / bucketLength, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += bucketLength) { std::size_t bucket(time / bucketLength); values[bucket].add(5.0 * generator(startTime + time * bucketLength) + noise[bucket]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, values}; auto result = seasonality.decompose(); bool isSeasonal{result.seasonal().empty() == false}; if (isSeasonal) { LOG_DEBUG(<< "got " << result.print() << " expected []"); } FP += isSeasonal ? 1.0 : 0.0; TN += isSeasonal ? 0.0 : 1.0; } } } LOG_DEBUG(<< "True negative rate = " << TN / (FP + TN)); BOOST_TEST_REQUIRE(TN / (FP + TN) > 0.99); } BOOST_AUTO_TEST_CASE(testSyntheticNoSeasonalitySparse) { // Test FP % for sparse data with no seasonality. core_t::TTime bucketLength{HOUR}; core_t::TTime window{4 * WEEK}; core_t::TTime startTime{10 * WEEK}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TSizeVec nonEmptyBuckets; for (auto occupancy : {0.25, 0.1, 0.05}) { LOG_DEBUG(<< "occupancy = " << occupancy); double FP{0.0}; double TN{0.0}; for (std::size_t test = 0; test < 1000; ++test) { if ((test + 1) % 100 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 1000"); } auto numberBuckets = static_cast<std::size_t>(window / bucketLength); auto numberNonEmptyBuckets = static_cast<std::size_t>( occupancy * static_cast<double>(numberBuckets)); rng.generateUniformSamples(0, numberBuckets, numberNonEmptyBuckets, nonEmptyBuckets); std::sort(nonEmptyBuckets.begin(), nonEmptyBuckets.end()); values.assign(numberBuckets, TFloatMeanAccumulator{}); for (std::size_t j = 0; j < numberBuckets; ++j) { values[j].add(std::find(nonEmptyBuckets.begin(), nonEmptyBuckets.end(), j) != nonEmptyBuckets.end() ? 1.0 : 0.0); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, values, occupancy}; auto result = seasonality.decompose(); bool isSeasonal{result.seasonal().empty() == false}; if (isSeasonal) { LOG_DEBUG(<< "got " << result.print() << " expected []"); } FP += isSeasonal ? 1.0 : 0.0; TN += isSeasonal ? 0.0 : 1.0; } LOG_DEBUG(<< "True negative rate = " << TN / (FP + TN)); BOOST_TEST_REQUIRE(TN / (FP + TN) >= 0.995); } } BOOST_AUTO_TEST_CASE(testSyntheticSeasonalSparse) { // Test TP % for sparse data with seasonality. core_t::TTime bucketLength{HOUR}; core_t::TTime window{4 * WEEK}; core_t::TTime startTime{10 * WEEK}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TSizeVec period; double TP{0.0}; double FN{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } auto numberBuckets = static_cast<std::size_t>(window / bucketLength); rng.generateUniformSamples(10, 50, 1, period); double occupancy{1.0 / static_cast<double>(period[0])}; values.assign(numberBuckets, TFloatMeanAccumulator{}); for (std::size_t i = 0; i < numberBuckets; ++i) { values[i].add(i % period[0] == 0 ? 1.0 : 0.0); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, values, occupancy}; auto result = seasonality.decompose(); bool isSeasonal{result.seasonal().empty() == false}; TP += isSeasonal ? 1.0 : 0.0; FN += isSeasonal ? 0.0 : 1.0; } LOG_DEBUG(<< "True positive rate = " << TP / (TP + FN)); BOOST_TEST_REQUIRE(TP / (TP + FN) >= 0.99); } BOOST_AUTO_TEST_CASE(testSyntheticDiurnal) { // Test accuracy for a variety of synthetic time series with daily and // weekly seasonalities. TTimeVec windows{4 * DAY, WEEK, 2 * WEEK, 4 * WEEK}; TSizeVec permittedGenerators{2, 2, 3, 5}; TGeneratorVec generators{smoothDaily, spikeyDaily, smoothWeekly, spikeyDailyWeekly, weekends}; TStrVecVec expected{{"86400"}, {"86400"}, {"604800"}, {"86400/(0,172800)", "86400/(172800,604800)", "604800/(0,172800)"}, {"86400/(0,172800)", "86400/(172800,604800)", "604800/(0,172800)", "604800/(172800,604800)"}}; core_t::TTime startTime{10000}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; double TP{0.0}; double FN{0.0}; double FP{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (std::size_t i = 0; i < windows.size(); ++i) { core_t::TTime window{windows[i]}; LOG_TRACE(<< "window = " << window); for (auto bucketLength : {HALF_HOUR, HOUR}) { LOG_TRACE(<< "bucket length = " << bucketLength); generateNoise(test, rng, window, HALF_HOUR, noise); rng.generateUniformSamples(0, permittedGenerators[i], 1, index); LOG_TRACE(<< "generator = " << index[0]); const auto& generator = generators[index[0]]; values.assign(window / bucketLength, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += HALF_HOUR) { values[time / bucketLength].add(20.0 * generator(startTime + time) + noise[time / HALF_HOUR]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, values}; auto result = seasonality.decompose(); if (result.print() != core::CContainerPrinter::print(expected[index[0]])) { LOG_DEBUG(<< "got " << result.print() << ", expected " << expected[index[0]]); } TSizeVec found{0, 0}; for (const auto& component : result.seasonal()) { ++found[static_cast<std::size_t>( std::find(expected[index[0]].begin(), expected[index[0]].end(), component.print()) == expected[index[0]].end())]; } TP += static_cast<double>(found[0]); FN += static_cast<double>(expected[index[0]].size() - found[0]); FP += static_cast<double>(found[1]); } } } LOG_DEBUG(<< "recall = " << TP / (TP + FN)); LOG_DEBUG(<< "accuracy = " << TP / (TP + FP)); BOOST_TEST_REQUIRE(TP / (TP + FN) > 0.99); BOOST_TEST_REQUIRE(TP / (TP + FP) > 0.99); } BOOST_AUTO_TEST_CASE(testRealSpikeyDaily) { // Test on real spikey data with daily seasonality. TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; BOOST_REQUIRE(test::CTimeSeriesTestData::parse( "testfiles/spikey_data.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX)); BOOST_TEST_REQUIRE(timeseries.size() > 0); LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); TTimeVec lastTest{timeseries[0].first, timeseries[0].first}; TTimeVec windows{3 * DAY, 2 * WEEK}; TFloatMeanAccumulatorVec values[2]{ TFloatMeanAccumulatorVec(windows[0] / HOUR, TFloatMeanAccumulator{}), TFloatMeanAccumulatorVec(windows[1] / HOUR, TFloatMeanAccumulator{})}; for (std::size_t i = 0; i < timeseries.size(); ++i) { core_t::TTime time; double value; std::tie(time, value) = timeseries[i]; for (std::size_t j = 0; j < 2; ++j) { values[j][((time - lastTest[j]) % windows[j]) / HOUR].add(value); if (time > lastTest[j] + windows[j]) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ lastTest[j], lastTest[j], HOUR, HOUR, values[j]}; auto result = seasonality.decompose(); BOOST_REQUIRE_EQUAL("[86400]", result.print()); values[j].assign(windows[j] / HOUR, TFloatMeanAccumulator{}); lastTest[j] = time; } } } } BOOST_AUTO_TEST_CASE(testRealTradingDays) { // Test on real data with weekdays/weekend. TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; BOOST_REQUIRE(test::CTimeSeriesTestData::parse( "testfiles/thirty_minute_samples.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_ISO8601_REGEX, test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); BOOST_TEST_REQUIRE(timeseries.size() > 0); LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{2 * WEEK}; TFloatMeanAccumulatorVec values(window / HOUR, TFloatMeanAccumulator{}); for (std::size_t i = 0; i < timeseries.size(); ++i) { core_t::TTime time; double value; std::tie(time, value) = timeseries[i]; values[((time - lastTest) % window) / HOUR].add(value); if (time > lastTest + window) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ lastTest, lastTest, HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); LOG_DEBUG(<< result.print()); BOOST_REQUIRE(result.print() == "[86400/(172800,604800), 604800/(0,172800), 604800/(172800,604800)]" || result.print() == "[86400/(0,172800), 86400/(172800,604800), 604800/(0,172800), 604800/(172800,604800)]"); values.assign(window / HOUR, TFloatMeanAccumulator{}); lastTest = time; } } } BOOST_AUTO_TEST_CASE(testRealTradingDaysPlusOutliers) { // Test on real data with weekdays/weekend and outliers. TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; BOOST_REQUIRE(test::CTimeSeriesTestData::parse( "testfiles/diurnal.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_UNIX_REGEX)); BOOST_TEST_REQUIRE(timeseries.size() > 0); LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{2 * WEEK}; TFloatMeanAccumulatorVec values(window / HOUR, TFloatMeanAccumulator{}); for (std::size_t i = 0; i < timeseries.size(); ++i) { core_t::TTime time; double value; std::tie(time, value) = timeseries[i]; values[((time - lastTest) % window) / HOUR].add(value); if (time > lastTest + window) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ lastTest, lastTest, HOUR, FIVE_MINS, values}; auto result = seasonality.decompose(); LOG_DEBUG(<< result.print()); BOOST_REQUIRE(result.print() == "[86400/(0,172800), 86400/(172800,604800), 604800/(0,172800), 604800/(172800,604800)]" || result.print() == "[86400/(0,172800), 86400/(172800,604800), 604800/(0,172800)]"); values.assign(window / HOUR, TFloatMeanAccumulator{}); lastTest = time; } } } BOOST_AUTO_TEST_CASE(testRealSwitchingNoSeasonality) { // Test on real non-periodic switching data. TTimeDoublePrVec timeseries; core_t::TTime startTime; core_t::TTime endTime; BOOST_REQUIRE(test::CTimeSeriesTestData::parse( "testfiles/no_periods.csv", timeseries, startTime, endTime, test::CTimeSeriesTestData::CSV_ISO8601_REGEX, test::CTimeSeriesTestData::CSV_ISO8601_DATE_FORMAT)); BOOST_TEST_REQUIRE(timeseries.size() > 0); LOG_DEBUG(<< "timeseries = " << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); core_t::TTime lastTest{timeseries[0].first}; core_t::TTime window{2 * WEEK}; TFloatMeanAccumulatorVec values(window / HOUR, TFloatMeanAccumulator{}); for (std::size_t i = 0; i < timeseries.size(); ++i) { core_t::TTime time; double value; std::tie(time, value) = timeseries[i]; values[((time - lastTest) % window) / HOUR].add(value); if (time > lastTest + window) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ lastTest, lastTest, HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); BOOST_TEST_REQUIRE(result.print() == "[]"); values.assign(window / HOUR, TFloatMeanAccumulator{}); lastTest = time; } } } BOOST_AUTO_TEST_CASE(testSyntheticNonDiurnal) { // Test the accuracy for non-diurnal seasonal components with periods in the // range [DAY / 5, 5 * DAY]. TGeneratorVec generators{smoothDaily, spikeyDaily}; core_t::TTime startTime{10000}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; TSizeVec repeats; double TP[3]{0.0}; double FN[3]{0.0}; double FP{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (auto window : {WEEK, 2 * WEEK, 25 * DAY}) { LOG_TRACE(<< "window = " << window); double periodScale{[&] { TDoubleVec result; rng.generateUniformSamples(1.0, 5.0, 1, result); return test % 2 == 0 ? result[0] : 1.0 / result[0]; }()}; for (auto bucketLength : {HALF_HOUR, HOUR}) { LOG_TRACE(<< "bucket length = " << bucketLength); core_t::TTime period{maths::common::CIntegerTools::floor( static_cast<core_t::TTime>(static_cast<double>(DAY) / periodScale), bucketLength)}; periodScale = static_cast<double>(DAY) / static_cast<double>(period); if (periodScale == 1.0 || window < 5 * period) { continue; } generateNoise(test, rng, window, FIVE_MINS, noise); rng.generateUniformSamples(0, 2, 1, index); rng.generateUniformSamples(3, 20, 1, repeats); auto generator = generators[index[0]]; values.assign(window / bucketLength, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += FIVE_MINS) { values[time / bucketLength].add( 20.0 * scale(periodScale, startTime + time, generator) + noise[time / FIVE_MINS]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, FIVE_MINS, values}; auto result = seasonality.decompose(); double found[]{0.0, 0.0, 0.0, 0.0}; if (result.print() != "[" + std::to_string(period) + "]") { LOG_DEBUG(<< "got " << result.print() << ", expected [" << period << "]"); } for (const auto& component : result.seasonal()) { core_t::TTime componentPeriod{component.seasonalTime()->period()}; double error{static_cast<double>(std::min(componentPeriod % period, period - componentPeriod % period)) / static_cast<double>(period)}; if (error == 0.0) { found[0] = 1.0; } if (error < 0.01) { found[1] = 1.0; } if (error < 0.05) { found[2] = 1.0; } if (error >= 0.05) { found[3] += 1.0; } } for (std::size_t i = 0; i < 3; ++i) { TP[i] += found[i]; FN[i] += 1.0 - found[i]; } FP += found[3]; } } } LOG_DEBUG(<< "recall @ 0% error = " << TP[0] / (TP[0] + FN[0])); LOG_DEBUG(<< "recall @ 1% error = " << TP[1] / (TP[1] + FN[1])); LOG_DEBUG(<< "recall @ 5% error = " << TP[2] / (TP[2] + FN[2])); LOG_DEBUG(<< "accuracy @ 0% error = " << TP[0] / (TP[0] + FP)); LOG_DEBUG(<< "accuracy @ 1% error = " << TP[1] / (TP[1] + FP)); LOG_DEBUG(<< "accuracy @ 5% error = " << TP[2] / (TP[2] + FP)); BOOST_TEST_REQUIRE(TP[0] / (TP[0] + FN[0]) > 0.98); BOOST_TEST_REQUIRE(TP[1] / (TP[1] + FN[1]) > 0.99); BOOST_TEST_REQUIRE(TP[2] / (TP[2] + FN[2]) > 0.99); BOOST_TEST_REQUIRE(TP[0] / (TP[0] + FP) > 0.96); BOOST_TEST_REQUIRE(TP[1] / (TP[1] + FP) > 0.96); BOOST_TEST_REQUIRE(TP[2] / (TP[2] + FP) > 0.96); } BOOST_AUTO_TEST_CASE(testSyntheticSparseDaily) { // Test a synthetic time series with daily seasonality and periodically // missing values. TStrVec type{"Daily", "No seasonality"}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; for (std::size_t test = 0; test < 2; ++test) { LOG_DEBUG(<< type[test]); values.assign(7 * 48, TFloatMeanAccumulator{}); for (std::size_t day = 0, i = 0, j = 0; day < 7; ++day, ++j) { for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 1.0}) { if (value > 0.0) { rng.generateUniformSamples(-1.0, 1.0, 1, noise); values[i].add(test == 0 ? value : noise[0]); } ++i; } if (day > 3) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ 0, 0, HALF_HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); BOOST_TEST_REQUIRE(result.print() == (test == 0 ? "[86400]" : "[]")); } } } } BOOST_AUTO_TEST_CASE(testSyntheticSparseWeekly) { // Test a synthetic time series with weekly seasonality and periodically // missing values. TStrVec type{"Weekly", "No seasonality"}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; for (std::size_t test = 0; test < 2; ++test) { LOG_DEBUG(<< type[test]); values.assign(5 * 168, TFloatMeanAccumulator{}); for (std::size_t week = 0, i = 0, j = 0; week < 5; ++week, ++j) { for (auto value : {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 20.0, 18.0, 10.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 8.0, 9.0, 9.0, 10.0, 10.0, 8.0, 4.0, 3.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}) { if (value > 0.0) { rng.generateUniformSamples(-1.0, 1.0, 1, noise); values[i].add(test == 0 ? value : noise[0]); } ++i; } if (week >= 3) { maths::time_series::CTimeSeriesTestForSeasonality seasonality{ 0, 0, HOUR, HOUR, values}; auto result = seasonality.decompose(); LOG_DEBUG(<< result.print()); if (test == 0) { BOOST_TEST_REQUIRE(result.print() == "[86400/(0,172800), 604800/(0,172800), 604800/(172800,604800)]"); } else { BOOST_TEST_REQUIRE(result.print() == "[]"); } } } } } BOOST_AUTO_TEST_CASE(testSyntheticWithOutliers) { // Test synthetic timeseries data with pepper and salt outliers. core_t::TTime startTime{10000}; test::CRandomNumbers rng; TDoubleVec noise; TSizeVec outliers; TDoubleVec spikeOrTroughSelector; TFloatMeanAccumulatorVec values; for (auto period : {DAY, WEEK}) { LOG_DEBUG(<< "period = " << period); for (auto window : {WEEK, 2 * WEEK, 4 * WEEK}) { if (window < 2 * period) { continue; } LOG_TRACE(<< "window = " << window); for (auto bucketLength : {HALF_HOUR, HOUR}) { core_t::TTime buckets{window / bucketLength}; std::size_t numberOutliers{ static_cast<std::size_t>(0.05 * static_cast<double>(buckets))}; rng.generateUniformSamples(0, buckets, numberOutliers, outliers); rng.generateUniformSamples(0, 1.0, numberOutliers, spikeOrTroughSelector); rng.generateNormalSamples(0.0, 1.0, buckets, noise); std::sort(outliers.begin(), outliers.end()); values.assign(buckets, TFloatMeanAccumulator{}); for (core_t::TTime time = startTime; time < startTime + window; time += bucketLength) { std::size_t bucket((time - startTime) / bucketLength); auto outlier = std::lower_bound(outliers.begin(), outliers.end(), bucket); if (outlier != outliers.end() && *outlier == bucket) { values[bucket].add( spikeOrTroughSelector[outlier - outliers.begin()] > 0.2 ? 0.0 : 100.0); } else { values[bucket].add( 20.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(period)) + noise[bucket]); } } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, values}; auto result = seasonality.decompose(); LOG_DEBUG(<< result.print()); BOOST_TEST_REQUIRE(result.print() == "[" + std::to_string(period) + "]"); } } } LOG_DEBUG(<< "Weekdays/weekend"); TDoubleVec modulation{0.2, 0.2, 1.0, 1.0, 1.0, 1.0, 1.0}; for (auto window : {2 * WEEK, 4 * WEEK}) { LOG_DEBUG(<< "window length = " << window); for (auto bucketLength : {HALF_HOUR, HOUR}) { core_t::TTime buckets{window / bucketLength}; std::size_t numberOutliers{ static_cast<std::size_t>(0.05 * static_cast<double>(buckets))}; rng.generateUniformSamples(0, buckets, numberOutliers, outliers); rng.generateUniformSamples(0, 1.0, numberOutliers, spikeOrTroughSelector); rng.generateNormalSamples(0.0, 1.0, buckets, noise); std::sort(outliers.begin(), outliers.end()); values.assign(buckets, TFloatMeanAccumulator{}); for (core_t::TTime time = startTime; time < startTime + window; time += bucketLength) { std::size_t bucket((time - startTime) / bucketLength); auto outlier = std::lower_bound(outliers.begin(), outliers.end(), bucket); if (outlier != outliers.end() && *outlier == bucket) { values[bucket].add( spikeOrTroughSelector[outlier - outliers.begin()] > 0.2 ? 0.0 : 100.0); } else { values[bucket].add( modulation[((time - startTime) / DAY) % 7] * (20.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast<double>(time) / static_cast<double>(DAY))) + noise[bucket]); } } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, bucketLength, std::move(values)}; auto result = seasonality.decompose(); LOG_DEBUG(<< result.print()); BOOST_TEST_REQUIRE(result.print() == "[86400/(0,172800), 86400/(172800,604800)]"); } } } BOOST_AUTO_TEST_CASE(testSyntheticMixtureOfSeasonalities) { // Test the accuracy with which we decompose a synthetic time series into // its multiple constitute components. TGeneratorVec generators[]{ {spikeyDaily, [](core_t::TTime time) { return scale(16.0 / 24.0, time, spikeyDaily); }}, {smoothDaily, [](core_t::TTime time) { return 2.0 * scale(36.0 / 24.0, time, spikeyDaily); }}, {smoothDaily, [](core_t::TTime time) { return scale(18.0 / 24.0, time, smoothDaily); }}}; TStrVecVec expected{{"86400", "129600"}, {"86400", "57600"}, {"86400", "115200"}}; core_t::TTime startTime{10000}; test::CRandomNumbers rng; TDoubleVec noise; TFloatMeanAccumulatorVec values; TSizeVec index; double TP{0.0}; double FN{0.0}; double FP{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (auto window : {16 * DAY, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); TDoubleVec periodScales; rng.generateUniformSamples(1.0, 5.0, 2, periodScales); periodScales[0] = 1.0 / periodScales[0]; for (auto bucketLength : {HALF_HOUR, HOUR}) { LOG_TRACE(<< "bucket length = " << bucketLength); for (auto periodScale : periodScales) { core_t::TTime period{maths::common::CIntegerTools::floor( static_cast<core_t::TTime>(static_cast<double>(DAY) / periodScale), bucketLength)}; periodScale = static_cast<double>(DAY) / static_cast<double>(period); } generateNoise(test, rng, window, FIVE_MINS, noise); rng.generateUniformSamples(0, 3, 1, index); LOG_TRACE(<< "generator = " << index[0]); const auto& generator = generators[index[0]]; values.assign(window / bucketLength, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += FIVE_MINS) { double value{0.0}; for (std::size_t i = 0; i < 2; ++i) { value += 20.0 * generator[i](startTime + time); } values[time / bucketLength].add(value + noise[time / FIVE_MINS]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, FIVE_MINS, values}; auto result = seasonality.decompose(); std::size_t found[]{0, 0}; for (const auto& component : result.seasonal()) { ++found[std::find(expected[index[0]].begin(), expected[index[0]].end(), component.print()) == expected[index[0]].end()]; } TP += static_cast<double>(found[0]); FN += static_cast<double>(2 - found[0]); FP += static_cast<double>(found[1]); if (found[0] < 2 || found[1] > 0) { LOG_DEBUG(<< "got " << result.print() << ", expected " << expected[index[0]]); } } } } LOG_DEBUG(<< "recall = " << TP / (TP + FN)); LOG_DEBUG(<< "accuracy = " << TP / (TP + FP)); BOOST_TEST_REQUIRE(TP / (TP + FN) > 0.99); BOOST_TEST_REQUIRE(TP / (TP + FP) > 0.99); } BOOST_AUTO_TEST_CASE(testSyntheticDiurnalWithLinearScaling) { // Test the ability to correctly decompose a time series with diurnal seasonal // components in the presence of piecewise constant random linear scaling events. std::size_t segmentSupport[][2]{{100, 200}, {600, 900}}; double scaleSupport[][2]{{4.0, 6.0}, {0.2, 0.4}}; TGeneratorVec generators{smoothDaily, spikeyDaily}; core_t::TTime startTime{0}; test::CRandomNumbers rng; TDoubleVec noise; TSizeVec index; TFloatMeanAccumulatorVec values; double TP{0.0}; double FN{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (auto window : {3 * WEEK, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); core_t::TTime endTime{startTime + window}; TTimeVec segments; TDoubleVec scales{1.0}; for (std::size_t i = 0; i < 2; ++i) { TSizeVec segment; TDoubleVec scale; rng.generateUniformSamples(segmentSupport[i][0], segmentSupport[i][1], 1, segment); rng.generateUniformSamples(scaleSupport[i][0], scaleSupport[i][1], 1, scale); segments.push_back(startTime + segment[0] * HALF_HOUR); scales.push_back(scale[0]); } segments.push_back(endTime); auto trend = [&](core_t::TTime time) { auto i = std::lower_bound(segments.begin(), segments.end(), time); return 20.0 * scales[i - segments.begin()] * generators[index[0]](time); }; rng.generateNormalSamples(0.0, 1.0, window / HALF_HOUR, noise); rng.generateUniformSamples(0, 2, 1, index); values.assign(window / HALF_HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += HALF_HOUR) { std::size_t bucket(time / HALF_HOUR); values[bucket].add(trend(startTime + time) + noise[bucket]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HALF_HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); if (result.print() != "[86400]") { LOG_DEBUG(<< "got " << result.print() << ", expected [86400]"); } TP += result.print() == "[86400]" ? 1.0 : 0.0; FN += result.print() == "[86400]" ? 0.0 : 1.0; } } LOG_DEBUG(<< "Recall = " << TP / (TP + FN)); BOOST_TEST_REQUIRE(TP / (TP + FN) > 0.99); } BOOST_AUTO_TEST_CASE(testSyntheticNonDiurnalWithLinearTrend) { // Test the ability to correctly decompose a time series with seasonal // components and a linear trend. TGeneratorVec generators{smoothDaily, spikeyDaily}; core_t::TTime startTime{10000}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; double TP[3]{0.0}; double FN[3]{0.0}; double FP{0.0}; for (std::size_t test = 0; test < 100; ++test) { if ((test + 1) % 10 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 100"); } for (auto window : {WEEK, 2 * WEEK, 25 * DAY}) { LOG_TRACE(<< "window = " << window); double periodScale{[&] { TDoubleVec result; rng.generateUniformSamples(1.0, 5.0, 1, result); return test % 2 == 0 ? result[0] : 1.0 / result[0]; }()}; for (auto bucketLength : {HALF_HOUR, HOUR}) { LOG_TRACE(<< "bucket length = " << bucketLength); core_t::TTime period{maths::common::CIntegerTools::floor( static_cast<core_t::TTime>(static_cast<double>(DAY) / periodScale), bucketLength)}; periodScale = static_cast<double>(DAY) / static_cast<double>(period); if (periodScale == 1.0 || window < 5 * period) { continue; } generateNoise(test, rng, window, FIVE_MINS, noise); rng.generateUniformSamples(0, 2, 1, index); auto generator = generators[index[0]]; values.assign(window / bucketLength, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += FIVE_MINS) { std::size_t bucket(time / bucketLength); values[bucket].add(0.5 * static_cast<double>(bucket) + 20.0 * scale(periodScale, startTime + time, generator) + noise[time / FIVE_MINS]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, bucketLength, FIVE_MINS, values}; auto result = seasonality.decompose(); double found[]{0.0, 0.0, 0.0, 0.0}; if (result.print() != "[" + std::to_string(period) + "]") { LOG_DEBUG(<< "got " << result.print() << ", expected [" << period << "]"); } for (const auto& component : result.seasonal()) { core_t::TTime componentPeriod{component.seasonalTime()->period()}; double error{static_cast<double>(std::min(componentPeriod % period, period - componentPeriod % period)) / static_cast<double>(period)}; if (error == 0.0) { found[0] = 1.0; } if (error < 0.01) { found[1] = std::max(found[1], 1.0); } if (error < 0.05) { found[2] = std::max(found[2], 1.0); } if (error >= 0.05) { found[3] += 1.0; } } for (std::size_t i = 0; i < 3; ++i) { TP[i] += found[i]; FN[i] += 1.0 - found[i]; } FP += found[3]; } } } LOG_DEBUG(<< "recall @ 0% error = " << TP[0] / (TP[0] + FN[0])); LOG_DEBUG(<< "recall @ 1% error = " << TP[1] / (TP[1] + FN[1])); LOG_DEBUG(<< "recall @ 5% error = " << TP[2] / (TP[2] + FN[2])); LOG_DEBUG(<< "accuracy @ 0% error = " << TP[0] / (TP[0] + FP)); LOG_DEBUG(<< "accuracy @ 1% error = " << TP[1] / (TP[1] + FP)); LOG_DEBUG(<< "accuracy @ 5% error = " << TP[2] / (TP[2] + FP)); BOOST_TEST_REQUIRE(TP[0] / (TP[0] + FN[0]) > 0.98); BOOST_TEST_REQUIRE(TP[1] / (TP[1] + FN[1]) > 0.99); BOOST_TEST_REQUIRE(TP[2] / (TP[2] + FN[2]) > 0.99); BOOST_TEST_REQUIRE(TP[0] / (TP[0] + FP) > 0.94); BOOST_TEST_REQUIRE(TP[1] / (TP[1] + FP) > 0.94); BOOST_TEST_REQUIRE(TP[2] / (TP[2] + FP) > 0.94); } BOOST_AUTO_TEST_CASE(testSyntheticDiurnalWithPiecewiseLinearTrend) { // Test the ability to correctly decompose a time series with diurnal seasonal // components and a piecewise linear trend. std::size_t segmentSupport[][2]{{100, 200}, {600, 900}}; double slopeSupport[][2]{{0.5, 1.0}, {-1.0, -0.5}}; double interceptSupport[][2]{{-10.0, 5.0}, {10.0, 20.0}}; TGeneratorVec generators{smoothDaily, spikeyDaily}; core_t::TTime startTime{0}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; double TP{0.0}; double FN{0.0}; for (std::size_t test = 0; test < 20; ++test) { if ((test + 1) % 2 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 20"); } for (auto window : {3 * WEEK, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); core_t::TTime endTime{startTime + window}; TTimeVec segments; TLinearModelVec models{[](core_t::TTime) { return 0.0; }}; for (std::size_t i = 0; i < 2; ++i) { TSizeVec segment; TDoubleVec slope; TDoubleVec intercept; rng.generateUniformSamples(segmentSupport[i][0], segmentSupport[i][1], 1, segment); rng.generateUniformSamples(slopeSupport[i][0], slopeSupport[i][1], 1, slope); rng.generateUniformSamples(interceptSupport[i][0], interceptSupport[i][1], 1, intercept); segments.push_back(startTime + segment[0] * HALF_HOUR); models.push_back([startTime, slope, intercept](core_t::TTime time) { return slope[0] * static_cast<double>(time - startTime) / static_cast<double>(DAY) + intercept[0]; }); } segments.push_back(endTime); auto trend = [&](core_t::TTime time) { auto i = std::lower_bound(segments.begin(), segments.end(), time); return 2.0 * (models[i - segments.begin()](time) + 5.0 * generators[index[0]](time)); }; rng.generateNormalSamples(0.0, 1.0, window / HALF_HOUR, noise); rng.generateUniformSamples(0, 2, 1, index); values.assign(window / HALF_HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += HALF_HOUR) { std::size_t bucket(time / HALF_HOUR); values[bucket].add(trend(startTime + time) + noise[bucket]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HALF_HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); if (result.print() != "[86400]") { LOG_DEBUG(<< "got " << result.print() << ", expected [86400]"); } TP += result.print() == "[86400]" ? 1.0 : 0.0; FN += result.print() == "[86400]" ? 0.0 : 1.0; } } LOG_DEBUG(<< "Recall = " << TP / (TP + FN)); BOOST_TEST_REQUIRE(TP / (TP + FN) > 0.92); } BOOST_AUTO_TEST_CASE(testModelledSeasonalityWithNoChange) { // Check we keep the modelled seasonality if it hasn't changed. TGeneratorVec generators{smoothDaily, spikeyDaily, smoothWeekly, weekends}; TDiurnalTimeVecVec modelledComponents{{{0, 0, WEEK, DAY}}, {{0, 0, WEEK, DAY}}, {{0, 0, WEEK, WEEK}}, {{5 * DAY, 0 * DAY, 2 * DAY, DAY}, {5 * DAY, 0 * DAY, 2 * DAY, WEEK}, {5 * DAY, 2 * DAY, 7 * DAY, DAY}, {5 * DAY, 2 * DAY, 7 * DAY, WEEK}}}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; for (std::size_t test = 0; test < 20; ++test) { if ((test + 1) % 2 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 20"); } for (auto window : {3 * WEEK, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); generateNoise(test, rng, window, FIVE_MINS, noise); rng.generateUniformSamples(0, 2, 1, index); LOG_TRACE(<< "index = " << index[0]); auto generator = generators[index[0]]; values.assign(window / HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += FIVE_MINS) { values[time / HOUR].add(5.0 * generator(time) + noise[time / FIVE_MINS]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ 0, 0, HOUR, FIVE_MINS, values}; for (const auto& time : modelledComponents[index[0]]) { seasonality.addModelledSeasonality(time, 2, 24); } auto result = seasonality.decompose(); // We should only detect the components we have already which we don't // bother to return. BOOST_REQUIRE(result.seasonal().empty()); } } } BOOST_AUTO_TEST_CASE(testModelledSeasonalityWithChange) { // Simulate the seasonal components having changed in the test window and // check that we correctly identify the new ones. TGeneratorVec generators{smoothDaily, weekends}; TDiurnalTimeVecVec modelledComponents{{{5 * DAY, 0 * DAY, 2 * DAY, DAY}, {5 * DAY, 0 * DAY, 2 * DAY, WEEK}, {5 * DAY, 2 * DAY, 7 * DAY, DAY}, {5 * DAY, 2 * DAY, 7 * DAY, WEEK}}, {{0, 0, WEEK, DAY}}}; TStrVecVec expected{{"86400"}, {"86400/(0,172800)", "86400/(172800,604800)", "604800/(0,172800)", "604800/(172800,604800)"}}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; double TP{0.0}; double FN{0.0}; double FP{0.0}; for (std::size_t test = 0; test < 20; ++test) { if ((test + 1) % 2 == 0) { LOG_DEBUG(<< "test " << test + 1 << " / 20"); } for (auto window : {3 * WEEK, 4 * WEEK}) { LOG_TRACE(<< "window = " << window); generateNoise(test, rng, window, FIVE_MINS, noise); rng.generateUniformSamples(0, 2, 1, index); LOG_TRACE(<< "index = " << index[0]); auto generator = generators[index[0]]; values.assign(window / HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < window; time += FIVE_MINS) { values[time / HOUR].add(10.0 * generator(time) + noise[time / FIVE_MINS]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ 0, 0, HOUR, FIVE_MINS, values}; for (const auto& time : modelledComponents[index[0]]) { seasonality.addModelledSeasonality(time, 2, 24); } auto result = seasonality.decompose(); if (result.print() != core::CContainerPrinter::print(expected[index[0]])) { LOG_DEBUG(<< "got " << result.print() << ", expected " << expected[index[0]]); } std::size_t found[]{0, 0}; for (const auto& component : result.seasonal()) { ++found[std::find(expected[index[0]].begin(), expected[index[0]].end(), component.print()) == expected[index[0]].end()]; } TP += static_cast<double>(found[0]); FN += static_cast<double>(expected[index[0]].size() - found[0]); FP += static_cast<double>(found[1]); } } LOG_DEBUG(<< "recall = " << TP / (TP + FN)); LOG_DEBUG(<< "accuracy = " << TP / (TP + FP)); BOOST_TEST_REQUIRE(TP / (TP + FN) > 0.9); BOOST_TEST_REQUIRE(TP / (TP + FP) > 0.8); } BOOST_AUTO_TEST_CASE(testNewComponentInitialValues) { // Test that the initial values for the seasonal components identified match // the supplied values. TGeneratorVec generators{smoothDaily, spikeyDaily, weekends}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec predictions; TSizeVec startTimes; rng.generateUniformSamples(0, 10000000, 10, startTimes); TTimeVec expectedWindowStarts{0, 2 * DAY, 0, 2 * DAY}; TTimeVec expectedWindowEnds{2 * DAY, 7 * DAY, 2 * DAY, 7 * DAY}; TTimeVec expectedPeriods{DAY, DAY, WEEK, WEEK}; for (std::size_t test = 0; test < 10; ++test) { LOG_DEBUG(<< "test " << test + 1 << " / 10"); std::size_t index{test % generators.size()}; auto generator = generators[index]; core_t::TTime startTime{ HALF_HOUR * (static_cast<core_t::TTime>(startTimes[test]) / HALF_HOUR)}; values.assign(3 * WEEK / HALF_HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < 3 * WEEK; time += HALF_HOUR) { values[time / HALF_HOUR].add(10.0 * generator(startTime + time)); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HALF_HOUR, HALF_HOUR, values}; auto result = seasonality.decompose(); // Expect agreement with generator. predictions.assign(values.size(), 0.0); for (const auto& component : result.seasonal()) { BOOST_REQUIRE_EQUAL(startTime, component.initialValuesStartTime()); BOOST_REQUIRE_EQUAL(startTime + 3 * WEEK, component.initialValuesEndTime()); for (std::size_t i = 0; i < component.initialValues().size(); ++i) { predictions[i] += maths::common::CBasicStatistics::mean( component.initialValues()[i]); } } for (core_t::TTime time = 0; time < 3 * WEEK; time += HALF_HOUR) { BOOST_REQUIRE_CLOSE_ABSOLUTE(10.0 * generator(startTime + time), predictions[time / HALF_HOUR], 1e-4); } // Check the seasonal time is initialized correctly. switch (index) { case 0: case 1: for (const auto& component : result.seasonal()) { auto time = component.seasonalTime(); BOOST_REQUIRE_EQUAL(false, time->windowed()); BOOST_REQUIRE_EQUAL(0, time->windowRepeatStart()); BOOST_REQUIRE_EQUAL(0, time->windowStart()); BOOST_REQUIRE_EQUAL(DAY, time->windowEnd()); BOOST_REQUIRE_EQUAL(DAY, time->period()); } break; default: BOOST_REQUIRE_EQUAL(4, result.seasonal().size()); for (std::size_t i = 0; i < result.seasonal().size(); ++i) { auto time = result.seasonal()[i].seasonalTime(); BOOST_REQUIRE_EQUAL(true, time->windowed()); BOOST_REQUIRE_EQUAL(5 * DAY / HALF_HOUR, time->windowRepeatStart() / HALF_HOUR); BOOST_REQUIRE_EQUAL(expectedWindowStarts[i], time->windowStart()); BOOST_REQUIRE_EQUAL(expectedWindowEnds[i], time->windowEnd()); BOOST_REQUIRE_EQUAL(expectedPeriods[i], time->period()); } break; } } } BOOST_AUTO_TEST_CASE(testNewComponentInitialValuesWithPiecewiseLinearScaling) { // Test that the initial values for the seasonal components when there // are linear scalings in the test values. TGeneratorVec generators{smoothDaily}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec startTimes; TDoubleVec scales; rng.generateUniformSamples(0, 10000000, 10, startTimes); rng.generateUniformSamples(0.5, 2.0, 10, scales); for (std::size_t test = 0; test < 10; ++test) { LOG_DEBUG(<< "test " << test + 1 << " / 10"); generateNoise(test, rng, 2 * WEEK, FIVE_MINS, noise); std::size_t index{test % generators.size()}; auto generator = generators[index]; core_t::TTime startTime{HOUR * (static_cast<core_t::TTime>(startTimes[test]) / HOUR)}; values.assign(2 * WEEK / HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < 2 * WEEK; time += HOUR) { values[time / HOUR].add(10.0 * (time < WEEK ? 1.0 : scales[test]) * generator(startTime + time) + 0.1 * noise[time / HOUR]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HOUR, HOUR, values}; auto result = seasonality.decompose(); BOOST_REQUIRE_EQUAL(1, result.seasonal().size()); const auto& seasonal = result.seasonal()[0].initialValues(); for (std::size_t i = 0; i < seasonal.size(); ++i) { double actual{10.0 * scales[test] * generator(startTime + HOUR * static_cast<core_t::TTime>(i))}; double prediction{maths::common::CBasicStatistics::mean(seasonal[i])}; BOOST_REQUIRE_CLOSE_ABSOLUTE(actual, prediction, 2.0); } } } BOOST_AUTO_TEST_CASE(testNewTrendSummary) { // Test we get the trend initial values we expect with either no trend // or a linear ramp. TGeneratorVec trends{constant, ramp}; TGeneratorVec seasonalities{smoothDaily, spikeyDaily, weekends}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec predictions; TSizeVec startTimes; rng.generateUniformSamples(0, 10000000, 10, startTimes); for (std::size_t test = 0; test < 10; ++test) { LOG_DEBUG(<< "test " << test + 1 << " / 10"); core_t::TTime startTime{HOUR * (static_cast<core_t::TTime>(startTimes[test]) / HOUR)}; auto trend = trends[test % trends.size()]; auto generator = [&](core_t::TTime time) { return 100.0 * trend(time) + 10.0 * seasonalities[test % seasonalities.size()](time); }; values.assign(4 * WEEK / HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < 4 * WEEK; time += HOUR) { values[time / HOUR].add(generator(startTime + time)); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HOUR, HOUR, values}; auto result = seasonality.decompose(); BOOST_REQUIRE(result.trend() != nullptr); predictions.assign(values.size(), 0.0); BOOST_REQUIRE_EQUAL(startTime, result.trend()->initialValuesStartTime()); BOOST_REQUIRE_EQUAL(startTime + 4 * WEEK, result.trend()->initialValuesEndTime()); for (std::size_t i = 0; i < result.trend()->initialValues().size(); ++i) { predictions[i] += maths::common::CBasicStatistics::mean( result.trend()->initialValues()[i]); } // Expect slope to match. TFloatMeanAccumulator expectedMeanSlope; TFloatMeanAccumulator meanSlope; for (core_t::TTime time = HOUR; time < 4 * WEEK; time += HOUR) { expectedMeanSlope.add( 100.0 * (trend(startTime + time) - trend(startTime + time - HOUR))); meanSlope.add(predictions[time / HOUR] - predictions[time / HOUR - 1]); } BOOST_REQUIRE_CLOSE_ABSOLUTE( static_cast<double>(maths::common::CBasicStatistics::mean(expectedMeanSlope)), static_cast<double>(maths::common::CBasicStatistics::mean(meanSlope)), 2e-3); // Expect agreement with generator. for (const auto& component : result.seasonal()) { for (std::size_t i = 0; i < component.initialValues().size(); ++i) { BOOST_REQUIRE_EQUAL(startTime, component.initialValuesStartTime()); BOOST_REQUIRE_EQUAL(startTime + 4 * WEEK, component.initialValuesEndTime()); predictions[i] += maths::common::CBasicStatistics::mean( component.initialValues()[i]); } } for (core_t::TTime time = 0; time < 4 * WEEK; time += HOUR) { BOOST_REQUIRE_CLOSE_ABSOLUTE(generator(startTime + time), predictions[time / HOUR], 10.0); } } } BOOST_AUTO_TEST_CASE(testNewTrendSummaryPiecewiseLinearTrend) { // Test the initial values for a piecewise linear trend. std::size_t segmentSupport[][2]{{100, 200}, {600, 900}}; double slopeSupport[][2]{{0.5, 1.0}, {-1.0, -0.5}}; double interceptSupport[][2]{{-10.0, 5.0}, {10.0, 20.0}}; TGeneratorVec generators{smoothDaily, spikeyDaily}; core_t::TTime startTime{0}; test::CRandomNumbers rng; TFloatMeanAccumulatorVec values; TDoubleVec noise; TSizeVec index; TFloatMeanAccumulator meanError; for (std::size_t test = 0; test < 10; ++test) { LOG_DEBUG(<< "test " << test + 1 << " / 10"); core_t::TTime endTime{startTime + 4 * WEEK}; TTimeVec segments; TLinearModelVec models{[](core_t::TTime) { return 0.0; }}; for (std::size_t i = 0; i < 2; ++i) { TSizeVec segment; TDoubleVec slope; TDoubleVec intercept; rng.generateUniformSamples(segmentSupport[i][0], segmentSupport[i][1], 1, segment); rng.generateUniformSamples(slopeSupport[i][0], slopeSupport[i][1], 1, slope); rng.generateUniformSamples(interceptSupport[i][0], interceptSupport[i][1], 1, intercept); segments.push_back(startTime + segment[0] * HALF_HOUR); models.push_back([startTime, slope, intercept](core_t::TTime time) { return slope[0] * static_cast<double>(time - startTime) / static_cast<double>(DAY) + intercept[0]; }); } segments.push_back(endTime); auto generator = [&](core_t::TTime time) { auto i = std::lower_bound(segments.begin(), segments.end(), time); return 80.0 + 2.0 * (models[i - segments.begin()](time) + 5.0 * generators[index[0]](time)); }; rng.generateNormalSamples(0.0, 1.0, 2 * WEEK / HOUR, noise); rng.generateUniformSamples(0, 2, 1, index); values.assign(2 * WEEK / HOUR, TFloatMeanAccumulator{}); for (core_t::TTime time = 0; time < 4 * WEEK; time += 2 * HOUR) { std::size_t bucket(time / 2 / HOUR); values[bucket].add(generator(startTime + time) + noise[bucket]); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, 2 * HOUR, 2 * HOUR, values}; auto result = seasonality.decompose(); // Since we remove step continuities just check agreement on the last // couple of days of the window. const auto& trend = result.trend()->initialValues(); for (std::size_t i = trend.size() - 36; i < trend.size(); ++i) { double actual{generator(startTime + 2 * HOUR * static_cast<core_t::TTime>(i))}; double prediction{maths::common::CBasicStatistics::mean(trend[i])}; for (const auto& seasonal : result.seasonal()) { prediction += maths::common::CBasicStatistics::mean( seasonal.initialValues()[i]); } BOOST_REQUIRE_CLOSE(actual, prediction, 15.0); meanError.add(std::fabs(prediction - actual) / std::fabs(actual)); } } BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanError) < 0.03); } BOOST_AUTO_TEST_CASE(testWithSuppliedPredictor) { // Check the initial values in the case that we have a supplied predictor. using TBoolVec = std::vector<bool>; auto daily = [&](core_t::TTime time) { return std::sin(boost::math::double_constants::pi * static_cast<double>(time % DAY) / static_cast<double>(DAY)); }; auto weekly = [](core_t::TTime time) { double values[]{2.0, 2.1, 2.3, 2.2, 1.8, 1.6, 1.4, 1.0, 1.2, 1.8, 1.5, 1.7, 1.8, 1.9}; return values[2 * (time % WEEK) / DAY]; }; core_t::TTime startTime{1000000}; TFloatMeanAccumulatorVec values(3 * WEEK / HOUR); for (core_t::TTime time = 0; time < 3 * WEEK; time += HOUR) { values[time / HOUR].add(daily(startTime + time) + weekly(startTime + time)); } maths::time_series::CTimeSeriesTestForSeasonality seasonality{ startTime, startTime, HOUR, HOUR, values}; seasonality.addModelledSeasonality( maths::time_series::CDiurnalTime{0, 0, WEEK, DAY}, 2, 24); seasonality.modelledSeasonalityPredictor([](core_t::TTime time, const TBoolVec&) { return std::sin(boost::math::double_constants::pi * static_cast<double>(time % DAY) / static_cast<double>(DAY)); }); auto result = seasonality.decompose(); for (core_t::TTime time = 0; time < 3 * WEEK; time += HOUR) { double prediction{0.0}; for (const auto& component : result.seasonal()) { prediction += maths::common::CBasicStatistics::mean( component.initialValues()[time / HOUR]); } BOOST_REQUIRE_CLOSE_ABSOLUTE(prediction, weekly(startTime + time), 1e-4); } } BOOST_AUTO_TEST_SUITE_END()