in lib/maths/time_series/CSeasonalComponentAdaptiveBucketing.cc [355:505]
void CSeasonalComponentAdaptiveBucketing::refresh(const TFloatVec& oldEndpoints) {
// Values are assigned based on their intersection with each
// bucket in the previous configuration. The regression and
// variance are computed using the appropriate combination
// rules. Note that the count is weighted by the square of
// the fractional intersection between the old and new buckets.
// This means that the effective weight of buckets whose end
// points change significantly is reduced. This is reasonable
// because the periodic trend is assumed to be constant within
// each bucket when in fact it varies, so adjusting the end
// points introduces error into the bucket value; we account
// for this by reducing its significance in the new bucket
// values.
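//
// For example, if a new bucket covers exactly half of a single
// old bucket then the fractional intersection is w = 0.5 and the
// old bucket's regression count is carried over scaled by
// w * w = 0.25 rather than 0.5, so that bucket's history counts
// for less in the refreshed configuration.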
//
// A better approximation is to assume that the trend is
// continuous. In fact, this can be done by using a spline
// with the constraint that the mean of the spline in each
// interval is equal to the bucket's mean value. We can additionally
// decompose the variance into a contribution from noise and
// a contribution from the trend. Under these assumptions it
// is then possible (but not trivial) to update the bucket
// means and variances based on the new end point positions.
// This might be worth considering at some point.
using TDoubleMeanAccumulator = common::CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMinAccumulator = common::CBasicStatistics::SMin<core_t::TTime>::TAccumulator;
std::size_t m{m_Buckets.size()};
std::size_t n{oldEndpoints.size()};
if (m + 1 != n) {
LOG_ERROR(<< "Inconsistent end points and regressions");
return;
}
const TFloatVec& newEndpoints{this->endpoints()};
const TFloatVec& oldCentres{this->centres()};
const TFloatVec& oldLargeErrorCounts{this->largeErrorCounts()};
TBucketVec buckets;
TFloatVec newCentres;
TFloatVec newLargeErrorCounts;
buckets.reserve(m);
newCentres.reserve(m);
newLargeErrorCounts.reserve(m);
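// For each new bucket [yl, yr] find the old buckets it overlaps and
// combine their statistics in proportion to the overlap.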
for (std::size_t i = 1; i < n; ++i) {
double yl{newEndpoints[i - 1]};
double yr{newEndpoints[i]};
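// Find l and r such that old buckets l, ..., r, i.e. m_Buckets[l - 1],
// ..., m_Buckets[r - 1], are the ones which intersect [yl, yr].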
std::size_t r(std::lower_bound(oldEndpoints.begin(), oldEndpoints.end(), yr) -
oldEndpoints.begin());
r = common::CTools::truncate(r, std::size_t(1), n - 1);
std::size_t l(std::upper_bound(oldEndpoints.begin(), oldEndpoints.end(), yl) -
oldEndpoints.begin());
l = common::CTools::truncate(l, std::size_t(1), r);
LOG_TRACE(<< "interval = [" << yl << "," << yr << "]");
LOG_TRACE(<< "l = " << l << ", r = " << r);
LOG_TRACE(<< "[x(l), x(r)] = [" << oldEndpoints[l - 1] << ","
<< oldEndpoints[r] << "]");
double xl{oldEndpoints[l - 1]};
double xr{oldEndpoints[l]};
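// If the new bucket lies entirely within a single old bucket simply
// scale that bucket's statistics by the fractional overlap w.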
if (l == r) {
double interval{newEndpoints[i] - newEndpoints[i - 1]};
double w{common::CTools::truncate(interval / (xr - xl), 0.0, 1.0)};
const SBucket& bucket{m_Buckets[l - 1]};
buckets.emplace_back(bucket.s_Regression.scaled(w * w), bucket.s_Variance,
bucket.s_FirstUpdate, bucket.s_LastUpdate);
newCentres.push_back(common::CTools::truncate(
static_cast<double>(oldCentres[l - 1]), yl, yr));
newLargeErrorCounts.push_back(w * oldLargeErrorCounts[l - 1]);
} else {
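// The new bucket spans several old buckets. Start by adding the
// leftmost old bucket, which is only partially covered, weighting
// its statistics by the fractional overlap w.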
double interval{xr - newEndpoints[i - 1]};
double w{common::CTools::truncate(interval / (xr - xl), 0.0, 1.0)};
const SBucket* bucket{&m_Buckets[l - 1]};
TMinAccumulator firstUpdate;
TMinAccumulator lastUpdate;
TDoubleRegression regression{bucket->s_Regression.scaled(w)};
TDoubleMeanVarAccumulator variance{common::CBasicStatistics::momentsAccumulator(
w * bucket->s_Regression.count(), bucket->s_Regression.mean(),
static_cast<double>(bucket->s_Variance))};
firstUpdate.add(bucket->s_FirstUpdate);
lastUpdate.add(bucket->s_LastUpdate);
TDoubleMeanAccumulator centre{common::CBasicStatistics::momentsAccumulator(
w * bucket->s_Regression.count(), static_cast<double>(oldCentres[l - 1]))};
double largeErrorCount{w * oldLargeErrorCounts[l - 1]};
double count{w * w * bucket->s_Regression.count()};
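// Add, at full weight, the old buckets which lie wholly inside
// the new bucket.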
while (++l < r) {
bucket = &m_Buckets[l - 1];
regression += bucket->s_Regression;
variance += common::CBasicStatistics::momentsAccumulator(
bucket->s_Regression.count(), bucket->s_Regression.mean(),
static_cast<double>(bucket->s_Variance));
firstUpdate.add(bucket->s_FirstUpdate);
lastUpdate.add(bucket->s_LastUpdate);
centre += common::CBasicStatistics::momentsAccumulator(
bucket->s_Regression.count(), static_cast<double>(oldCentres[l - 1]));
largeErrorCount += oldLargeErrorCounts[l - 1];
count += bucket->s_Regression.count();
}
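// Finally add the rightmost old bucket, again weighted by its
// fractional overlap with the new bucket.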
xl = oldEndpoints[l - 1];
xr = oldEndpoints[l];
bucket = &m_Buckets[l - 1];
interval = newEndpoints[i] - xl;
w = common::CTools::truncate(interval / (xr - xl), 0.0, 1.0);
regression += bucket->s_Regression.scaled(w);
variance += common::CBasicStatistics::momentsAccumulator(
w * bucket->s_Regression.count(), bucket->s_Regression.mean(),
static_cast<double>(bucket->s_Variance));
firstUpdate.add(bucket->s_FirstUpdate);
lastUpdate.add(bucket->s_LastUpdate);
centre += common::CBasicStatistics::momentsAccumulator(
w * bucket->s_Regression.count(), static_cast<double>(oldCentres[l - 1]));
largeErrorCount += w * oldLargeErrorCounts[l - 1];
count += w * w * bucket->s_Regression.count();
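// Rescale so that the combined regression's count equals the count
// accumulated with the squared fractional overlaps, which discounts
// the contributions of the partially overlapping old buckets.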
double scale{count == regression.count() ? 1.0 : count / regression.count()};
buckets.emplace_back(regression.scaled(scale),
common::CBasicStatistics::maximumLikelihoodVariance(variance),
firstUpdate[0], lastUpdate[0]);
newCentres.push_back(common::CTools::truncate(
common::CBasicStatistics::mean(centre), yl, yr));
newLargeErrorCounts.push_back(largeErrorCount);
}
}
// We want all regressions to respond at the same rate to changes
// in the trend. To achieve this we should assign them a weight
// that is equal to the number of points they will receive in one
// period.
double count{0.0};
for (const auto& bucket : buckets) {
count += bucket.s_Regression.count();
}
count /= (oldEndpoints[m] - oldEndpoints[0]);
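// Here, count is the mean number of points per unit length of the
// period, so each bucket's regression count is rescaled to the
// number of points it should receive in one period given its width.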
for (std::size_t i = 0; i < m; ++i) {
double c{buckets[i].s_Regression.count()};
if (c > 0.0) {
buckets[i].s_Regression.scale(
count * (oldEndpoints[i + 1] - oldEndpoints[i]) / c);
}
}
LOG_TRACE(<< "old endpoints = " << oldEndpoints);
LOG_TRACE(<< "old centres = " << oldCentres);
LOG_TRACE(<< "new endpoints = " << newEndpoints);
LOG_TRACE(<< "new centres = " << newCentres);
m_Buckets.swap(buckets);
this->centres().swap(newCentres);
this->largeErrorCounts().swap(newLargeErrorCounts);
}