in lib/maths/time_series/CAdaptiveBucketing.cc [395:550]
void CAdaptiveBucketing::refine(core_t::TTime time) {
using TDoubleDoublePr = std::pair<double, double>;
using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>;
using TDoubleSizePr = std::pair<double, std::size_t>;
using TMinMaxAccumulator = common::CBasicStatistics::CMinMax<TDoubleSizePr>;
LOG_TRACE(<< "refining at " << time);
if (m_Endpoints.size() < 2) {
return;
}
// Check if any buckets should be split based on the large error counts.
this->maybeSplitBucket();
std::size_t n{m_Endpoints.size() - 1};
double a{m_Endpoints[0]};
double b{m_Endpoints[n]};
// Extract the bucket means.
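// Each entry pairs the bucket count with the bucket's predicted
// value; a zero count identifies an empty bucket, which is
// excluded from the range calculation below.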
TDoubleDoublePrVec values;
values.reserve(n);
for (std::size_t i = 0; i < n; ++i) {
values.emplace_back(this->bucketCount(i), this->predict(i, time, m_Centres[i]));
}
LOG_TRACE(<< "values = " << values);
// Compute the function range in each bucket, imposing periodic
// boundary conditions at the start and end of the interval.
TDoubleVec ranges;
ranges.reserve(n);
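// For each bucket, find the minimum and maximum mean in a five
// bucket window centred on it, wrapping around at the ends of
// the interval. Raising the range to the power 0.75 compresses
// large ranges so a single extreme bucket doesn't dominate the
// error distribution.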
for (std::size_t i = 0; i < n; ++i) {
TDoubleDoublePr v[]{values[(n + i - 2) % n], values[(n + i - 1) % n],
values[(n + i + 0) % n], // centre
values[(n + i + 1) % n], values[(n + i + 2) % n]};
TMinMaxAccumulator minmax;
for (std::size_t j = 0; j < std::size(v); ++j) {
if (v[j].first > 0.0) {
minmax.add({v[j].second, j});
}
}
if (minmax.initialized()) {
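// Weight the range by the separation, in window positions, of
// the minimum and maximum; the ternary computes the absolute
// difference because std::size_t arithmetic is unsigned.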
ranges.push_back(WEIGHTS[minmax.max().second > minmax.min().second
? minmax.max().second - minmax.min().second
: minmax.min().second - minmax.max().second] *
std::pow(minmax.max().first - minmax.min().first, 0.75));
} else {
ranges.push_back(0.0);
}
}
// Smooth the ranges by convolving with a smoothing function.
// We do this in the "time" domain because the smoothing
// function is narrow. Estimate the averaging error in each
// bucket by multiplying the smoothed range by the bucket width.
TDoubleVec averagingErrors;
averagingErrors.reserve(n);
for (std::size_t i = 0; i < n; ++i) {
double ai{m_Endpoints[i]};
double bi{m_Endpoints[i + 1]};
double error{0.0};
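// Convolve the bucket ranges with the smoothing kernel centred
// on bucket i, again with periodic wrap-around.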
for (std::size_t j = 0; j < std::size(SMOOTHING_FUNCTION); ++j) {
error += SMOOTHING_FUNCTION[j] * ranges[(n + i + j - WIDTH) % n];
}
double h{bi - ai};
error *= h / (b - a);
averagingErrors.push_back(error);
}
double maxAveragingError{
*std::max_element(averagingErrors.begin(), averagingErrors.end())};
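// Buckets with significantly many large errors are assigned the
// maximum averaging error so that refinement preferentially
// allocates more, narrower buckets to them.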
for (const auto& pValue : m_LargeErrorCountPValues) {
if (pValue.first < MODERATE_SIGNIFICANCE) {
averagingErrors[pValue.second] = maxAveragingError;
}
}
double totalAveragingError{
std::accumulate(averagingErrors.begin(), averagingErrors.end(), 0.0)};
LOG_TRACE(<< "averagingErrors = " << averagingErrors);
LOG_TRACE(<< "totalAveragingError = " << totalAveragingError);
double n_{static_cast<double>(n)};
double step{(1.0 - n_ * EPS) * totalAveragingError / n_};
TFloatVec endpoints{m_Endpoints};
LOG_TRACE(<< "step = " << step);
// If all the function values are identical then the end points
// should be equidistant. We check step in case of underflow.
if (step == 0.0) {
m_Endpoints[0] = a;
for (std::size_t i = 0; i < n; ++i) {
m_Endpoints[i] = a + (b - a) * static_cast<double>(i) / n_;
}
m_Endpoints[n] = b;
} else {
// Noise in the bucket mean values creates a "high" frequency,
// mean zero driving force on the desired positions of the
// buckets' end points. Once the end points have stabilized at
// their desired locations for the trend, we can detect this by
// comparing the time averaged desired displacement with the
// mean absolute desired displacement: the lower the ratio, the
// more smoothing we apply. Note we want to damp the noise out
// because the process of adjusting the bucket end points loses
// a small amount of information, see the comments at the start
// of refresh for more details.
double alpha{
ALPHA *
(common::CBasicStatistics::mean(m_MeanAbsDesiredDisplacement) == 0.0
? 1.0
: std::fabs(common::CBasicStatistics::mean(m_MeanDesiredDisplacement)) /
common::CBasicStatistics::mean(m_MeanAbsDesiredDisplacement))};
LOG_TRACE(<< "alpha = " << alpha);
double displacement{0.0};
// Linearly interpolate between the current end points and points
// separated by equal total averaging error. Interpolating is
// equivalent to adding drag to the end point dynamics and damps
// any oscillations.
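// For intuition, ignoring the EPS correction and the alpha
// damping: if n = 4 and the averaging errors are {1, 3, 2, 2},
// then the total is 8 and step = 2, so the three interior end
// points are moved towards the positions where the cumulative
// error reaches 2, 4 and 6, i.e. one third of the way through
// the second bucket, at the end of the second bucket and at
// the end of the third.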
double unassignedAveragingError{0.0};
for (std::size_t i = 0, j = 1; i < n && j < n + 1; ++i) {
double ai{endpoints[i]};
double bi{endpoints[i + 1]};
double hi{bi - ai};
double ei{averagingErrors[i]};
unassignedAveragingError += ei;
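// Each time the unassigned error reaches a multiple of step,
// place the next end point at the position in [ai, bi] where
// the bucket's error, assumed uniform on the bucket, makes up
// the shortfall, damped by alpha towards the old end point.
// The small relative margin keeps the end points strictly
// increasing.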
for (double ej = step - (unassignedAveragingError - ei);
unassignedAveragingError >= step;
ej += step, unassignedAveragingError -= step, ++j) {
double xj{hi * ej / ei};
m_Endpoints[j] = std::max(
m_Endpoints[j - 1] + 1e-8 * std::fabs(m_Endpoints[j - 1]),
endpoints[j] + alpha * (ai + xj - endpoints[j]));
displacement += (ai + xj) - endpoints[j];
LOG_TRACE(<< "interval = [" << ai << "," << bi << "]"
<< " averaging error / unit length = " << ei / hi << ", desired translation "
<< endpoints[j] << " -> " << ai + xj);
}
}
// Add a margin to guarantee we'll meet the separation constraint.
spread(a, b, (1.0 + 1e-8) * m_MinimumBucketLength, m_Endpoints);
// By construction, the first and last end points should be
// close to "a" and "b", respectively, but we snap them to "a"
// and "b" so that the total interval is unchanged.
m_Endpoints[0] = a;
m_Endpoints[n] = b;
LOG_TRACE(<< "refinedEndpoints = " << m_Endpoints);
m_MeanDesiredDisplacement.add(displacement);
m_MeanAbsDesiredDisplacement.add(std::fabs(displacement));
}
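// Rebuild the bucket values and centres for the new end points;
// this needs the old end points to apportion the existing
// bucket statistics.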
this->refresh(endpoints);
}