in lib/model/CAnomalyScore.cc [509:673]
bool CAnomalyScore::CNormalizer::updateQuantiles(const CMaximumScoreScope& scope, double score) {
using TUInt32UInt64Pr = std::pair<std::uint32_t, std::uint64_t>;
using TUInt32UInt64PrVec = std::vector<TUInt32UInt64Pr>;
CMaxScore& maxScore = m_MaxScores[scope.key(m_IsForMembersOfPopulation, m_Dictionary)];
double oldMaxScore{maxScore.score()};
maxScore.add(score);
bool bigChange{maxScore.score() > BIG_CHANGE_FACTOR * oldMaxScore};
LOG_TRACE(<< "updateQuantiles: scope = " << scope.print()
<< ", oldMaxScore = " << oldMaxScore << ", score = " << score
<< ", newMaxScore = " << maxScore.score() << ", bigChange = " << bigChange);
m_Sample = std::max(m_Sample, score);
if (++m_CountSinceLastSample < m_CountPerSample) {
return bigChange;
}
std::uint32_t discreteScore = this->discreteScore(m_Sample);
LOG_TRACE(<< "score = " << m_Sample << ", discreteScore = " << discreteScore
<< ", maxScore = " << maxScore.score() << ", scope = " << scope.print());
m_CountSinceLastSample = 0;
m_Sample = 0.0;
std::uint64_t n = m_RawScoreQuantileSummary.n();
std::uint64_t k = m_RawScoreQuantileSummary.k();
LOG_TRACE(<< "n = " << n << ", k = " << k);
// We are about to compress the q-digest, at the moment it comprises
// the unique values we have seen so far. So we extract the values
// greater than the HIGH_PERCENTILE percentile at this point to
// initialize the fine grain high quantile summary.
if ((n + 1) == k) {
LOG_TRACE(<< "Initializing H");
TUInt32UInt64PrVec L;
m_RawScoreQuantileSummary.summary(L);
if (L.empty()) {
LOG_ERROR(<< "High quantile summary is empty: "
<< m_RawScoreQuantileSummary.print());
} else {
auto highPercentileCount = static_cast<std::uint64_t>(
(HIGH_PERCENTILE / 100.0) * static_cast<double>(n) + 0.5);
// Estimate the high percentile score and update the count.
std::size_t i = 1;
m_HighPercentileScore = L[0].first;
m_HighPercentileCount = L[0].second;
for (/**/; i < L.size(); ++i) {
if (L[i].second > highPercentileCount) {
m_HighPercentileScore = L[i - 1].first;
m_HighPercentileCount = L[i - 1].second;
break;
}
}
if (m_HighPercentileCount > n) {
LOG_ERROR(<< "Invalid c(H) " << m_HighPercentileCount);
LOG_ERROR(<< "target " << highPercentileCount);
LOG_ERROR(<< "L " << L);
m_HighPercentileCount = n;
}
LOG_TRACE(<< "s(H) = " << m_HighPercentileScore
<< ", c(H) = " << m_HighPercentileCount << ", percentile = "
<< 100.0 * static_cast<double>(m_HighPercentileCount) /
static_cast<double>(n)
<< "%, desired c(H) = " << highPercentileCount);
// Populate the high quantile summary.
for (/**/; i < L.size(); ++i) {
std::uint32_t x = L[i].first;
std::uint64_t m = L[i].second - L[i - 1].second;
LOG_TRACE(<< "Adding (" << x << ", " << m << ") to H");
m_RawScoreHighQuantileSummary.add(x, m);
}
}
}
m_RawScoreQuantileSummary.add(discreteScore);
if (discreteScore <= m_HighPercentileScore) {
++m_HighPercentileCount;
} else {
m_RawScoreHighQuantileSummary.add(discreteScore);
}
LOG_TRACE(<< "percentile = "
<< static_cast<double>(m_HighPercentileCount) / static_cast<double>(n + 1));
// Periodically refresh the high percentile score.
if ((n + 1) > k && (n + 1) % k == 0) {
LOG_TRACE(<< "Refreshing high quantile summary");
auto highPercentileCount = static_cast<std::uint64_t>(
(HIGH_PERCENTILE / 100.0) * static_cast<double>(n + 1) + 0.5);
LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount
<< ", desired c(H) = " << highPercentileCount);
if (m_HighPercentileCount > highPercentileCount) {
TUInt32UInt64PrVec L;
m_RawScoreQuantileSummary.summary(L);
TUInt32UInt64PrVec H;
m_RawScoreHighQuantileSummary.summary(H);
std::size_t i0 = std::min(
static_cast<std::size_t>(
std::lower_bound(L.begin(), L.end(), highPercentileCount,
maths::common::COrderings::SSecondLess()) -
L.begin()),
L.size() - 1);
std::size_t j = std::min(
static_cast<std::size_t>(
std::upper_bound(H.begin(), H.end(), L[i0],
maths::common::COrderings::SFirstLess()) -
H.begin()),
H.size() - 1);
std::uint64_t r = L[i0].second;
for (std::size_t i = i0 + 1;
i < L.size() && L[i0].second + m_RawScoreHighQuantileSummary.n() < n + 1;
++i) {
for (/**/; j < H.size() && H[j].first <= L[i].first; ++j) {
r += (H[j].second - (j == 0 ? static_cast<std::uint64_t>(0)
: H[j - 1].second));
}
std::uint32_t x = L[i].first;
std::uint64_t m = r < L[i].second ? L[i].second - r
: static_cast<std::uint64_t>(0);
r += m;
if (m > 0) {
LOG_TRACE(<< "Adding (" << x << ',' << m << ") to H");
m_RawScoreHighQuantileSummary.add(x, m);
}
}
m_HighPercentileScore = L[i0].first;
m_HighPercentileCount = L[i0].second;
if (m_HighPercentileCount > n + 1) {
LOG_ERROR(<< "Invalid c(H) " << m_HighPercentileCount);
LOG_ERROR(<< "target " << highPercentileCount);
LOG_ERROR(<< "L " << L);
m_HighPercentileCount = n;
}
LOG_TRACE(<< "s(H) = " << m_HighPercentileScore
<< ", c(H) = " << m_HighPercentileCount << ", percentile = "
<< 100.0 * static_cast<double>(m_HighPercentileCount) /
static_cast<double>(n + 1)
<< "%");
} else {
m_RawScoreQuantileSummary.quantile(HIGH_PERCENTILE / 100.0, m_HighPercentileScore);
double lowerBound;
double upperBound;
m_RawScoreQuantileSummary.cdf(m_HighPercentileScore, 0.0, lowerBound, upperBound);
m_HighPercentileCount = static_cast<std::uint64_t>(
static_cast<double>(n + 1) * lowerBound + 0.5);
LOG_TRACE(<< "s(H) = " << m_HighPercentileScore << ", c(H) = " << m_HighPercentileCount
<< ", percentile = " << 100.0 * lowerBound << "%");
}
}
return bigChange;
}