in lib/model/CAnomalyScore.cc [274:426]
//! Normalize \p score in place against the history of raw scores
//! accumulated for this normalizer.
//!
//! \param[in] scope Identifies the partition whose maximum ever raw
//! score is used for the "maximum score ceiling" term.
//! \param[in,out] score On entry the raw anomaly score; on exit the
//! normalized score, which is the minimum of four independent ceilings
//! capped at m_MaximumNormalizedScore.
//! \return Always true (both exit paths return true).
bool CAnomalyScore::CNormalizer::normalize(const CMaximumScoreScope& scope,
                                           double& score) const {
    if (score == 0.0) {
        // Nothing to do.
        return true;
    }

    if (m_RawScoreQuantileSummary.n() == 0) {
        // No history yet against which to normalize.
        score = 0.0;
        return true;
    }

    LOG_TRACE(<< "Normalising " << score);

    // Each entry is an independent ceiling on the normalized score;
    // initialize all of them to the maximum so that a term which is
    // not computed (e.g. no valid maximum score) imposes no constraint.
    double normalizedScores[]{m_MaximumNormalizedScore, m_MaximumNormalizedScore,
                              m_MaximumNormalizedScore, m_MaximumNormalizedScore};

    std::uint32_t discreteScore = this->discreteScore(score);

    // Our normalized score is the minimum of a set of different
    // score ceilings. The idea is that we have a number of factors
    // which can reduce the score based on the other score values
    // observed so far and the absolute score for some of our
    // functions. We want the following properties:
    //   1) The noise like scores, i.e. the smallest scores which
    //      occur relatively frequently, to have low normalized score.
    //   2) Higher resolution of the highest scores we've seen.
    //   3) The highest raw score ever observed has the highest
    //      normalized score.
    //   4) We don't want to generate significant anomalies for
    //      moderately unusual events before we have enough history
    //      to assess their probability accurately.
    //
    // To ensure 1) we use a ceiling for the score that is a
    // multiple of a low(ish) percentile score. To ensure 2) we
    // use non-linear (actually piecewise linear) mapping from
    // percentiles to scores, i.e. low percentiles map to a small
    // normalized score range and high percentiles map to a large
    // normalized score range. Finally to ensure 3) we
    // use a ceiling which is linear interpolation between a raw
    // score of zero and the maximum ever raw score for the partition.
    // To achieve 4), we cap the maximum score such that probabilities
    // near the cutoff don't generate large normalized scores.

    // Compute the noise ceiling. Note that since the scores are
    // logarithms (base e) the difference corresponds to the scaled,
    // 10 / log(10), signal strength in dB. By default a signal of
    // 75dB corresponds to a normalized score of 75. Note that if
    // the noise percentile is zero then really it is unknown,
    // since scores are truncated to zero. In this case we don't
    // want this term to constrain the normalized score. However,
    // we also want this term to be smooth, i.e. the score should
    // be nearly continuous when F(0) = pn. Here, F(.) denotes the
    // c.d.f. of the score and pn the noise percentile. We achieve
    // this by adding "max score" * max(2 * min(F(0) / pn, 1) - 1, 0)
    // to the score, where F(0) is estimated below as the midpoint
    // of the interval [l0, u0] and converted to a percentile.
    std::uint32_t noiseScore;
    m_RawScoreQuantileSummary.quantile(m_NoisePercentile / 100.0, noiseScore);
    // NOTE(review): if m_NoisePercentile ever exceeded every knot
    // abscissa this would return end() and the dereference below would
    // be undefined behavior; presumably the knot points always span up
    // to the 100th percentile - confirm wherever
    // m_NormalizedScoreKnotPoints is populated.
    auto knotPoint = std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
                                      m_NormalizedScoreKnotPoints.end(),
                                      TDoubleDoublePr(m_NoisePercentile, 0.0));
    // Signal strength in (scaled) dB above the noise percentile score;
    // DISCRETIZATION_FACTOR undoes the scaling applied by discreteScore.
    double signalStrength =
        m_NoiseMultiplier * 10.0 / DISCRETIZATION_FACTOR *
        (static_cast<double>(discreteScore) - static_cast<double>(noiseScore));
    // [l0, u0] brackets F(0), the fraction of history at score zero.
    double l0;
    double u0;
    m_RawScoreQuantileSummary.cdf(0, 0.0, l0, u0);
    // 50.0 * (l0 + u0) is the midpoint estimate of F(0) expressed as a
    // percentile, i.e. 100 * (l0 + u0) / 2.
    normalizedScores[0] =
        knotPoint->second * std::max(1.0 + signalStrength, 0.0) +
        m_MaximumNormalizedScore *
            std::max(2.0 * std::min(50.0 * (l0 + u0) / m_NoisePercentile, 1.0) - 1.0, 0.0);
    LOG_TRACE(<< "normalizedScores[0] = " << normalizedScores[0]
              << ", knotPoint = " << knotPoint->second << ", discreteScore = " << discreteScore
              << ", noiseScore = " << noiseScore << ", l(0) = " << l0
              << ", u(0) = " << u0 << ", signalStrength = " << signalStrength);

    // Compute the raw normalized score percentile compared to historic values.
    double lowerPercentile;
    double upperPercentile;
    this->quantile(score, 70.0 /*confidence interval*/, lowerPercentile, upperPercentile);
    lowerPercentile *= 100.0;
    upperPercentile *= 100.0;
    if (lowerPercentile > upperPercentile) {
        std::swap(lowerPercentile, upperPercentile);
    }
    lowerPercentile = maths::common::CTools::truncate(lowerPercentile, 0.0, 100.0);
    upperPercentile = maths::common::CTools::truncate(upperPercentile, 0.0, 100.0);

    // Find the knot interval containing each percentile. Clamping the
    // index to at least 1 makes the left neighbor [index - 1] always
    // valid; an index equal to size() means the percentile is beyond
    // the last knot and the maximum score is used instead.
    std::size_t lowerKnotPoint =
        std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
                                  m_NormalizedScoreKnotPoints.end(), lowerPercentile,
                                  maths::common::COrderings::SFirstLess()) -
                     m_NormalizedScoreKnotPoints.begin(),
                 std::ptrdiff_t(1));
    std::size_t upperKnotPoint =
        std::max(std::lower_bound(m_NormalizedScoreKnotPoints.begin(),
                                  m_NormalizedScoreKnotPoints.end(), upperPercentile,
                                  maths::common::COrderings::SFirstLess()) -
                     m_NormalizedScoreKnotPoints.begin(),
                 std::ptrdiff_t(1));
    if (lowerKnotPoint < m_NormalizedScoreKnotPoints.size()) {
        const TDoubleDoublePr& left = m_NormalizedScoreKnotPoints[lowerKnotPoint - 1];
        const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[lowerKnotPoint];
        normalizedScores[1] = maths::common::CTools::linearlyInterpolate(
            left.first, right.first, left.second, right.second, lowerPercentile);
    } else {
        normalizedScores[1] = m_MaximumNormalizedScore;
    }
    // The percentile ceiling is the average of the interpolated scores
    // at the lower and upper ends of the confidence interval.
    if (upperKnotPoint < m_NormalizedScoreKnotPoints.size()) {
        const TDoubleDoublePr& left = m_NormalizedScoreKnotPoints[upperKnotPoint - 1];
        const TDoubleDoublePr& right = m_NormalizedScoreKnotPoints[upperKnotPoint];
        normalizedScores[1] =
            (normalizedScores[1] + maths::common::CTools::linearlyInterpolate(
                                       left.first, right.first, left.second,
                                       right.second, upperPercentile)) /
            2.0;
    } else {
        normalizedScores[1] = (normalizedScores[1] + m_MaximumNormalizedScore) / 2.0;
    }
    LOG_TRACE(<< "normalizedScores[1] = " << normalizedScores[1] << ", lowerPercentile = "
              << lowerPercentile << ", upperPercentile = " << upperPercentile);

    double maxScore{0.0};
    bool hasValidMaxScore = this->maxScore(scope, maxScore);
    LOG_TRACE(<< "hasValidMaxScore = " << hasValidMaxScore
              << ", scope = " << scope.print());
    if (hasValidMaxScore) {
        // Compute the maximum score ceiling: piecewise linear in the
        // ratio of this score to the partition's maximum ever score.
        // The two lines cross at ratio = 0.5 and the ceiling reaches
        // the maximum normalized score at ratio = 1.
        double ratio = score / maxScore;
        normalizedScores[2] = m_MaximumNormalizedScore *
                              std::min({0.0 + 1.5 * ratio, 0.5 + 0.5 * ratio});
        LOG_TRACE(<< "normalizedScores[2] = " << normalizedScores[2] << ", score = " << score
                  << ", maxScore = " << maxScore << ", ratio = " << ratio);
    }

    // Logarithmically interpolate the maximum score between the
    // largest significant and small probability. M and C are the
    // slope and offset of that interpolation in log-probability
    // space; they depend only on compile-time constants so are
    // computed once.
    static const double M =
        (probabilityToScore(maths::common::SMALL_PROBABILITY) -
         probabilityToScore(maths::common::LARGEST_SIGNIFICANT_PROBABILITY)) /
        (std::log(maths::common::SMALL_PROBABILITY) -
         std::log(maths::common::LARGEST_SIGNIFICANT_PROBABILITY));
    static const double C = std::log(maths::common::LARGEST_SIGNIFICANT_PROBABILITY);
    normalizedScores[3] = m_MaximumNormalizedScore *
                          (0.95 * M * (std::log(scoreToProbability(score)) - C) + 0.05);
    LOG_TRACE(<< "normalizedScores[3] = " << normalizedScores[3] << ", score = " << score
              << ", probability = " << scoreToProbability(score));

    // The final score is the tightest of the four ceilings, never
    // exceeding the maximum normalized score.
    score = std::min(*std::min_element(std::begin(normalizedScores), std::end(normalizedScores)),
                     m_MaximumNormalizedScore);
    LOG_TRACE(<< "normalizedScore = " << score << ", scope = " << scope.print());

    return true;
}