in lib/model/CEventRateModel.cc [382:503]
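// Computes the probability of the activity of the person identified by 'pid'
// in the single bucket [startTime, endTime) and fills in 'result' with the
// probability, its influences and the anomaly score explanation.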
bool CEventRateModel::computeProbability(std::size_t pid,
                                         core_t::TTime startTime,
                                         core_t::TTime endTime,
                                         CPartitioningFields& partitioningFields,
                                         std::size_t /*numberAttributeProbabilities*/,
                                         SAnnotatedProbability& result) const {
    const CDataGatherer& gatherer = this->dataGatherer();
    core_t::TTime bucketLength = gatherer.bucketLength();
    if (endTime != startTime + bucketLength) {
        LOG_ERROR(<< "Can only compute probability for single bucket");
        return false;
    }
    if (pid >= this->firstBucketTimes().size()) {
        LOG_ERROR(<< "No first time for person = " << gatherer.personName(pid));
        return false;
    }
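    // Set up the result builder and two probability calculators: pFeatures
    // aggregates the per-feature probabilities (jointly and by the most
    // extreme sample), and pJoint combines these with the person's own
    // probability below.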
    CAnnotatedProbabilityBuilder resultBuilder(result,
                                               1, // # attribute probabilities
                                               function_t::function(gatherer.features()));
    CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff);
    pJoint.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());
    CProbabilityAndInfluenceCalculator pFeatures(this->params().s_InfluenceCutoff);
    pFeatures.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());
    pFeatures.addAggregator(maths::common::CProbabilityOfExtremeSample());
    bool addPersonProbability{false};
    bool skippedResults{false};
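    // Compute a probability for every non-categorical feature for which we
    // have data for this person in the bucket.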
    for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) {
        model_t::EFeature feature = gatherer.feature(i);
        if (model_t::isCategorical(feature)) {
            continue;
        }
        const TFeatureData* data = this->featureData(feature, pid, startTime);
        if (!data) {
            continue;
        }
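        // If updates are suppressed for this feature at the sample time,
        // make sure the result doesn't update the quantiles either.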
        if (this->shouldSkipUpdate(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
                                   model_t::sampleTime(feature, startTime, bucketLength))) {
            result.s_ShouldUpdateQuantiles = false;
        }
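        // Results which should be ignored don't contribute to the person's
        // probability, but we remember that we skipped them: see below.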
        if (this->shouldIgnoreResult(
                feature, result.s_ResultType, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
                model_t::sampleTime(feature, startTime, bucketLength))) {
            skippedResults = true;
            continue;
        }
        addPersonProbability = true;
        LOG_TRACE(<< "value(" << this->personName(pid) << ") = " << data->print());
        if (this->correlates(feature, pid, startTime)) {
            CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields);
            TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues;
            this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues);
            this->addProbabilityAndInfluences(pid, params, influenceValues,
                                              pFeatures, resultBuilder);
        } else {
            CProbabilityAndInfluenceCalculator::SParams params(partitioningFields);
            if (this->fill(feature, pid, startTime, result.isInterim(), params)) {
                this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues,
                                                  pFeatures, resultBuilder);
            }
        }
    }
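    // Fold in the probability of the person themselves, provided at least
    // one feature produced a result and the person was seen in this bucket.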
    TOptionalUInt64 count = this->currentBucketCount(pid, startTime);
    pJoint.add(pFeatures);
    if (addPersonProbability && count && *count != 0) {
        double p;
        if (m_Probabilities.lookup(pid, p)) {
            LOG_TRACE(<< "P(" << gatherer.personName(pid) << ") = " << p);
            pJoint.addProbability(p);
        }
    }
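    // Reduce the aggregated feature (and person) probabilities to a single
    // overall probability for the bucket.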
    double p{1.0};
    if (skippedResults && pJoint.empty()) {
        // This means we have skipped results for all features.
        // We set the probability to 1.0 here to ensure the
        // quantiles are updated accordingly.
    } else if (pJoint.empty()) {
        LOG_TRACE(<< "No samples in [" << startTime << "," << endTime << ")");
        return false;
    } else if (!pJoint.calculate(p, result.s_Influences)) {
        LOG_ERROR(<< "Failed to compute probability");
        return false;
    }
    LOG_TRACE(<< "probability(" << this->personName(pid) << ") = " << p);
    resultBuilder.probability(p);
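    // Attach the multi-bucket impact, i.e. the relative influence of the
    // multi-bucket analysis on this result, if it can be computed.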
    double multiBucketImpact{-1.0 * CAnomalyDetectorModelConfig::MAXIMUM_MULTI_BUCKET_IMPACT_MAGNITUDE};
    if (pJoint.calculateMultiBucketImpact(multiBucketImpact)) {
        resultBuilder.multiBucketImpact(multiBucketImpact);
    }
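    // Fill in the anomaly score explanation: record the person's actual
    // concentration versus the typical (median) concentration, and whether
    // this is the first bucket in which the person has been seen.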
    resultBuilder.anomalyScoreExplanation() = pJoint.anomalyScoreExplanation();
    auto& anomalyScoreExplanation{resultBuilder.anomalyScoreExplanation()};
    bool everSeenBefore = this->firstBucketTimes()[pid] != startTime;
    auto typicalConcentration = m_Probabilities.medianConcentration();
    double actualConcentration;
    if (m_ProbabilityPrior.concentration(pid, actualConcentration) &&
        typicalConcentration.has_value()) {
        anomalyScoreExplanation.s_ByFieldActualConcentration = actualConcentration;
        anomalyScoreExplanation.s_ByFieldTypicalConcentration =
            typicalConcentration.value();
    }
    anomalyScoreExplanation.s_ByFieldFirstOccurrence = !everSeenBefore;
    resultBuilder.build();
    return true;
}