bool CEventRateModel::computeProbability()

in lib/model/CEventRateModel.cc [382:503]

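Computes the annotated probability of a person's activity in a single bucket: per-feature probabilities and influences are aggregated, combined with the person's frequency probability, and written to the result along with the multi-bucket impact and an anomaly score explanation.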

bool CEventRateModel::computeProbability(std::size_t pid,
                                         core_t::TTime startTime,
                                         core_t::TTime endTime,
                                         CPartitioningFields& partitioningFields,
                                         std::size_t /*numberAttributeProbabilities*/,
                                         SAnnotatedProbability& result) const {
    const CDataGatherer& gatherer = this->dataGatherer();
    core_t::TTime bucketLength = gatherer.bucketLength();

    if (endTime != startTime + bucketLength) {
        LOG_ERROR(<< "Can only compute probability for a single bucket");
        return false;
    }

    if (pid >= this->firstBucketTimes().size()) {
        LOG_ERROR(<< "No first time for person = " << gatherer.personName(pid));
        return false;
    }

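    // Assembles the annotated result; individual event rate analysis carries
    // a single attribute probability per person.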
    CAnnotatedProbabilityBuilder resultBuilder(result,
                                               1, // # attribute probabilities
                                               function_t::function(gatherer.features()));

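    // pJoint accumulates the overall probability for the person: the joint
    // feature probability plus, below, the person's frequency probability.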
    CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff);
    pJoint.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());

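    // pFeatures aggregates the per-feature probabilities using both the joint
    // probability and the probability of the most extreme sample.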
    CProbabilityAndInfluenceCalculator pFeatures(this->params().s_InfluenceCutoff);
    pFeatures.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());
    pFeatures.addAggregator(maths::common::CProbabilityOfExtremeSample());

    bool addPersonProbability{false};
    bool skippedResults{false};

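    // Accumulate a probability and influence contribution for each
    // non-categorical feature with data for this person and bucket.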
    for (std::size_t i = 0u, n = gatherer.numberFeatures(); i < n; ++i) {
        model_t::EFeature feature = gatherer.feature(i);
        if (model_t::isCategorical(feature)) {
            continue;
        }
        const TFeatureData* data = this->featureData(feature, pid, startTime);
        if (!data) {
            continue;
        }

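        // If a rule suppresses model updates for this feature at this time,
        // the result shouldn't update the quantiles either.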
        if (this->shouldSkipUpdate(feature, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
                                   model_t::sampleTime(feature, startTime, bucketLength))) {
            result.s_ShouldUpdateQuantiles = false;
        }

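        // Results suppressed by a rule are skipped, but remembered so that
        // the probability can still default to 1.0 below.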
        if (this->shouldIgnoreResult(
                feature, result.s_ResultType, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID,
                model_t::sampleTime(feature, startTime, bucketLength))) {
            skippedResults = true;
            continue;
        }

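        // At least one feature produced a result, so include the person's
        // frequency probability below.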
        addPersonProbability = true;

        LOG_TRACE(<< "value(" << this->personName(pid) << ") = " << data->print());

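        // Features which are correlated with other time series need correlate
        // parameters; otherwise fill univariate parameters from the feature's
        // bucket data.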
        if (this->correlates(feature, pid, startTime)) {
            CProbabilityAndInfluenceCalculator::SCorrelateParams params(partitioningFields);
            TStrCRefDouble1VecDouble1VecPrPrVecVecVec influenceValues;
            this->fill(feature, pid, startTime, result.isInterim(), params, influenceValues);
            this->addProbabilityAndInfluences(pid, params, influenceValues,
                                              pFeatures, resultBuilder);
        } else {
            CProbabilityAndInfluenceCalculator::SParams params(partitioningFields);
            if (this->fill(feature, pid, startTime, result.isInterim(), params)) {
                this->addProbabilityAndInfluences(pid, params, data->s_InfluenceValues,
                                                  pFeatures, resultBuilder);
            }
        }
    }

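    // Fold the per-feature probabilities into the joint calculation and, if
    // any feature produced a result and the person was seen in this bucket,
    // include the probability of seeing this person at all.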
    TOptionalUInt64 count = this->currentBucketCount(pid, startTime);

    pJoint.add(pFeatures);
    if (addPersonProbability && count && *count != 0) {
        double p;
        if (m_Probabilities.lookup(pid, p)) {
            LOG_TRACE(<< "P(" << gatherer.personName(pid) << ") = " << p);
            pJoint.addProbability(p);
        }
    }

    double p{1.0};
    if (skippedResults && pJoint.empty()) {
        // Results were skipped for every feature. Leave the probability at
        // its default of 1.0 so that the quantiles are still updated.
    } else if (pJoint.empty()) {
        LOG_TRACE(<< "No samples in [" << startTime << "," << endTime << ")");
        return false;
    } else if (!pJoint.calculate(p, result.s_Influences)) {
        LOG_ERROR(<< "Failed to compute probability");
        return false;
    }
    LOG_TRACE(<< "probability(" << this->personName(pid) << ") = " << p);

    resultBuilder.probability(p);

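    // Report how much multi-bucket analysis contributed to the anomaly, when
    // the impact can be computed.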
    double multiBucketImpact{-1.0 * CAnomalyDetectorModelConfig::MAXIMUM_MULTI_BUCKET_IMPACT_MAGNITUDE};
    if (pJoint.calculateMultiBucketImpact(multiBucketImpact)) {
        resultBuilder.multiBucketImpact(multiBucketImpact);
    }

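    // Fill in the anomaly score explanation: the person's actual and typical
    // concentration parameters, and whether this is the first bucket in which
    // the person has been seen.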
    resultBuilder.anomalyScoreExplanation() = pJoint.anomalyScoreExplanation();
    auto& anomalyScoreExplanation{resultBuilder.anomalyScoreExplanation()};
    bool everSeenBefore = this->firstBucketTimes()[pid] != startTime;
    auto typicalConcentration = m_Probabilities.medianConcentration();
    double actualConcentration;
    if (m_ProbabilityPrior.concentration(pid, actualConcentration) &&
        typicalConcentration.has_value()) {
        anomalyScoreExplanation.s_ByFieldActualConcentration = actualConcentration;
        anomalyScoreExplanation.s_ByFieldTypicalConcentration =
            typicalConcentration.value();
    }
    anomalyScoreExplanation.s_ByFieldFirstOccurrence = !everSeenBefore;
    resultBuilder.build();

    return true;
}
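
For orientation, a minimal sketch of how a caller might drive this method for one bucket. The method's const signature and the s_Influences member come from the code above; the EMPTY_STRING constant passed to CPartitioningFields, the default-constructed SAnnotatedProbability, and the s_Probability member read from the result are assumptions for illustration, not taken from this file.

// Hypothetical caller sketch (assumptions: CPartitioningFields takes a
// partition field name/value pair by reference, so we pass long-lived
// empty strings; s_Probability mirrors resultBuilder.probability(p)).
static const std::string EMPTY_STRING;

void sketchComputeProbability(const CEventRateModel& model,
                              std::size_t pid,
                              core_t::TTime bucketStart,
                              core_t::TTime bucketLength) {
    CPartitioningFields partitioningFields(EMPTY_STRING, EMPTY_STRING);
    SAnnotatedProbability annotatedProbability;
    if (model.computeProbability(pid, bucketStart, bucketStart + bucketLength,
                                 partitioningFields,
                                 1, // attribute probability count (unused)
                                 annotatedProbability)) {
        // annotatedProbability.s_Probability now holds the joint probability
        // and annotatedProbability.s_Influences the influential field values.
    }
}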