bool CMetricPopulationModel::computeProbability()

in lib/model/CMetricPopulationModel.cc [565:695]


// Compute the probability of the bucket [startTime, endTime) for the person
// identified by pid, aggregated over the person's attributes and over every
// non-categorical feature of the data gatherer.
//
// pid - The identifier of the person of interest.
// startTime - The start of the bucket of interest.
// endTime - The end of the bucket of interest; this must equal startTime
// plus the gatherer's bucket length.
// partitioningFields - The attribute field name is appended to this (with
// an empty value) and its value is then overwritten with the name of each
// attribute as it is processed. Note this is modified even if the function
// subsequently returns false because no probability could be computed.
// numberAttributeProbabilities - Passed to the result builder (at least one
// is always requested).
// result - Filled in with the overall probability, the per attribute
// probabilities and the influences. Its s_ShouldUpdateQuantiles flag is
// cleared if any (feature, person, attribute) update should be skipped.
//
// Returns true if a probability was computed and written to result and
// false if the time range is not a single bucket, pid is out of range, no
// probabilities were added for the person or the aggregation failed.
bool CMetricPopulationModel::computeProbability(std::size_t pid,
                                                core_t::TTime startTime,
                                                core_t::TTime endTime,
                                                CPartitioningFields& partitioningFields,
                                                std::size_t numberAttributeProbabilities,
                                                SAnnotatedProbability& result) const {
    const CDataGatherer& gatherer = this->dataGatherer();
    core_t::TTime bucketLength = gatherer.bucketLength();

    if (endTime != startTime + bucketLength) {
        LOG_ERROR(<< "Can only compute probability for single bucket");
        return false;
    }
    // Reject pid == numberPeople() as well as anything larger.
    // NOTE(review): this assumes person ids are zero-based so numberPeople()
    // is an exclusive upper bound - confirm against CDataGatherer.
    if (pid >= gatherer.numberPeople()) {
        LOG_TRACE(<< "No person for pid = " << pid);
        return false;
    }

    using TOptionalStr = std::optional<std::string>;
    using TOptionalStr1Vec = core::CSmallVector<TOptionalStr, 1>;

    // Empty placeholders passed for every attribute probability added below,
    // since only unconditional probabilities are reported by this function.
    static const TOptionalStr1Vec NO_CORRELATED_ATTRIBUTES;
    static const TSizeDoublePr1Vec NO_CORRELATES;

    // Add a slot for the attribute field; its value is set per attribute in
    // the loop below via partitioningFields.back().
    partitioningFields.add(gatherer.attributeFieldName(), EMPTY_STRING);

    CAnnotatedProbabilityBuilder resultBuilder(
        result, std::max(numberAttributeProbabilities, std::size_t(1)),
        function_t::function(gatherer.features()));

    LOG_TRACE(<< "computeProbability(" << gatherer.personName(pid) << ")");

    CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff);
    pJoint.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());
    pJoint.addAggregator(maths::common::CProbabilityOfExtremeSample());
    if (this->params().s_CacheProbabilities) {
        pJoint.addCache(m_Probabilities);
    }

    for (std::size_t i = 0; i < gatherer.numberFeatures(); ++i) {
        model_t::EFeature feature = gatherer.feature(i);
        if (model_t::isCategorical(feature)) {
            continue;
        }
        LOG_TRACE(<< "feature = " << model_t::print(feature));

        // The [first, second) range of indices into featureData which
        // personRange returns for this person.
        const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, startTime);
        TSizeSizePr range = personRange(featureData, pid);

        for (std::size_t j = range.first; j < range.second; ++j) {
            std::size_t cid = CDataGatherer::extractAttributeId(featureData[j]);

            // Overwrite the value of the attribute field added above.
            partitioningFields.back().second = TStrCRef(gatherer.attributeName(cid));

            const TOptionalSample& bucket =
                CDataGatherer::extractData(featureData[j]).s_BucketValue;
            if (!bucket) {
                LOG_ERROR(<< "Expected a value for feature = " << model_t::print(feature)
                          << ", person = " << gatherer.personName(pid)
                          << ", attribute = " << gatherer.attributeName(cid));
                continue;
            }

            // A skipped update only clears the quantiles flag: the value
            // still contributes to the probability below.
            if (this->shouldSkipUpdate(feature, pid, cid,
                                       model_t::sampleTime(feature, startTime, bucketLength))) {
                result.s_ShouldUpdateQuantiles = false;
            }

            if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid,
                                         model_t::sampleTime(feature, startTime, bucketLength,
                                                             bucket->time()))) {
                continue;
            }

            if (this->correlates(feature, pid, cid, startTime)) {
                // TODO: this case is not handled, so such values currently
                // contribute nothing to the result.
            } else {
                CProbabilityAndInfluenceCalculator::SParams params(partitioningFields);
                if (this->fill(feature, pid, cid, startTime, result.isInterim(),
                               params) == false) {
                    continue;
                }
                // Outputs of addProbability which are not used afterwards:
                // the attribute probability is always reported as
                // unconditional with no correlates.
                model_t::CResultType type;
                TSize1Vec mostAnomalousCorrelate;
                if (pJoint.addProbability(feature, cid, *params.s_Model, params.s_ElapsedTime,
                                          params.s_ComputeProbabilityParams,
                                          params.s_Time, params.s_Value, params.s_Probability,
                                          params.s_Tail, type, mostAnomalousCorrelate)) {
                    LOG_TRACE(<< "P(" << params.describe()
                              << ", attribute = " << gatherer.attributeName(cid)
                              << ", person = " << this->personName(pid)
                              << ") = " << params.s_Probability);
                    // Add the influences for each influencer field that has
                    // an influence calculator for this feature.
                    const auto& influenceValues =
                        CDataGatherer::extractData(featureData[j]).s_InfluenceValues;
                    for (std::size_t k = 0; k < influenceValues.size(); ++k) {
                        if (const CInfluenceCalculator* influenceCalculator =
                                this->influenceCalculator(feature, k)) {
                            pJoint.plugin(*influenceCalculator);
                            pJoint.addInfluences(*(gatherer.beginInfluencers() + k),
                                                 influenceValues[k], params);
                        }
                    }
                    resultBuilder.addAttributeProbability(
                        cid, gatherer.attributeName(cid), params.s_Probability,
                        model_t::CResultType::E_Unconditional, feature,
                        NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
                } else {
                    LOG_ERROR(<< "Failed to compute P(" << params.describe()
                              << ", attribute = " << gatherer.attributeName(cid)
                              << ", person = " << this->personName(pid) << ")");
                }
            }
        }
    }

    // Nothing was added to the calculator for this person in this bucket.
    if (pJoint.empty()) {
        LOG_TRACE(<< "No samples in [" << startTime << "," << endTime << ")");
        return false;
    }

    // Written by calculate on success; never read if it fails.
    double p;
    if (!pJoint.calculate(p, result.s_Influences)) {
        LOG_ERROR(<< "Failed to compute probability of " << this->personName(pid));
        return false;
    }
    LOG_TRACE(<< "probability(" << this->personName(pid) << ") = " << p);
    resultBuilder.probability(p);
    resultBuilder.build();

    return true;
}