in lib/model/CMetricPopulationModel.cc [565:695]
// Compute the probability of the bucket [startTime, endTime) for the person
// pid, aggregating over every (non-categorical feature, attribute) pair for
// which the person has feature data in that bucket.
//
// pid                          The person identifier.
// startTime, endTime           The bucket; must span exactly one bucket length.
// partitioningFields           Gets an attribute field entry appended, whose
//                              value is rewritten for each attribute visited.
// numberAttributeProbabilities Passed to the result builder (at least 1 is used).
// result                       Filled in via the result builder; its
//                              s_ShouldUpdateQuantiles is cleared if any
//                              visited value should skip update and
//                              s_Influences is written by the calculator.
//
// Returns true if a probability was computed and built into result. Returns
// false if the interval is not a single bucket, pid is out of range, nothing
// was added to the calculator, or the final calculation fails.
bool CMetricPopulationModel::computeProbability(std::size_t pid,
                                                core_t::TTime startTime,
                                                core_t::TTime endTime,
                                                CPartitioningFields& partitioningFields,
                                                std::size_t numberAttributeProbabilities,
                                                SAnnotatedProbability& result) const {
    const CDataGatherer& gatherer = this->dataGatherer();
    core_t::TTime bucketLength = gatherer.bucketLength();

    if (endTime != startTime + bucketLength) {
        LOG_ERROR(<< "Can only compute probability for single bucket");
        return false;
    }

    // Reject pid == numberPeople() as well as anything larger.
    // NOTE(review): this assumes person ids are zero-based indices into the
    // gatherer's people, so the last valid id is numberPeople() - 1; confirm
    // against CDataGatherer.
    if (pid >= gatherer.numberPeople()) {
        LOG_TRACE(<< "No person for pid = " << pid);
        return false;
    }

    using TOptionalStr = std::optional<std::string>;
    using TOptionalStr1Vec = core::CSmallVector<TOptionalStr, 1>;

    // Empty placeholders: no correlated attributes are ever reported here
    // because the correlated branch below is not implemented.
    static const TOptionalStr1Vec NO_CORRELATED_ATTRIBUTES;
    static const TSizeDoublePr1Vec NO_CORRELATES;

    // Add the attribute field once; its value is overwritten per attribute
    // in the loop below through partitioningFields.back().
    partitioningFields.add(gatherer.attributeFieldName(), EMPTY_STRING);

    CAnnotatedProbabilityBuilder resultBuilder(
        result, std::max(numberAttributeProbabilities, std::size_t(1)),
        function_t::function(gatherer.features()));

    LOG_TRACE(<< "computeProbability(" << gatherer.personName(pid) << ")");

    CProbabilityAndInfluenceCalculator pJoint(this->params().s_InfluenceCutoff);
    pJoint.addAggregator(maths::common::CJointProbabilityOfLessLikelySamples());
    pJoint.addAggregator(maths::common::CProbabilityOfExtremeSample());
    if (this->params().s_CacheProbabilities) {
        pJoint.addCache(m_Probabilities);
    }

    for (std::size_t i = 0; i < gatherer.numberFeatures(); ++i) {
        model_t::EFeature feature = gatherer.feature(i);
        if (model_t::isCategorical(feature)) {
            continue;
        }
        LOG_TRACE(<< "feature = " << model_t::print(feature));

        const TSizeSizePrFeatureDataPrVec& featureData = this->featureData(feature, startTime);

        // [range.first, range.second) indexes this person's entries in featureData.
        TSizeSizePr range = personRange(featureData, pid);

        for (std::size_t j = range.first; j < range.second; ++j) {
            std::size_t cid = CDataGatherer::extractAttributeId(featureData[j]);

            partitioningFields.back().second = TStrCRef(gatherer.attributeName(cid));

            const TOptionalSample& bucket =
                CDataGatherer::extractData(featureData[j]).s_BucketValue;
            if (!bucket) {
                LOG_ERROR(<< "Expected a value for feature = " << model_t::print(feature)
                          << ", person = " << gatherer.personName(pid)
                          << ", attribute = " << gatherer.attributeName(cid));
                continue;
            }

            // A skipped update only clears the quantiles flag; the value
            // still contributes to the probability below.
            if (this->shouldSkipUpdate(feature, pid, cid,
                                       model_t::sampleTime(feature, startTime, bucketLength))) {
                result.s_ShouldUpdateQuantiles = false;
            }

            if (this->shouldIgnoreResult(feature, result.s_ResultType, pid, cid,
                                         model_t::sampleTime(feature, startTime, bucketLength,
                                                             bucket->time()))) {
                continue;
            }

            if (this->correlates(feature, pid, cid, startTime)) {
                // TODO: correlated attributes are not handled; nothing is
                // added to the calculator for them.
            } else {
                CProbabilityAndInfluenceCalculator::SParams params(partitioningFields);
                if (this->fill(feature, pid, cid, startTime, result.isInterim(),
                               params) == false) {
                    continue;
                }

                // Out-parameters required by addProbability; their values
                // are not used afterwards in this function.
                model_t::CResultType type;
                TSize1Vec mostAnomalousCorrelate;

                if (pJoint.addProbability(feature, cid, *params.s_Model, params.s_ElapsedTime,
                                          params.s_ComputeProbabilityParams,
                                          params.s_Time, params.s_Value, params.s_Probability,
                                          params.s_Tail, type, mostAnomalousCorrelate)) {
                    LOG_TRACE(<< "P(" << params.describe()
                              << ", attribute = " << gatherer.attributeName(cid)
                              << ", person = " << this->personName(pid)
                              << ") = " << params.s_Probability);

                    const auto& influenceValues =
                        CDataGatherer::extractData(featureData[j]).s_InfluenceValues;
                    for (std::size_t k = 0; k < influenceValues.size(); ++k) {
                        // Influencer fields with no calculator for this
                        // feature are skipped.
                        if (const CInfluenceCalculator* influenceCalculator =
                                this->influenceCalculator(feature, k)) {
                            pJoint.plugin(*influenceCalculator);
                            pJoint.addInfluences(*(gatherer.beginInfluencers() + k),
                                                 influenceValues[k], params);
                        }
                    }

                    resultBuilder.addAttributeProbability(
                        cid, gatherer.attributeName(cid), params.s_Probability,
                        model_t::CResultType::E_Unconditional, feature,
                        NO_CORRELATED_ATTRIBUTES, NO_CORRELATES);
                } else {
                    LOG_ERROR(<< "Failed to compute P(" << params.describe()
                              << ", attribute = " << gatherer.attributeName(cid)
                              << ", person = " << this->personName(pid) << ")");
                }
            }
        }
    }

    if (pJoint.empty()) {
        LOG_TRACE(<< "No samples in [" << startTime << "," << endTime << ")");
        return false;
    }

    double p;
    if (!pJoint.calculate(p, result.s_Influences)) {
        LOG_ERROR(<< "Failed to compute probability of " << this->personName(pid));
        return false;
    }
    LOG_TRACE(<< "probability(" << this->personName(pid) << ") = " << p);

    resultBuilder.probability(p);
    resultBuilder.build();

    return true;
}