lib/model/CModelDetailsView.cc (255 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <model/CModelDetailsView.h>
#include <core/CSmallVector.h>
#include <maths/common/CTools.h>
#include <maths/common/Constants.h>
#include <model/CDataGatherer.h>
#include <model/CEventRateModel.h>
#include <model/CEventRatePopulationModel.h>
#include <model/CMetricModel.h>
#include <model/CMetricPopulationModel.h>
#include <model/CModelPlotData.h>
namespace ml {
namespace model {
namespace {
const std::string EMPTY_STRING("");
}
using TDouble1Vec = core::CSmallVector<double, 1>;
using TOptionalUInt64 = CAnomalyDetectorModel::TOptionalUInt64;
using TDoubleDoublePr = std::pair<double, double>;
////////// CModelDetailsView Implementation //////////
bool CModelDetailsView::personId(const std::string& name, std::size_t& result) const {
return this->base().dataGatherer().personId(name, result);
}
bool CModelDetailsView::categoryId(const std::string& attribute, std::size_t& result) const {
return this->base().dataGatherer().attributeId(attribute, result);
}
const CModelDetailsView::TFeatureVec& CModelDetailsView::features() const {
return this->base().dataGatherer().features();
}
void CModelDetailsView::modelPlot(core_t::TTime time,
double boundsPercentile,
const TStrSet& terms,
CModelPlotData& modelPlotData) const {
for (auto feature : this->features()) {
if (!model_t::isConstant(feature) && !model_t::isCategorical(feature)) {
if (terms.empty() || !this->hasByField()) {
for (std::size_t byFieldId = 0; byFieldId < this->maxByFieldId(); ++byFieldId) {
this->modelPlotForByFieldId(time, boundsPercentile, feature,
byFieldId, modelPlotData);
}
} else {
for (const auto& term : terms) {
std::size_t byFieldId(0);
if (this->byFieldId(term, byFieldId)) {
this->modelPlotForByFieldId(time, boundsPercentile, feature,
byFieldId, modelPlotData);
}
}
}
this->addCurrentBucketValues(time, feature, terms, modelPlotData);
}
}
}
void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time,
double boundsPercentile,
model_t::EFeature feature,
std::size_t byFieldId,
CModelPlotData& modelPlotData) const {
using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>;
using TDouble2Vec = core::CSmallVector<double, 2>;
using TDouble2Vec3Vec = core::CSmallVector<TDouble2Vec, 3>;
if (this->isByFieldIdActive(byFieldId)) {
const maths::common::CModel* model = this->model(feature, byFieldId);
if (model == nullptr) {
return;
}
std::size_t dimension = model_t::dimension(feature);
time = model_t::sampleTime(feature, time, model->params().bucketLength());
maths_t::TDouble2VecWeightsAry weights{
maths_t::CUnitWeights::unit<TDouble2Vec>(dimension)};
TDouble2Vec seasonalWeight;
model->seasonalWeight(maths::common::DEFAULT_SEASONAL_CONFIDENCE_INTERVAL,
time, seasonalWeight);
maths_t::setSeasonalVarianceScale(seasonalWeight, weights);
maths_t::setCountVarianceScale(
TDouble2Vec(dimension, this->countVarianceScale(feature, byFieldId, time)), weights);
TDouble1VecDouble1VecPr support(model_t::support(feature));
TDouble2Vec supportLower(support.first);
TDouble2Vec supportUpper(support.second);
TDouble2Vec3Vec interval(model->confidenceInterval(time, boundsPercentile, weights));
if (interval.size() == 3) {
TDouble2Vec lower = maths::common::CTools::truncate(
interval[0], supportLower, supportUpper);
TDouble2Vec upper = maths::common::CTools::truncate(interval[2], lower, supportUpper);
TDouble2Vec median = maths::common::CTools::truncate(interval[1], lower, upper);
// TODO This data structure should support multivariate features.
modelPlotData.get(feature, this->byFieldValue(byFieldId)) =
CModelPlotData::SByFieldData(lower[0], upper[0], median[0]);
}
}
}
void CModelDetailsView::addCurrentBucketValues(core_t::TTime time,
model_t::EFeature feature,
const TStrSet& terms,
CModelPlotData& modelPlotData) const {
const CDataGatherer& gatherer = this->base().dataGatherer();
if (!gatherer.dataAvailable(time)) {
return;
}
bool isPopulation{gatherer.isPopulation()};
auto addCurrentBucketValue = [&](std::size_t pid, std::size_t cid) {
const std::string& byFieldValue{this->byFieldValue(pid, cid)};
if (this->contains(terms, byFieldValue)) {
TDouble1Vec value(this->base().currentBucketValue(feature, pid, cid, time));
if (!value.empty()) {
const std::string& overFieldValue{
isPopulation ? this->base().personName(pid) : EMPTY_STRING};
modelPlotData.get(feature, byFieldValue).addValue(overFieldValue, value[0]);
}
}
};
if (model_t::includeEmptyBuckets(feature)) {
for (std::size_t pid = 0; pid < gatherer.numberPeople(); ++pid) {
if (gatherer.isPersonActive(pid)) {
if (isPopulation) {
for (std::size_t cid = 0; cid < gatherer.numberAttributes(); ++cid) {
if (gatherer.isAttributeActive(cid)) {
addCurrentBucketValue(pid, cid);
}
}
} else {
addCurrentBucketValue(pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID);
}
}
}
} else {
for (const auto& count : gatherer.bucketCounts(time)) {
std::size_t pid{gatherer.extractPersonId(count)};
std::size_t cid{gatherer.extractAttributeId(count)};
addCurrentBucketValue(pid, cid);
}
}
}
bool CModelDetailsView::contains(const TStrSet& terms, const std::string& key) {
return terms.empty() || key.empty() || terms.find(key) != terms.end();
}
bool CModelDetailsView::hasByField() const {
const std::string& byField = this->base().isPopulation()
? this->base().dataGatherer().attributeFieldName()
: this->base().dataGatherer().personFieldName();
return byField.empty() == false;
}
std::size_t CModelDetailsView::maxByFieldId() const {
return this->base().isPopulation()
? this->base().dataGatherer().numberAttributes()
: this->base().dataGatherer().numberPeople();
}
bool CModelDetailsView::byFieldId(const std::string& byFieldValue, std::size_t& result) const {
return this->base().isPopulation()
? this->base().dataGatherer().attributeId(byFieldValue, result)
: this->base().dataGatherer().personId(byFieldValue, result);
}
const std::string& CModelDetailsView::byFieldValue(std::size_t byFieldId) const {
return this->base().isPopulation() ? this->base().attributeName(byFieldId)
: this->base().personName(byFieldId);
}
const std::string& CModelDetailsView::byFieldValue(std::size_t pid, std::size_t cid) const {
return this->base().isPopulation() ? this->base().attributeName(cid)
: this->base().personName(pid);
}
bool CModelDetailsView::isByFieldIdActive(std::size_t byFieldId) const {
return this->base().isPopulation()
? this->base().dataGatherer().isAttributeActive(byFieldId)
: this->base().dataGatherer().isPersonActive(byFieldId);
}
////////// CEventRateModelDetailsView Implementation //////////
CEventRateModelDetailsView::CEventRateModelDetailsView(const CEventRateModel& model)
: m_Model(&model) {
}
CEventRateModelDetailsView::TTimeTimePr
CEventRateModelDetailsView::dataTimeInterval(std::size_t byFieldId) const {
return {m_Model->firstBucketTimes()[byFieldId], m_Model->lastBucketTimes()[byFieldId]};
}
const maths::common::CModel*
CEventRateModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const {
return m_Model->model(feature, byFieldId);
}
const CAnomalyDetectorModel& CEventRateModelDetailsView::base() const {
return *m_Model;
}
double CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/,
std::size_t /*byFieldId*/,
core_t::TTime /*time*/) const {
return 1.0;
}
////////// CEventRatePopulationModelDetailsView Implementation //////////
CEventRatePopulationModelDetailsView::CEventRatePopulationModelDetailsView(const CEventRatePopulationModel& model)
: m_Model(&model) {
}
CEventRatePopulationModelDetailsView::TTimeTimePr
CEventRatePopulationModelDetailsView::dataTimeInterval(std::size_t byFieldId) const {
return {m_Model->attributeFirstBucketTimes()[byFieldId],
m_Model->attributeLastBucketTimes()[byFieldId]};
}
const maths::common::CModel*
CEventRatePopulationModelDetailsView::model(model_t::EFeature feature,
std::size_t byFieldId) const {
return m_Model->model(feature, byFieldId);
}
const CAnomalyDetectorModel& CEventRatePopulationModelDetailsView::base() const {
return *m_Model;
}
double CEventRatePopulationModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/,
std::size_t /*byFieldId*/,
core_t::TTime /*time*/) const {
return 1.0;
}
////////// CMetricModelDetailsView Implementation //////////
CMetricModelDetailsView::CMetricModelDetailsView(const CMetricModel& model)
: m_Model(&model) {
}
CMetricModelDetailsView::TTimeTimePr
CMetricModelDetailsView::dataTimeInterval(std::size_t byFieldId) const {
return {m_Model->firstBucketTimes()[byFieldId], m_Model->lastBucketTimes()[byFieldId]};
}
const maths::common::CModel*
CMetricModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const {
return m_Model->model(feature, byFieldId);
}
const CAnomalyDetectorModel& CMetricModelDetailsView::base() const {
return *m_Model;
}
double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature,
std::size_t byFieldId,
core_t::TTime time) const {
TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time);
if (!count) {
return 1.0;
}
return model_t::varianceScale(feature,
m_Model->dataGatherer().effectiveSampleCount(byFieldId),
static_cast<double>(*count));
}
////////// CMetricPopulationModelDetailsView Implementation //////////
CMetricPopulationModelDetailsView::CMetricPopulationModelDetailsView(const CMetricPopulationModel& model)
: m_Model(&model) {
}
CMetricPopulationModelDetailsView::TTimeTimePr
CMetricPopulationModelDetailsView::dataTimeInterval(std::size_t byFieldId) const {
return {m_Model->attributeFirstBucketTimes()[byFieldId],
m_Model->attributeLastBucketTimes()[byFieldId]};
}
const maths::common::CModel*
CMetricPopulationModelDetailsView::model(model_t::EFeature feature, std::size_t byFieldId) const {
return m_Model->model(feature, byFieldId);
}
const CAnomalyDetectorModel& CMetricPopulationModelDetailsView::base() const {
return *m_Model;
}
double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature,
std::size_t byFieldId,
core_t::TTime time) const {
TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time);
if (!count) {
return 1.0;
}
return model_t::varianceScale(feature,
m_Model->dataGatherer().effectiveSampleCount(byFieldId),
static_cast<double>(*count));
}
}
}