lib/maths/common/CModel.cc (341 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <maths/common/CModel.h>
#include <maths/common/CModelDetail.h>
#include <core/CLogger.h>
#include <core/Constants.h>
#include <maths/common/CTools.h>
#include <algorithm>
#include <cmath>
#include <limits>
namespace ml {
namespace maths {
namespace common {
namespace {
const std::string EMPTY_STRING;
const double EFFECTIVE_COUNT[]{1.0, 0.8, 0.7, 0.65, 0.6,
0.57, 0.54, 0.52, 0.51};
//! Get the parameters for the stub model.
CModelParams stubParameters() {
return CModelParams{
0, 1.0, 0.0, 0.0, 6 * core::constants::HOUR, core::constants::DAY};
}
}
//////// CModelParams ////////
CModelParams::CModelParams(core_t::TTime bucketLength,
double learnRate,
double decayRate,
double minimumSeasonalVarianceScale,
core_t::TTime minimumTimeToDetectChange,
core_t::TTime maximumTimeToTestForChange)
: m_BucketLength(bucketLength), m_LearnRate(learnRate), m_DecayRate(decayRate),
m_MinimumSeasonalVarianceScale(minimumSeasonalVarianceScale),
m_MinimumTimeToDetectChange(std::max(minimumTimeToDetectChange, 6 * bucketLength)),
m_MaximumTimeToTestForChange(std::max(maximumTimeToTestForChange, 12 * bucketLength)) {
}
core_t::TTime CModelParams::bucketLength() const {
return m_BucketLength;
}
double CModelParams::learnRate() const {
return m_LearnRate;
}
double CModelParams::decayRate() const {
return m_DecayRate;
}
double CModelParams::averagingDecayRate() const {
return 5.0 * m_DecayRate;
}
double CModelParams::minimumSeasonalVarianceScale() const {
return m_MinimumSeasonalVarianceScale;
}
bool CModelParams::testForChange(core_t::TTime changeInterval) const {
return changeInterval >= std::max(3 * m_BucketLength, core::constants::HOUR);
}
core_t::TTime CModelParams::minimumTimeToDetectChange() const {
return m_MinimumTimeToDetectChange;
}
core_t::TTime CModelParams::maximumTimeToTestForChange() const {
return m_MaximumTimeToTestForChange;
}
//////// CModelAddSamplesParams ////////
CModelAddSamplesParams& CModelAddSamplesParams::isInteger(bool isInteger) {
m_Type = isInteger ? maths_t::E_IntegerData : maths_t::E_ContinuousData;
return *this;
}
maths_t::EDataType CModelAddSamplesParams::type() const {
return m_Type;
}
CModelAddSamplesParams& CModelAddSamplesParams::isNonNegative(bool isNonNegative) {
m_IsNonNegative = isNonNegative;
return *this;
}
bool CModelAddSamplesParams::isNonNegative() const {
return m_IsNonNegative;
}
CModelAddSamplesParams& CModelAddSamplesParams::bucketOccupancy(double occupancy) {
m_Occupancy = occupancy;
return *this;
}
double CModelAddSamplesParams::bucketOccupancy() const {
return m_Occupancy;
}
CModelAddSamplesParams& CModelAddSamplesParams::firstValueTime(core_t::TTime time) {
m_FirstValueTime = time;
return *this;
}
core_t::TTime CModelAddSamplesParams::firstValueTime() const {
return m_FirstValueTime;
}
CModelAddSamplesParams& CModelAddSamplesParams::propagationInterval(double interval) {
m_PropagationInterval = interval;
return *this;
}
double CModelAddSamplesParams::propagationInterval() const {
return m_PropagationInterval;
}
CModelAddSamplesParams&
CModelAddSamplesParams::trendWeights(const TDouble2VecWeightsAryVec& weights) {
m_TrendWeights = &weights;
return *this;
}
const CModelAddSamplesParams::TDouble2VecWeightsAryVec&
CModelAddSamplesParams::trendWeights() const {
return *m_TrendWeights;
}
CModelAddSamplesParams&
CModelAddSamplesParams::priorWeights(const TDouble2VecWeightsAryVec& weights) {
m_PriorWeights = &weights;
return *this;
}
const CModelAddSamplesParams::TDouble2VecWeightsAryVec&
CModelAddSamplesParams::priorWeights() const {
return *m_PriorWeights;
}
CModelAddSamplesParams&
CModelAddSamplesParams::annotationCallback(const maths_t::TModelAnnotationCallback& modelAnnotationCallback) {
m_ModelAnnotationCallback = modelAnnotationCallback;
return *this;
}
const maths_t::TModelAnnotationCallback& CModelAddSamplesParams::annotationCallback() const {
return m_ModelAnnotationCallback;
}
CModelAddSamplesParams&
CModelAddSamplesParams::memoryCircuitBreaker(const core::CMemoryCircuitBreaker& breaker) {
m_MemoryCircuitBreaker = &breaker;
return *this;
}
const core::CMemoryCircuitBreaker& CModelAddSamplesParams::memoryCircuitBreaker() const {
return *m_MemoryCircuitBreaker;
}
//////// CModelProbabilityParams ////////
CModelProbabilityParams::CModelProbabilityParams()
: m_SeasonalConfidenceInterval{DEFAULT_SEASONAL_CONFIDENCE_INTERVAL} {
}
CModelProbabilityParams&
CModelProbabilityParams::addCalculation(maths_t::EProbabilityCalculation calculation) {
m_Calculations.push_back(calculation);
return *this;
}
std::size_t CModelProbabilityParams::calculations() const {
return m_Calculations.size();
}
maths_t::EProbabilityCalculation CModelProbabilityParams::calculation(std::size_t i) const {
return m_Calculations.size() == 1 ? m_Calculations[0] : m_Calculations[i];
}
CModelProbabilityParams& CModelProbabilityParams::seasonalConfidenceInterval(double confidence) {
m_SeasonalConfidenceInterval = confidence;
return *this;
}
double CModelProbabilityParams::seasonalConfidenceInterval() const {
return m_SeasonalConfidenceInterval;
}
CModelProbabilityParams&
CModelProbabilityParams::addWeights(const TDouble2VecWeightsAry& weights) {
m_Weights.push_back(weights);
return *this;
}
CModelProbabilityParams&
CModelProbabilityParams::weights(const TDouble2VecWeightsAry1Vec& weights) {
m_Weights = weights;
return *this;
}
const CModelProbabilityParams::TDouble2VecWeightsAry1Vec&
CModelProbabilityParams::weights() const {
return m_Weights;
}
CModelProbabilityParams::TDouble2VecWeightsAry1Vec& CModelProbabilityParams::weights() {
return m_Weights;
}
CModelProbabilityParams& CModelProbabilityParams::addCoordinate(std::size_t coordinate) {
m_Coordinates.push_back(coordinate);
return *this;
}
const CModelProbabilityParams::TSize2Vec& CModelProbabilityParams::coordinates() const {
return m_Coordinates;
}
CModelProbabilityParams& CModelProbabilityParams::mostAnomalousCorrelate(std::size_t correlate) {
m_MostAnomalousCorrelate.emplace(correlate);
return *this;
}
CModelProbabilityParams::TOptionalSize CModelProbabilityParams::mostAnomalousCorrelate() const {
return m_MostAnomalousCorrelate;
}
CModelProbabilityParams& CModelProbabilityParams::useMultibucketFeatures(bool use) {
m_UseMultibucketFeatures = use;
return *this;
}
bool CModelProbabilityParams::useMultibucketFeatures() const {
return m_UseMultibucketFeatures;
}
CModelProbabilityParams& CModelProbabilityParams::useAnomalyModel(bool use) {
m_UseAnomalyModel = use;
return *this;
}
bool CModelProbabilityParams::useAnomalyModel() const {
return m_UseAnomalyModel;
}
CModelProbabilityParams& CModelProbabilityParams::initialCountWeight(double initialCountWeight) {
m_InitialCountWeight = initialCountWeight;
return *this;
}
double CModelProbabilityParams::initialCountWeight() const {
return m_InitialCountWeight;
}
//////// SModelProbabilityResult::SFeatureProbability ////////
SModelProbabilityResult::SFeatureProbability::SFeatureProbability()
: s_Label{E_UndefinedProbability} {
}
SModelProbabilityResult::SFeatureProbability::SFeatureProbability(EFeatureProbabilityLabel label,
double probability)
: s_Label{label}, s_Probability{probability} {
}
//////// CModel ////////
const double CModel::DEFAULT_BOUNDS_PERCENTILE{95.0};
CModel::CModel(const CModelParams& params) : m_Params(params) {
}
double CModel::effectiveCount(std::size_t n) {
return n <= std::size(EFFECTIVE_COUNT) ? EFFECTIVE_COUNT[n - 1] : 0.5;
}
double CModel::emptyBucketWeight(double occupancy) {
// We smoothly transition to ignoring empty buckets when the bucket
// occupancy is less than 0.5.
return common::CTools::truncate(2.0 * occupancy, 1e-6, 1.0);
}
const CModelParams& CModel::params() const {
return m_Params;
}
CModelParams& CModel::params() {
return m_Params;
}
bool CModel::shouldPersist() const {
return true;
}
//////// CModelStub ////////
CModelStub::CModelStub() : CModel(stubParameters()) {
}
std::size_t CModelStub::identifier() const {
return 0;
}
CModelStub* CModelStub::clone(std::size_t /*id*/) const {
return new CModelStub(*this);
}
CModelStub* CModelStub::cloneForPersistence() const {
return new CModelStub(*this);
}
CModelStub* CModelStub::cloneForForecast() const {
return new CModelStub(*this);
}
bool CModelStub::isForecastPossible() const {
return false;
}
void CModelStub::modelCorrelations(time_series::CTimeSeriesCorrelations& /*model*/) {
}
CModelStub::TSize2Vec1Vec CModelStub::correlates() const {
return {};
}
CModelStub::TDouble2Vec CModelStub::mode(core_t::TTime /*time*/,
const TDouble2VecWeightsAry& /*weights*/) const {
return {};
}
CModelStub::TDouble2Vec1Vec
CModelStub::correlateModes(core_t::TTime /*time*/,
const TDouble2VecWeightsAry1Vec& /*weights*/) const {
return {};
}
CModelStub::TDouble2Vec1Vec
CModelStub::residualModes(const TDouble2VecWeightsAry& /*weights*/) const {
return {};
}
void CModelStub::addBucketValue(const TTimeDouble2VecSizeTrVec& /*value*/) {
}
CModelStub::EUpdateResult CModelStub::addSamples(const CModelAddSamplesParams& /*params*/,
TTimeDouble2VecSizeTrVec /*samples*/) {
return E_Success;
}
void CModelStub::skipTime(core_t::TTime /*gap*/) {
}
void CModelStub::detrend(const TTime2Vec1Vec& /*time*/,
double /*confidenceInterval*/,
TDouble2Vec1Vec& /*value*/) const {
}
CModelStub::TDouble2Vec CModelStub::predict(core_t::TTime /*time*/,
const TSizeDoublePr1Vec& /*correlated*/,
TDouble2Vec /*hint*/) const {
return {};
}
CModelStub::TDouble2Vec3Vec
CModelStub::confidenceInterval(core_t::TTime /*time*/,
double /*confidenceInterval*/,
const TDouble2VecWeightsAry& /*weights*/) const {
return {};
}
bool CModelStub::forecast(core_t::TTime /*firstDataTime*/,
core_t::TTime /*lastDataTime*/,
core_t::TTime /*startTime*/,
core_t::TTime /*endTime*/,
double /*confidenceInterval*/,
const TDouble2Vec& /*minimum*/,
const TDouble2Vec& /*maximum*/,
const TForecastPushDatapointFunc& /*forecastPushDataPointFunc*/,
std::string& /*messageOut*/) {
return true;
}
bool CModelStub::probability(const CModelProbabilityParams& /*params*/,
const TTime2Vec1Vec& /*time*/,
const TDouble2Vec1Vec& /*value*/,
SModelProbabilityResult& result) const {
result = SModelProbabilityResult{};
return true;
}
void CModelStub::countWeights(core_t::TTime /*time*/,
const TDouble2Vec& /*value*/,
double /*trendCountWeight*/,
double /*residualCountWeight*/,
double /*outlierWeightDerate*/,
double /*countVarianceScale*/,
TDouble2VecWeightsAry& /*trendWeights*/,
TDouble2VecWeightsAry& /*residualWeights*/) const {
}
void CModelStub::addCountWeights(core_t::TTime /*time*/,
double /*trendCountWeight*/,
double /*residualCountWeight*/,
double /*countVarianceScale*/,
TDouble2VecWeightsAry& /*trendWeights*/,
TDouble2VecWeightsAry& /*residualWeights*/) const {
}
void CModelStub::seasonalWeight(double /*confidence*/,
core_t::TTime /*time*/,
TDouble2Vec& /*weight*/) const {
}
std::uint64_t CModelStub::checksum(std::uint64_t seed) const {
return seed;
}
void CModelStub::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& /*mem*/) const {
}
std::size_t CModelStub::memoryUsage() const {
return 0;
}
void CModelStub::acceptPersistInserter(core::CStatePersistInserter& /*inserter*/) const {
}
void CModelStub::persistModelsState(core::CStatePersistInserter& /*inserter*/) const {
}
maths_t::EDataType CModelStub::dataType() const {
return maths_t::E_MixedData;
}
bool CModelStub::shouldPersist() const {
return false;
}
void CModelStub::shiftTime(core_t::TTime /*time*/, core_t::TTime /*shift*/) {
}
}
}
}