include/model/CAnomalyDetectorModel.h (322 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #ifndef INCLUDED_ml_model_CAnomalyDetectorModel_h #define INCLUDED_ml_model_CAnomalyDetectorModel_h #include <core/CMemoryUsage.h> #include <core/CProgramCounters.h> #include <core/CSmallVector.h> #include <core/CoreTypes.h> #include <maths/time_series/CTimeSeriesModel.h> #include <model/CAnnotation.h> #include <model/ImportExport.h> #include <model/ModelTypes.h> #include <model/SModelParams.h> #include <boost/unordered_map.hpp> #include <cstdint> #include <functional> #include <limits> #include <memory> #include <optional> #include <string> #include <utility> #include <vector> namespace ml { namespace core { class CStatePersistInserter; class CStateRestoreTraverser; } namespace maths { namespace common { class CMultivariatePrior; } } namespace model { class CAnnotation; class CAttributeFrequencyGreaterThan; class CDataGatherer; class CHierarchicalResults; class CInterimBucketCorrector; class CMemoryUsageEstimator; class CModelDetailsView; class CPartitioningFields; class CPersonFrequencyGreaterThan; class CResourceMonitor; struct SAnnotatedProbability; struct SAttributeProbability; //! \brief The model interface. //! //! DESCRIPTION:\n //! This defines the interface common to all (statistical) models of //! the (random) processes which describe system state. It declares //! core functions used by the anomaly detection code to: //! -# Retrieve information about the categories and people of the //! processes being modeled. //! -# Sample the processes in a specified time interval and update //! the model. //! -# Manage the model life-cycle. //! -# Compute the probability of the samples of the process in a //! specified time interval. //! //! The raw events can be partitioned by attribute and/or person (for //! population analysis). These are just two labels which can be //! annotated on the events and induce equivalence relations on the //! set of all events. The events in subsets comprise (some of) //! the raw events for (one of) the processes we model. For example, //! in temporal analysis we would model the history of all events //! for which the labels are equal for each distinct value of the //! label. //! //! There are two main types of analysis: //! -# Individual analysis. //! -# Population analysis. //! //! Individual analysis looks at the historical values of various //! features on a single time series' events and detects significant //! changes in those values. Population analysis looks at similar //! features, but on a whole collection of processes in conjunction //! (induced by the person label equivalence relation). The concrete //! implementations of this class include more detailed descriptions. //! This object also maintains the state to find the most correlated //! pairs of time series. //! //! The extraction of the features from the raw process events is //! managed by a separate object. These include a number of simple //! statistics such as the count of events in a time interval, the //! mean of a certain number of event values, the minimum of a //! certain number of event values and so on. (See model::CDataGatherer //! for more details.) //! //! The model hierarchy is also able to compare two time intervals //! in which case a model really comprises two distinct models of the //! underlying random process one for each time interval: see the //! computeProbability for more details. //! //! IMPLEMENTATION DECISIONS:\n //! The model hierarchy has been abstracted to allow the code to detect //! anomalies to be reused for different types of data, log messages, //! metrics, etc, to perform different types of analysis on that data, //! and to handle the case that data are continuously streamed to the //! object or the case that two different data sets are to be compared. //! //! All models can be serialized to/from text representation. //! //! The hierarchy is non-copyable because we don't currently need to be //! able to copy models and the "correct" copy semantics are not obvious. class MODEL_EXPORT CAnomalyDetectorModel { friend class CModelDetailsView; public: using TSizeVec = std::vector<std::size_t>; using TDoubleVec = std::vector<double>; using TDouble1Vec = core::CSmallVector<double, 1>; using TDouble10Vec = core::CSmallVector<double, 10>; using TDouble10Vec1Vec = core::CSmallVector<TDouble10Vec, 1>; using TDouble1VecDoublePr = std::pair<TDouble1Vec, double>; using TDouble1VecDouble1VecPr = std::pair<TDouble1Vec, TDouble1Vec>; using TSizeDoublePr = std::pair<std::size_t, double>; using TSizeDoublePr1Vec = core::CSmallVector<TSizeDoublePr, 1>; using TSize1Vec = core::CSmallVector<std::size_t, 1>; using TSize2Vec = core::CSmallVector<std::size_t, 2>; using TSize2Vec1Vec = core::CSmallVector<TSize2Vec, 1>; using TDoubleDoublePr = std::pair<double, double>; using TDoubleDoublePrVec = std::vector<TDoubleDoublePr>; using TSizeSizePr = std::pair<std::size_t, std::size_t>; using TStr1Vec = core::CSmallVector<std::string, 1>; using TOptionalDouble = std::optional<double>; using TOptionalDoubleVec = std::vector<TOptionalDouble>; using TOptionalUInt64 = std::optional<std::uint64_t>; using TOptionalSize = std::optional<std::size_t>; using TAttributeProbability1Vec = core::CSmallVector<SAttributeProbability, 1>; using TInfluenceCalculatorCPtr = std::shared_ptr<const CInfluenceCalculator>; using TFeatureInfluenceCalculatorCPtrPr = std::pair<model_t::EFeature, TInfluenceCalculatorCPtr>; using TFeatureInfluenceCalculatorCPtrPrVec = std::vector<TFeatureInfluenceCalculatorCPtrPr>; using TFeatureInfluenceCalculatorCPtrPrVecVec = std::vector<TFeatureInfluenceCalculatorCPtrPrVec>; using TMathsModelSPtr = std::shared_ptr<maths::common::CModel>; using TFeatureMathsModelSPtrPr = std::pair<model_t::EFeature, TMathsModelSPtr>; using TFeatureMathsModelSPtrPrVec = std::vector<TFeatureMathsModelSPtrPr>; using TMathsModelUPtr = std::unique_ptr<maths::common::CModel>; using TMathsModelUPtrVec = std::vector<TMathsModelUPtr>; using TMultivariatePriorSPtr = std::shared_ptr<maths::common::CMultivariatePrior>; using TFeatureMultivariatePriorSPtrPr = std::pair<model_t::EFeature, TMultivariatePriorSPtr>; using TFeatureMultivariatePriorSPtrPrVec = std::vector<TFeatureMultivariatePriorSPtrPr>; using TCorrelationsPtr = std::unique_ptr<maths::time_series::CTimeSeriesCorrelations>; using TFeatureCorrelationsPtrPr = std::pair<model_t::EFeature, TCorrelationsPtr>; using TFeatureCorrelationsPtrPrVec = std::vector<TFeatureCorrelationsPtrPr>; using TDataGathererPtr = std::shared_ptr<CDataGatherer>; using TModelDetailsViewUPtr = std::unique_ptr<CModelDetailsView>; using TModelPtr = std::unique_ptr<CAnomalyDetectorModel>; using TAnnotationVec = std::vector<CAnnotation>; public: //! A value used to indicate a time variable is unset static const core_t::TTime TIME_UNSET; public: //! \name Life-cycle. //@{ //! \param[in] params The global configuration parameters. //! \param[in] dataGatherer The object that gathers time series data. //! \param[in] influenceCalculators The influence calculators to use //! for each feature. CAnomalyDetectorModel(const SModelParams& params, const TDataGathererPtr& dataGatherer, const TFeatureInfluenceCalculatorCPtrPrVecVec& influenceCalculators); //! Create a copy that will result in the same persisted state as the //! original. This is effectively a copy constructor that creates a //! copy that's only valid for a single purpose. The boolean flag is //! redundant except to create a signature that will not be mistaken for //! a general purpose copy constructor. CAnomalyDetectorModel(bool isForPersistence, const CAnomalyDetectorModel& other); virtual ~CAnomalyDetectorModel() = default; CAnomalyDetectorModel& operator=(const CAnomalyDetectorModel&) = delete; //@} //! Get a human understandable description of the model for debugging. std::string description() const; //! \name Persistence //@{ //! Persist the state of the models. virtual void persistModelsState(core::CStatePersistInserter& inserter) const = 0; //! Should the model be persisted? virtual bool shouldPersist() const = 0; //! Persist state by passing information to the supplied inserter. virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; //! Restore the model reading state from the supplied traverser. virtual bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) = 0; //! Create a clone of this model that will result in the same persisted //! state. The clone may be incomplete in ways that do not affect the //! persisted representation, and must not be used for any other //! purpose. //! \warning The caller owns the object returned. virtual CAnomalyDetectorModel* cloneForPersistence() const = 0; //@} //! Get the model category. virtual model_t::EModelType category() const = 0; //! True if this is a population model. virtual bool isPopulation() const = 0; //! Check if this is an event rate model. virtual bool isEventRate() const = 0; //! Check if this is a metric model. virtual bool isMetric() const = 0; //! \name Bucket Statistics //!@{ //! Get the count of the bucketing interval containing \p time //! for the person identified by \p pid. //! //! \param[in] pid The identifier of the person of interest. //! \param[in] time The time of interest. //! \return The count in the bucketing interval at \p time for the //! person identified by \p pid if available and null otherwise. virtual TOptionalUInt64 currentBucketCount(std::size_t pid, core_t::TTime time) const = 0; //! Get the mean count of the person identified by \p pid in the //! reference data set (for comparison). //! //! \param[in] pid The identifier of the person of interest. virtual TOptionalDouble baselineBucketCount(std::size_t pid) const = 0; //! Get the bucket value of \p feature for the person identified //! by \p pid and the attribute identified by \p cid in the //! bucketing interval including \p time. //! //! \param[in] feature The feature of interest. //! \param[in] pid The identifier of the person of interest. //! \param[in] cid The identifier of the attribute of interest. //! \param[in] time The time of interest. //! \return The value of \p feature in the bucket containing //! \p time if available and empty otherwise. virtual TDouble1Vec currentBucketValue(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const = 0; //! Get the appropriate baseline bucket value of \p feature for //! the person identified by \p pid and the attribute identified //! by \p cid as of the start of the current bucketing interval. //! This has subtly different meanings dependent on the model. //! //! \param[in] feature The feature of interest. //! \param[in] pid The identifier of the person of interest. //! \param[in] cid The identifier of the attribute of interest. //! \param[in] type A description of the type of result for which //! to get the baseline. See CResultType for more details. //! \param[in] correlated The correlated series' identifiers and //! their values if any. //! \param[in] time The time of interest. //! \return The baseline mean value of \p feature if available //! and empty otherwise. virtual TDouble1Vec baselineBucketMean(model_t::EFeature feature, std::size_t pid, std::size_t cid, model_t::CResultType type, const TSizeDoublePr1Vec& correlated, core_t::TTime time) const = 0; //! Check if bucket statistics are available for the specified time. virtual bool bucketStatsAvailable(core_t::TTime time) const = 0; //@} //! \name Person //@{ //! Get the name of the person identified by \p pid. This returns //! a default fallback string if the person doesn't exist. const std::string& personName(std::size_t pid) const; //! As above but with a specified fallback. const std::string& personName(std::size_t pid, const std::string& fallback) const; //! Print the people identified by \p pids. //! Optionally, this may be limited to return a string of the form: //! A B C and n others std::string printPeople(const TSizeVec& pids, size_t limit = std::numeric_limits<size_t>::max()) const; //! Get the person unique identifiers which have a feature value //! in the bucketing time interval including \p time. //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the person identifiers //! in the bucketing time interval of interest. virtual void currentBucketPersonIds(core_t::TTime time, TSizeVec& result) const = 0; // TODO this needs to be renamed to numberOfActivePeople, and // the places where it is used carefully checked // (currently only CModelInspector) //! Get the total number of people currently being modeled. std::size_t numberOfPeople() const; //@} //! \name Attribute //@{ //! Get the name of the attribute identified by \p cid. This returns //! a default fallback string if the attribute doesn't exist. //! //! \param[in] cid The identifier of the attribute of interest. const std::string& attributeName(std::size_t cid) const; //! As above but with a specified fallback. const std::string& attributeName(std::size_t cid, const std::string& fallback) const; //! Print the attributes identified by \p cids. //! Optionally, this may be limited to return a string of the form: //! A B C and n others std::string printAttributes(const TSizeVec& cids, size_t limit = std::numeric_limits<size_t>::max()) const; //@} //! \name Update //@{ //! This samples the bucket statistics, and any state needed //! by computeProbablity, in the time interval [\p startTime, //! \p endTime], but does not update the model. This is needed //! by the results preview. //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. virtual void sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; //! Update the model with the samples of the process in the //! time interval [\p startTime, \p endTime]. //! //! \param[in] startTime The start of the time interval to sample. //! \param[in] endTime The end of the time interval to sample. //! \param[in] resourceMonitor The resourceMonitor. virtual void sample(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) = 0; //! Rolls time to \p endTime while skipping sampling the models for //! buckets within the gap. //! //! \param[in] endTime The end of the time interval to skip sampling. void skipSampling(core_t::TTime endTime); //! Prune any person models which haven't been updated for a //! specified period. virtual void prune(std::size_t maximumAge) = 0; //! Prune any person models which haven't been updated for a //! sufficiently long period, based on the prior decay rates. void prune(); //! Calculate the maximum permitted prune window (measured in buckets) //! for this model std::size_t defaultPruneWindow() const; //! Calculate the minimum permitted prune window (measured in buckets) //! for this model std::size_t minimumPruneWindow() const; //@} //! \name Probability //@{ //! Compute the probability of seeing the samples of the process //! for the person identified by \p pid in the time interval //! [\p startTime, \p endTime]. //! //! \param[in] pid The unique identifier of the person of interest. //! \param[in] startTime The start of the time interval of interest. //! \param[in] endTime The end of the time interval of interest. //! \param[in] partitioningFields The partitioning field (name, value) //! pairs for which to compute the the probability. //! \param[in] numberAttributeProbabilities The maximum number of //! attribute probabilities to retrieve. //! \param[out] result A structure containing the probability, //! the smallest \p numberAttributeProbabilities attribute //! probabilities, the influences and any extra descriptive data. virtual bool computeProbability(std::size_t pid, core_t::TTime startTime, core_t::TTime endTime, CPartitioningFields& partitioningFields, std::size_t numberAttributeProbabilities, SAnnotatedProbability& result) const = 0; //! Update the results with this model's probability. //! //! \param[in] startTime The start of the time interval of interest. //! \param[in] endTime The end of the time interval of interest. //! \param[in] numberAttributeProbabilities The maximum number of //! attribute probabilities to retrieve. //! \param[in,out] results The model results are added. bool addResults(core_t::TTime startTime, core_t::TTime endTime, std::size_t numberAttributeProbabilities, CHierarchicalResults& results) const; //! Compute the probability of seeing \p person's attribute processes //! so far given the population distributions. //! //! \param[in] person The person of interest. //! \param[in] numberAttributeProbabilities The maximum number of //! attribute probabilities to retrieve. //! \param[out] probability Filled in with the probability of seeing //! the person's processes given the population processes. //! \param[out] attributeProbabilities Filled in with the smallest //! \p numberAttributeProbabilities attribute probabilities and //! associated data describing the calculation. virtual bool computeTotalProbability(const std::string& person, std::size_t numberAttributeProbabilities, TOptionalDouble& probability, TAttributeProbability1Vec& attributeProbabilities) const = 0; //@} //! Get the checksum of this model. //! //! \param[in] includeCurrentBucketStats If true then include //! the current bucket statistics. (This is designed to handle //! serialization, for which we don't serialize the current //! bucket statistics.) virtual std::uint64_t checksum(bool includeCurrentBucketStats = true) const = 0; //! Get the memory used by this model virtual void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const = 0; //! Get the memory used by this model virtual std::size_t memoryUsage() const = 0; //! Estimate the memory usage of the model based on number of people, //! attributes and correlations. Returns empty when the estimator //! is unable to produce an estimate. TOptionalSize estimateMemoryUsage(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations) const; //! Estimate the memory usage of the model based on number of people, //! attributes and correlations. When an estimate cannot be produced, //! the memory usage is computed and the estimator is updated. std::size_t estimateMemoryUsageOrComputeAndUpdate(std::size_t numberPeople, std::size_t numberAttributes, std::size_t numberCorrelations); //! Get the static size of this object - used for virtual hierarchies virtual std::size_t staticSize() const = 0; //! Get the time series data gatherer. const CDataGatherer& dataGatherer() const; //! Get the time series data gatherer. CDataGatherer& dataGatherer(); //! Get the length of the time interval used to aggregate data. core_t::TTime bucketLength() const; //! Get a view of the internals of the model for visualization. virtual TModelDetailsViewUPtr details() const = 0; //! Get the frequency of the person identified by \p pid. double personFrequency(std::size_t pid) const; //! Get the frequency of the attribute identified by \p cid. virtual double attributeFrequency(std::size_t cid) const = 0; //! Returns true if the the \p is an unset first bucket time static bool isTimeUnset(core_t::TTime); //! Get the descriptions of any occurring scheduled event descriptions for the bucket time virtual const TStr1Vec& scheduledEventDescriptions(core_t::TTime time) const; //! Get the annotations produced by this model. virtual const TAnnotationVec& annotations() const = 0; //! Apply time shift at the time \p time by \p shift amount of seconds. virtual void shiftTime(core_t::TTime time, core_t::TTime shift) = 0; //! Check if the rule has been applied. bool checkRuleApplied(const CDetectionRule& rule) const; //! Mark the rule as applied. void markRuleApplied(const CDetectionRule& rule); protected: using TStrCRef = std::reference_wrapper<const std::string>; using TSizeSize1VecUMap = boost::unordered_map<std::size_t, TSize1Vec>; using TFeatureSizeSize1VecUMapPr = std::pair<model_t::EFeature, TSizeSize1VecUMap>; using TFeatureSizeSize1VecUMapPrVec = std::vector<TFeatureSizeSize1VecUMapPr>; using TUint64TTimePr = std::pair<std::uint64_t, core_t::TTime>; using TUint64TTimePrVec = std::vector<TUint64TTimePr>; //! \brief The feature models. struct MODEL_EXPORT SFeatureModels { SFeatureModels(model_t::EFeature feature, TMathsModelSPtr newModel); SFeatureModels(const SFeatureModels&) = delete; SFeatureModels& operator=(const SFeatureModels&) = delete; SFeatureModels(SFeatureModels&&) = default; SFeatureModels& operator=(SFeatureModels&&) = default; //! Restore the models reading state from \p traverser. bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser); //! Persist the models passing state to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Persist the state of the residual models only. void persistModelsState(core::CStatePersistInserter& inserter) const; //! Debug the memory used by this model. void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; //! Get the memory used by this model. std::size_t memoryUsage() const; //! Determine whether the model should be persisted or not. bool shouldPersist() const; //! The feature. model_t::EFeature s_Feature; //! A prototype model. TMathsModelSPtr s_NewModel; //! The person models. TMathsModelUPtrVec s_Models; }; using TFeatureModelsVec = std::vector<SFeatureModels>; //! \brief The feature correlate models. struct MODEL_EXPORT SFeatureCorrelateModels { SFeatureCorrelateModels(model_t::EFeature feature, const TMultivariatePriorSPtr& modelPrior, TCorrelationsPtr&& model); ~SFeatureCorrelateModels(); SFeatureCorrelateModels(const SFeatureCorrelateModels&) = delete; SFeatureCorrelateModels& operator=(const SFeatureCorrelateModels&) = delete; SFeatureCorrelateModels(SFeatureCorrelateModels&&); SFeatureCorrelateModels& operator=(SFeatureCorrelateModels&&); //! Restore the models reading state from \p traverser. bool acceptRestoreTraverser(const SModelParams& params, core::CStateRestoreTraverser& traverser); //! Persist the models passing state to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; //! Debug the memory used by this model. void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; //! Get the memory used by this model. std::size_t memoryUsage() const; //! The feature. model_t::EFeature s_Feature; //! The prototype prior for a correlate model. TMultivariatePriorSPtr s_ModelPrior; //! The correlate models. TCorrelationsPtr s_Models; }; using TFeatureCorrelateModelsVec = std::vector<SFeatureCorrelateModels>; //! \brief Implements the allocator for new correlate priors. class CTimeSeriesCorrelateModelAllocator : public maths::time_series::CTimeSeriesCorrelateModelAllocator { public: using TMemoryUsage = std::function<std::size_t(std::size_t)>; using TMultivariatePriorUPtr = TMultivariatePriorPtr; public: CTimeSeriesCorrelateModelAllocator(CResourceMonitor& resourceMonitor, TMemoryUsage memoryUsage, std::size_t resourceLimit, std::size_t maxNumberCorrelations); //! Check if we can still allocate any correlations. bool areAllocationsAllowed() const override; //! Check if \p correlations exceeds the memory limit. bool exceedsLimit(std::size_t correlations) const override; //! Get the maximum number of correlations we should model. std::size_t maxNumberCorrelations() const override; //! Get the chunk size in which to allocate correlations. std::size_t chunkSize() const override; //! Create a new prior for a correlation model. TMultivariatePriorUPtr newPrior() const override; //! Set the prototype prior. void prototypePrior(const TMultivariatePriorSPtr& prior); private: //! The global resource monitor. CResourceMonitor* m_ResourceMonitor; //! Computes the current memory usage. TMemoryUsage m_MemoryUsage; //! The number of correlations which can still be modeled. std::size_t m_ResourceLimit; //! The maximum permitted number of correlations which can be modeled. std::size_t m_MaxNumberCorrelations; //! The prototype correlate prior. TMultivariatePriorSPtr m_PrototypePrior; }; protected: //! The maximum time a person or attribute is allowed to live //! without update. static const std::size_t MAXIMUM_PERMITTED_AGE; //! Convenience for persistence. static const std::string EMPTY_STRING; protected: //! Remove heavy hitting people from the \p data if necessary. template<typename T, typename FILTER> void applyFilter(model_t::EExcludeFrequent exclude, bool updateStatistics, const FILTER& filter, T& data) const { if (this->params().s_ExcludeFrequent & exclude) { std::size_t initialSize = data.size(); data.erase(std::remove_if(data.begin(), data.end(), filter), data.end()); if (updateStatistics && data.size() != initialSize) { ++core::CProgramCounters::counter(counter_t::E_TSADNumberExcludedFrequentInvocations); } } } //! Get the predicate used for removing heavy hitting people. CPersonFrequencyGreaterThan personFilter() const; //! Get the predicate used for removing heavy hitting attributes. CAttributeFrequencyGreaterThan attributeFilter() const; //! Get the global configuration parameters. const SModelParams& params() const; //! Get the LearnRate parameter from the model configuration - //! this may be affected by the current feature being used virtual double learnRate(model_t::EFeature feature) const; //! Get the start time of the current bucket. virtual core_t::TTime currentBucketStartTime() const = 0; //! Set the start time of the current bucket. virtual void currentBucketStartTime(core_t::TTime time) = 0; //! Get the influence calculator for the influencer field identified //! by \p iid and the \p feature. const CInfluenceCalculator* influenceCalculator(model_t::EFeature feature, std::size_t iid) const; //! Get the person bucket counts. const TDoubleVec& personBucketCounts() const; //! Writable access to the person bucket counts. TDoubleVec& personBucketCounts(); //! Set the total count of buckets in the window. void windowBucketCount(double windowBucketCount); //! Get the total count of buckets in the window. double windowBucketCount() const; //! Create the time series models for "n" newly observed people //! and "m" newly observed attributes. virtual void createNewModels(std::size_t n, std::size_t m) = 0; //! Reinitialize the time series models for recycled people and/or //! attributes. virtual void updateRecycledModels() = 0; //! Clear out large state objects for people/attributes that are pruned virtual void clearPrunedResources(const TSizeVec& people, const TSizeVec& attributes) = 0; //! Get the object which calculates corrections for interim buckets. virtual const CInterimBucketCorrector& interimValueCorrector() const = 0; //! Get the value of the initial count weight to apply to the model's //! samples, as determined by the detection rules. double initialCountWeight(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; //! Should the event be omitted from the quantiles and the results? bool shouldSkipUpdate(model_t::EFeature feature, std::size_t pid, std::size_t cid, core_t::TTime time) const; //! Check if any of the result-filtering detection rules apply to this series. bool shouldIgnoreResult(model_t::EFeature feature, const model_t::CResultType& resultType, std::size_t pid, std::size_t cid, core_t::TTime time) const; //! Get the non-estimated value of the the memory used by this model. virtual std::size_t computeMemoryUsage() const = 0; //! Create a stub version of maths::common::CModel for use when pruning people //! or attributes to free memory resource. static maths::common::CModel* tinyModel(); //! Add an annotation to the model. virtual void addAnnotation(core_t::TTime, CAnnotation::EEvent type, const std::string& annotation) = 0; TUint64TTimePrVec& appliedRuleChecksums(); const TUint64TTimePrVec& appliedRuleChecksums() const; private: using TModelParamsCRef = std::reference_wrapper<const SModelParams>; private: //! Skip sampling the interval \p endTime - \p startTime. virtual void doSkipSampling(core_t::TTime startTime, core_t::TTime endTime) = 0; //! Get the model memory usage estimator virtual CMemoryUsageEstimator* memoryUsageEstimator() const = 0; private: //! The global configuration parameters. TModelParamsCRef m_Params; //! The data gatherer. (This is not persisted by the model hierarchy.) TDataGathererPtr m_DataGatherer; //! The bucket count of each person in the exponentially decaying //! window with decay rate equal to m_DecayRate. TDoubleVec m_PersonBucketCounts; //! The total number of buckets in the exponentially decaying window //! with decay rate equal to m_DecayRate. double m_BucketCount; //! The influence calculators to use for each feature which is being //! modeled. TFeatureInfluenceCalculatorCPtrPrVecVec m_InfluenceCalculators; //! Checksums of the rules that should be applied only once. TUint64TTimePrVec m_AppliedRuleChecksums; }; class CMemoryCircuitBreaker : public core::CMemoryCircuitBreaker { public: explicit CMemoryCircuitBreaker(CResourceMonitor& resourceMonitor) : m_ResourceMonitor{&resourceMonitor} {} bool areAllocationsAllowed() const override; private: CResourceMonitor* m_ResourceMonitor; }; } } #endif // INCLUDED_ml_model_CAnomalyDetectorModel_h