lib/model/CAnomalyDetectorModelConfig.cc (859 lines of code) (raw):

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the following additional limitation. Functionality enabled by the
 * files subject to the Elastic License 2.0 may only be used in production when
 * invoked by an Elasticsearch process with a license key installed that permits
 * use of machine learning features. You may not use this file except in
 * compliance with the Elastic License 2.0 and the foregoing additional
 * limitation.
 */

#include <model/CAnomalyDetectorModelConfig.h>

#include <core/CContainerPrinter.h>
#include <core/CStreamUtils.h>
#include <core/Constants.h>

#include <maths/common/CModel.h>
#include <maths/common/COrderings.h>
#include <maths/common/CTools.h>
#include <maths/common/Constants.h>

#include <model/CCountingModelFactory.h>
#include <model/CEventRateModelFactory.h>
#include <model/CEventRatePopulationModelFactory.h>
#include <model/CInterimBucketCorrector.h>
#include <model/CMetricModelFactory.h>
#include <model/CMetricPopulationModelFactory.h>
#include <model/CSearchKey.h>
#include <model/FunctionTypes.h>

#include <boost/property_tree/ini_parser.hpp>
#include <boost/property_tree/ptree.hpp>

#include <algorithm>
#include <fstream>

namespace ml {
namespace model {

namespace {

// Empty containers the default constructor binds its detection rules and
// scheduled events members to.
const CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMap EMPTY_RULES_MAP;
const CAnomalyDetectorModelConfig::TStrDetectionRulePrVec EMPTY_EVENTS;

namespace detail {

// Maps a non-positive bucket length onto DEFAULT_BUCKET_LENGTH and returns
// any positive length unchanged.
core_t::TTime validateBucketLength(core_t::TTime length) {
    // A zero or negative length is used by the individual commands to request
    // the default length - this avoids the need for the commands to know the
    // default length
    return length <= 0 ? CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH : length;
}
}
}

const std::string CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER(",");
const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH(300);
const std::size_t CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS(0);
const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY(1);
const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY(10);
const double CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR(0.1);
const core_t::TTime CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH(1800);
const double CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE(0.0005);
const double CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER(4.0);
const double CAnomalyDetectorModelConfig::DEFAULT_LEARN_RATE(1.0);
const double CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION(0.05);
const double CAnomalyDetectorModelConfig::DEFAULT_POPULATION_MINIMUM_MODE_FRACTION(0.05);
const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT(12.0);
const double CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION(0.8);
const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36u);
const core_t::TTime
    CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(core::constants::DAY);
const core_t::TTime
    CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_TIME_TO_TEST_FOR_CHANGE(2 * core::constants::DAY);
const std::size_t CAnomalyDetectorModelConfig::MULTIBUCKET_FEATURES_WINDOW_LENGTH(12);
const double CAnomalyDetectorModelConfig::MAXIMUM_MULTI_BUCKET_IMPACT_MAGNITUDE(5.0);
const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET(1.0);
const double CAnomalyDetectorModelConfig::DEFAULT_INFLUENCE_CUTOFF(0.4);
const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM(0.25);
const double CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM(4.0);
const double CAnomalyDetectorModelConfig::DEFAULT_CORRELATION_MODELS_OVERHEAD(3.0);
const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_SIGNIFICANT_CORRELATION(0.3);
// One row per aggregation style, one column per aggregation parameter.
const double CAnomalyDetectorModelConfig::DEFAULT_AGGREGATION_STYLE_PARAMS[][model_t::NUMBER_AGGREGATION_PARAMS] =
    {{0.0, 1.0, 1.0, 1.0}, {0.5, 0.5, 1.0, 5.0}, {0.5, 0.5, 1.0, 1.0}};
// The default for maximumanomalousprobability now matches the default
// for unusualprobabilitythreshold in mllimits.conf - this avoids
// inconsistencies in output
const double CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY(0.035);
const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_PERCENTILE(50.0);
const double CAnomalyDetectorModelConfig::DEFAULT_NOISE_MULTIPLIER(1.0);
// (percentile, score) pairs running from (0, 0) to (100, 100).
const CAnomalyDetectorModelConfig::TDoubleDoublePr
    CAnomalyDetectorModelConfig::DEFAULT_NORMALIZED_SCORE_KNOT_POINTS[9] = {
        CAnomalyDetectorModelConfig::TDoubleDoublePr(0.0, 0.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(70.0, 1.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(85.0, 1.2),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(90.0, 1.5),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(95.0, 3.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(97.0, 20.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(99.0, 50.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(99.9, 90.0),
        CAnomalyDetectorModelConfig::TDoubleDoublePr(100.0, 100.0)};

// Builds a configuration holding one prototype factory of each type.
//
// bucketLength: a non-positive value selects DEFAULT_BUCKET_LENGTH.
// summaryMode, summaryCountFieldName: forwarded to every factory.
// latency: forwarded, together with the bucket length, to
//   SModelParams::configureLatency.
// multivariateByFields: stored on the returned configuration.
//
// The default learn and decay rates are scaled by the bucket normalization
// factor for the (validated) bucket length.
CAnomalyDetectorModelConfig
CAnomalyDetectorModelConfig::defaultConfig(core_t::TTime bucketLength,
                                           model_t::ESummaryMode summaryMode,
                                           const std::string& summaryCountFieldName,
                                           core_t::TTime latency,
                                           bool multivariateByFields) {
    bucketLength = detail::validateBucketLength(bucketLength);

    const double normalization = bucketNormalizationFactor(bucketLength);

    SModelParams params(bucketLength);
    params.s_LearnRate = DEFAULT_LEARN_RATE * normalization;
    params.s_DecayRate = DEFAULT_DECAY_RATE * normalization;
    params.s_ExcludeFrequent = model_t::E_XF_None;
    params.configureLatency(latency, bucketLength);

    TInterimBucketCorrectorPtr interimBucketCorrector =
        std::make_shared<CInterimBucketCorrector>(bucketLength);

    TFactoryTypeFactoryPtrMap factories;

    // Individual (non-population) models.
    params.s_MinimumModeFraction = DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTION;
    factories[E_EventRateFactory] = std::make_shared<CEventRateModelFactory>(
        params, interimBucketCorrector, summaryMode, summaryCountFieldName);
    factories[E_MetricFactory] = std::make_shared<CMetricModelFactory>(
        params, interimBucketCorrector, summaryMode, summaryCountFieldName);

    // Population models: switch to the population fraction before building
    // *both* population factories so that they agree with processStanza,
    // which applies the population mode fraction to both of them.
    params.s_MinimumModeFraction = DEFAULT_POPULATION_MINIMUM_MODE_FRACTION;
    factories[E_EventRatePopulationFactory] = std::make_shared<CEventRatePopulationModelFactory>(
        params, interimBucketCorrector, summaryMode, summaryCountFieldName);
    factories[E_MetricPopulationFactory] = std::make_shared<CMetricPopulationModelFactory>(
        params, interimBucketCorrector, summaryMode, summaryCountFieldName);

    // Counting models.
    params.s_MinimumModeFraction = 1.0;
    factories[E_CountingFactory] = std::make_shared<CCountingModelFactory>(
        params, interimBucketCorrector, summaryMode, summaryCountFieldName);

    CAnomalyDetectorModelConfig result;
    result.bucketLength(bucketLength);
    result.interimBucketCorrector(interimBucketCorrector);
    result.multivariateByFields(multivariateByFields);
    result.factories(factories);
    return result;
}

// De-rates the decay and learn rate to account for differences from the
// standard bucket length.
double CAnomalyDetectorModelConfig::bucketNormalizationFactor(core_t::TTime bucketLength) {
    return std::min(1.0, static_cast<double>(bucketLength) /
                             static_cast<double>(STANDARD_BUCKET_LENGTH));
}

// Standard decay rate for time series decompositions given the specified
// model decay rate and bucket length.
double CAnomalyDetectorModelConfig::trendDecayRate(double modelDecayRate, core_t::TTime bucketLength) { double scale = static_cast<double>(bucketLength / 24 / STANDARD_BUCKET_LENGTH); return std::min(48.0 * modelDecayRate / bucketNormalizationFactor(bucketLength) / std::max(scale, 1.0), 0.1); } CAnomalyDetectorModelConfig::CAnomalyDetectorModelConfig() : m_BucketLength(STANDARD_BUCKET_LENGTH), m_MultivariateByFields(false), m_ModelPlotBoundsPercentile(-1.0), m_MaximumAnomalousProbability(DEFAULT_MAXIMUM_ANOMALOUS_PROBABILITY), m_NoisePercentile(DEFAULT_NOISE_PERCENTILE), m_NoiseMultiplier(DEFAULT_NOISE_MULTIPLIER), m_NormalizedScoreKnotPoints(std::begin(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS), std::end(DEFAULT_NORMALIZED_SCORE_KNOT_POINTS)), m_DetectionRules(EMPTY_RULES_MAP), m_ScheduledEvents(EMPTY_EVENTS) { for (std::size_t i = 0; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { m_AggregationStyleParams[i][j] = DEFAULT_AGGREGATION_STYLE_PARAMS[i][j]; } } } void CAnomalyDetectorModelConfig::bucketLength(core_t::TTime length) { m_BucketLength = length; for (auto& factory : m_Factories) { factory.second->updateBucketLength(length); } } void CAnomalyDetectorModelConfig::interimBucketCorrector(const TInterimBucketCorrectorPtr& interimBucketCorrector) { m_InterimBucketCorrector = interimBucketCorrector; for (auto& factory : m_Factories) { factory.second->interimBucketCorrector(m_InterimBucketCorrector); } } void CAnomalyDetectorModelConfig::useMultibucketFeatures(bool enabled) { for (auto& factory : m_Factories) { factory.second->multibucketFeaturesWindowLength( enabled ? 
MULTIBUCKET_FEATURES_WINDOW_LENGTH : 0); } } void CAnomalyDetectorModelConfig::multivariateByFields(bool enabled) { m_MultivariateByFields = enabled; } void CAnomalyDetectorModelConfig::factories(const TFactoryTypeFactoryPtrMap& factories) { m_Factories = factories; } bool CAnomalyDetectorModelConfig::aggregationStyleParams(model_t::EAggregationStyle style, model_t::EAggregationParam param, double value) { switch (param) { case model_t::E_JointProbabilityWeight: if (value < 0.0 || value > 1.0) { LOG_ERROR(<< "joint probability weight " << value << " out of in range [0,1]"); return false; } m_AggregationStyleParams[style][model_t::E_JointProbabilityWeight] = value; break; case model_t::E_ExtremeProbabilityWeight: if (value < 0.0 || value > 1.0) { LOG_ERROR(<< "extreme probability weight " << value << " out of in range [0,1]"); return false; } m_AggregationStyleParams[style][model_t::E_ExtremeProbabilityWeight] = value; break; case model_t::E_MinExtremeSamples: if (value < 1.0 || value > 10.0) { LOG_ERROR(<< "min extreme samples " << value << " out of in range [0,10]"); return false; } m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = value; m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = std::max( value, m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples]); break; case model_t::E_MaxExtremeSamples: if (value < 1.0 || value > 10.0) { LOG_ERROR(<< "max extreme samples " << value << " out of in range [0,10]"); return false; } m_AggregationStyleParams[style][model_t::E_MaxExtremeSamples] = value; m_AggregationStyleParams[style][model_t::E_MinExtremeSamples] = std::min( value, m_AggregationStyleParams[style][model_t::E_MinExtremeSamples]); break; } return true; } void CAnomalyDetectorModelConfig::maximumAnomalousProbability(double probability) { double minimum = 100 * maths::common::MINUSCULE_PROBABILITY; if (probability < minimum || probability > 1.0) { LOG_INFO(<< "Maximum anomalous probability " << probability << " out of range 
[" << minimum << "," << 1.0 << "] truncating"); } m_MaximumAnomalousProbability = maths::common::CTools::truncate(probability, minimum, 1.0); } bool CAnomalyDetectorModelConfig::noisePercentile(double percentile) { if (percentile < 0.0 || percentile > 100.0) { LOG_ERROR(<< "Noise percentile " << percentile << " out of range [0, 100]"); return false; } m_NoisePercentile = percentile; return true; } bool CAnomalyDetectorModelConfig::noiseMultiplier(double multiplier) { if (multiplier <= 0.0) { LOG_ERROR(<< "Noise multiplier must be positive"); return false; } m_NoiseMultiplier = multiplier; return true; } bool CAnomalyDetectorModelConfig::normalizedScoreKnotPoints(const TDoubleDoublePrVec& points) { if (points.empty()) { LOG_ERROR(<< "Must provide at least two know points"); return false; } if (points[0].first != 0.0 && points[0].second != 0.0) { LOG_ERROR(<< "First knot point must be (0,0)"); return false; } if (points.back().first != 100.0 && points.back().second != 100.0) { LOG_ERROR(<< "Last knot point must be (100,100)"); return false; } for (std::size_t i = 0; i < points.size(); i += 2) { if (points[i].first < 0.0 || points[i].first > 100.0) { LOG_ERROR(<< "Unexpected value " << points[i].first << " for percentile"); return false; } if (points[i].second < 0.0 || points[i].second > 100.0) { LOG_ERROR(<< "Unexpected value " << points[i].second << " for score"); return false; } } if (!std::is_sorted(points.begin(), points.end(), maths::common::COrderings::SFirstLess())) { LOG_ERROR(<< "Percentiles must be monotonic increasing " << points); return false; } if (!std::is_sorted(points.begin(), points.end(), maths::common::COrderings::SSecondLess())) { LOG_ERROR(<< "Scores must be monotonic increasing " << points); return false; } m_NormalizedScoreKnotPoints = points; m_NormalizedScoreKnotPoints.erase(std::unique(m_NormalizedScoreKnotPoints.begin(), m_NormalizedScoreKnotPoints.end()), m_NormalizedScoreKnotPoints.end()); return true; } bool 
CAnomalyDetectorModelConfig::init(const std::string& configFile) { boost::property_tree::ptree propTree; return this->init(configFile, propTree); } bool CAnomalyDetectorModelConfig::init(const std::string& configFile, boost::property_tree::ptree& propTree) { LOG_DEBUG(<< "Reading config file " << configFile); try { std::ifstream strm(configFile.c_str()); if (!strm.is_open()) { LOG_ERROR(<< "Error opening config file " << configFile); return false; } core::CStreamUtils::skipUtf8Bom(strm); boost::property_tree::ini_parser::read_ini(strm, propTree); } catch (boost::property_tree::ptree_error& e) { LOG_ERROR(<< "Error reading config file " << configFile << " : " << e.what()); return false; } if (this->init(propTree) == false) { LOG_ERROR(<< "Error reading config file " << configFile); return false; } return true; } bool CAnomalyDetectorModelConfig::init(const boost::property_tree::ptree& propTree) { static const std::string MODEL_STANZA("model"); static const std::string ANOMALY_SCORE_STANZA("anomalyscore"); bool result = true; for (boost::property_tree::ptree::const_iterator i = propTree.begin(); i != propTree.end(); ++i) { const std::string& stanzaName = i->first; const boost::property_tree::ptree& propertyTree = i->second; if (stanzaName == MODEL_STANZA) { if (this->processStanza(propertyTree) == false) { LOG_ERROR(<< "Error reading model config stanza: " << MODEL_STANZA); result = false; } } else if (stanzaName == ANOMALY_SCORE_STANZA) { if (this->processStanza(propertyTree) == false) { LOG_ERROR(<< "Error reading model config stanza: " << ANOMALY_SCORE_STANZA); result = false; } } else { LOG_WARN(<< "Ignoring unknown model config stanza: " << stanzaName); } } return result; } void CAnomalyDetectorModelConfig::configureModelPlot(bool modelPlotEnabled, bool annotationsEnabled, const std::string& terms) { m_ModelPlotEnabled = modelPlotEnabled; if (m_ModelPlotEnabled) { m_ModelPlotBoundsPercentile = maths::common::CModel::DEFAULT_BOUNDS_PERCENTILE; } 
m_ModelPlotAnnotationsEnabled = annotationsEnabled; for (auto& factory : m_Factories) { factory.second->annotationsEnabled(annotationsEnabled); } TStrVec tokens; std::string remainder; core::CStringUtils::tokenise(",", terms, tokens, remainder); if (remainder.empty() == false) { tokens.push_back(remainder); } m_ModelPlotTerms.clear(); for (const auto& token : tokens) { m_ModelPlotTerms.insert(token); } } bool CAnomalyDetectorModelConfig::configureModelPlot(const std::string& modelPlotConfigFile) { LOG_DEBUG(<< "Reading model plot config file " << modelPlotConfigFile); boost::property_tree::ptree propTree; try { std::ifstream strm(modelPlotConfigFile.c_str()); if (!strm.is_open()) { LOG_ERROR(<< "Error opening model plot config file " << modelPlotConfigFile); return false; } core::CStreamUtils::skipUtf8Bom(strm); boost::property_tree::ini_parser::read_ini(strm, propTree); } catch (boost::property_tree::ptree_error& e) { LOG_ERROR(<< "Error reading model plot config file " << modelPlotConfigFile << " : " << e.what()); return false; } if (this->configureModelPlot(propTree) == false) { LOG_ERROR(<< "Error reading model plot config file " << modelPlotConfigFile); return false; } return true; } namespace { // Model debug config properties const std::string BOUNDS_PERCENTILE_PROPERTY("boundspercentile"); const std::string TERMS_PROPERTY("terms"); const std::string ANNOTATIONS_ENABLED_PROPERTY("annotations_enabled"); } bool CAnomalyDetectorModelConfig::configureModelPlot(const boost::property_tree::ptree& propTree) { try { std::string valueStr(propTree.get<std::string>(BOUNDS_PERCENTILE_PROPERTY)); if (core::CStringUtils::stringToType(valueStr, m_ModelPlotBoundsPercentile) == false) { LOG_ERROR(<< "Cannot parse as double: " << valueStr); return false; } } catch (boost::property_tree::ptree_error&) { LOG_ERROR(<< "Error reading model debug config. 
Property '" << BOUNDS_PERCENTILE_PROPERTY << "' is missing"); return false; } m_ModelPlotTerms.clear(); try { std::string valueStr(propTree.get<std::string>(TERMS_PROPERTY)); TStrVec tokens; std::string remainder; core::CStringUtils::tokenise(",", valueStr, tokens, remainder); if (!remainder.empty()) { tokens.push_back(remainder); } for (std::size_t i = 0; i < tokens.size(); ++i) { m_ModelPlotTerms.insert(tokens[i]); } } catch (boost::property_tree::ptree_error&) { LOG_ERROR(<< "Error reading model debug config. Property '" << TERMS_PROPERTY << "' is missing"); return false; } try { std::string valueStr(propTree.get<std::string>(ANNOTATIONS_ENABLED_PROPERTY)); bool annotationsEnabled = false; if (core::CStringUtils::stringToType(valueStr, annotationsEnabled) == false) { LOG_ERROR(<< "Cannot parse as bool: " << valueStr); return false; } m_ModelPlotAnnotationsEnabled = annotationsEnabled; for (auto& factory : m_Factories) { factory.second->annotationsEnabled(annotationsEnabled); } } catch (boost::property_tree::ptree_error&) { LOG_ERROR(<< "Error reading model debug config. Property '" << ANNOTATIONS_ENABLED_PROPERTY << "' is missing"); return false; } return true; } bool CAnomalyDetectorModelConfig::modelPlotEnabled() const { return m_ModelPlotEnabled; } bool CAnomalyDetectorModelConfig::modelPlotAnnotationsEnabled() const { return m_ModelPlotAnnotationsEnabled; } CAnomalyDetectorModelConfig::TModelFactoryCPtr CAnomalyDetectorModelConfig::factory(const CSearchKey& key) const { TModelFactoryCPtr result = m_FactoryCache[key]; if (!result) { result = key.isSimpleCount() ? 
this->factory(key.detectorIndex(), key.function(), true, key.excludeFrequent(), key.partitionFieldName(), key.overFieldName(), key.byFieldName(), key.fieldName(), key.influenceFieldNames()) : this->factory(key.detectorIndex(), key.function(), key.useNull(), key.excludeFrequent(), key.partitionFieldName(), key.overFieldName(), key.byFieldName(), key.fieldName(), key.influenceFieldNames()); } return result; } CAnomalyDetectorModelConfig::TModelFactoryCPtr CAnomalyDetectorModelConfig::factory(int detectorIndex, function_t::EFunction function, bool useNull, model_t::EExcludeFrequent excludeFrequent, const std::string& partitionFieldName, const std::string& overFieldName, const std::string& byFieldName, const std::string& valueFieldName, const CSearchKey::TStrVec& influenceFieldNames) const { const TFeatureVec& features = function_t::features(function); // Simple state machine to deduce the factory type from // a collection of features. EFactoryType factory = E_UnknownFactory; for (std::size_t i = 0; i < features.size(); ++i) { switch (factory) { case E_EventRateFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: break; case model_t::E_Metric: factory = E_MetricFactory; break; case model_t::E_PopulationEventRate: case model_t::E_PopulationMetric: factory = E_BadFactory; break; } break; case E_MetricFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: case model_t::E_Metric: break; case model_t::E_PopulationEventRate: case model_t::E_PopulationMetric: factory = E_BadFactory; break; } break; case E_EventRatePopulationFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: case model_t::E_Metric: factory = E_BadFactory; break; case model_t::E_PopulationEventRate: break; case model_t::E_PopulationMetric: factory = E_BadFactory; break; } break; case E_MetricPopulationFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: case model_t::E_Metric: case 
model_t::E_PopulationEventRate: factory = E_BadFactory; break; case model_t::E_PopulationMetric: factory = E_MetricPopulationFactory; break; } break; case E_CountingFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: case model_t::E_Metric: case model_t::E_PopulationEventRate: case model_t::E_PopulationMetric: factory = E_BadFactory; break; } break; case E_UnknownFactory: switch (model_t::analysisCategory(features[i])) { case model_t::E_EventRate: factory = CSearchKey::isSimpleCount(function, byFieldName) ? E_CountingFactory : E_EventRateFactory; break; case model_t::E_Metric: factory = E_MetricFactory; break; case model_t::E_PopulationEventRate: factory = E_EventRatePopulationFactory; break; case model_t::E_PopulationMetric: factory = E_MetricPopulationFactory; break; } break; case E_BadFactory: break; } } TFactoryTypeFactoryPtrMapCItr prototype = m_Factories.find(factory); if (prototype == m_Factories.end()) { LOG_ABORT(<< "No factory for features = " << features); } TModelFactoryPtr result(prototype->second->clone()); result->detectorIndex(detectorIndex); TStrVec influences; influences.reserve(influenceFieldNames.size()); for (const auto& influenceFieldName : influenceFieldNames) { influences.push_back(influenceFieldName); } result->fieldNames(partitionFieldName, overFieldName, byFieldName, valueFieldName, influences); result->useNull(useNull); result->excludeFrequent(excludeFrequent); result->features(features); result->multivariateByFields(m_MultivariateByFields); TIntDetectionRuleVecUMapCItr rulesItr = m_DetectionRules.get().find(detectorIndex); if (rulesItr != m_DetectionRules.get().end()) { result->detectionRules(TDetectionRuleVecCRef(rulesItr->second)); } result->scheduledEvents(m_ScheduledEvents); return result; } void CAnomalyDetectorModelConfig::decayRate(double value) { for (auto& factory : m_Factories) { factory.second->decayRate(value); } } double CAnomalyDetectorModelConfig::decayRate() const { return 
m_Factories.begin()->second->modelParams().s_DecayRate; } core_t::TTime CAnomalyDetectorModelConfig::bucketLength() const { return m_BucketLength; } core_t::TTime CAnomalyDetectorModelConfig::modelPruneWindow() const { return m_ModelPruneWindow; } core_t::TTime CAnomalyDetectorModelConfig::latency() const { return m_BucketLength * m_Factories.begin()->second->modelParams().s_LatencyBuckets; } std::size_t CAnomalyDetectorModelConfig::latencyBuckets() const { return m_Factories.begin()->second->modelParams().s_LatencyBuckets; } const CInterimBucketCorrector& CAnomalyDetectorModelConfig::interimBucketCorrector() const { return *m_InterimBucketCorrector; } bool CAnomalyDetectorModelConfig::multivariateByFields() const { return m_MultivariateByFields; } void CAnomalyDetectorModelConfig::modelPlotBoundsPercentile(double percentile) { if (percentile < 0.0 || percentile >= 100.0) { LOG_ERROR(<< "Bad confidence interval"); return; } m_ModelPlotBoundsPercentile = percentile; } double CAnomalyDetectorModelConfig::modelPlotBoundsPercentile() const { return m_ModelPlotBoundsPercentile; } void CAnomalyDetectorModelConfig::modelPlotTerms(TStrSet terms) { m_ModelPlotTerms.swap(terms); } const CAnomalyDetectorModelConfig::TStrSet& CAnomalyDetectorModelConfig::modelPlotTerms() const { return m_ModelPlotTerms; } double CAnomalyDetectorModelConfig::aggregationStyleParam(model_t::EAggregationStyle style, model_t::EAggregationParam param) const { return m_AggregationStyleParams[style][param]; } double CAnomalyDetectorModelConfig::maximumAnomalousProbability() const { return m_MaximumAnomalousProbability; } double CAnomalyDetectorModelConfig::noisePercentile() const { return m_NoisePercentile; } double CAnomalyDetectorModelConfig::noiseMultiplier() const { return m_NoiseMultiplier; } const CAnomalyDetectorModelConfig::TDoubleDoublePrVec& CAnomalyDetectorModelConfig::normalizedScoreKnotPoints() const { return m_NormalizedScoreKnotPoints; } void 
CAnomalyDetectorModelConfig::detectionRules(TIntDetectionRuleVecUMapCRef detectionRules) { m_DetectionRules = detectionRules; } void CAnomalyDetectorModelConfig::scheduledEvents(TStrDetectionRulePrVecCRef scheduledEvents) { m_ScheduledEvents = scheduledEvents; } void CAnomalyDetectorModelConfig::modelPruneWindow(core_t::TTime modelPruneWindow) { m_ModelPruneWindow = modelPruneWindow; } core_t::TTime CAnomalyDetectorModelConfig::samplingAgeCutoff() const { return m_Factories.begin()->second->modelParams().s_SamplingAgeCutoff; } namespace { const std::string ONLINE_LEARN_RATE_PROPERTY("learnrate"); const std::string DECAY_RATE_PROPERTY("decayrate"); const std::string INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY("initialdecayratemultiplier"); const std::string MAXIMUM_UPDATES_PER_BUCKET_PROPERTY("maximumupdatesperbucket"); const std::string INDIVIDUAL_MODE_FRACTION_PROPERTY("individualmodefraction"); const std::string POPULATION_MODE_FRACTION_PROPERTY("populationmodefraction"); const std::string COMPONENT_SIZE_PROPERTY("componentsize"); const std::string SAMPLE_COUNT_FACTOR_PROPERTY("samplecountfactor"); const std::string PRUNE_WINDOW_SCALE_MINIMUM("prunewindowscaleminimum"); const std::string PRUNE_WINDOW_SCALE_MAXIMUM("prunewindowscalemaximum"); const std::string AGGREGATION_STYLE_PARAMS("aggregationstyleparams"); const std::string MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY("maximumanomalousprobability"); const std::string NOISE_PERCENTILE_PROPERTY("noisepercentile"); const std::string NOISE_MULTIPLIER_PROPERTY("noisemultiplier"); const std::string NORMALIZED_SCORE_KNOT_POINTS("normalizedscoreknotpoints"); } bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptree& propertyTree) { bool result = true; for (const auto& property : propertyTree) { std::string propName = property.first; std::string propValue = property.second.data(); core::CStringUtils::trimWhitespace(propValue); if (propName == ONLINE_LEARN_RATE_PROPERTY) { double learnRate = 
DEFAULT_LEARN_RATE; if (core::CStringUtils::stringToType(propValue, learnRate) == false || learnRate <= 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } learnRate *= bucketNormalizationFactor(this->bucketLength()); for (auto& factory : m_Factories) { factory.second->learnRate(learnRate); } } else if (propName == DECAY_RATE_PROPERTY) { double decayRate = DEFAULT_DECAY_RATE; if (core::CStringUtils::stringToType(propValue, decayRate) == false || decayRate <= 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } decayRate *= bucketNormalizationFactor(this->bucketLength()); for (auto& factory : m_Factories) { factory.second->decayRate(decayRate); } } else if (propName == INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY) { double multiplier = DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER; if (core::CStringUtils::stringToType(propValue, multiplier) == false || multiplier < 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } for (auto& factory : m_Factories) { factory.second->initialDecayRateMultiplier(multiplier); } } else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY) { double maximumUpdatesPerBucket; if (core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false || maximumUpdatesPerBucket < 0.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } for (auto& factory : m_Factories) { factory.second->maximumUpdatesPerBucket(maximumUpdatesPerBucket); } } else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) { double fraction; if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } if (m_Factories.count(E_EventRateFactory) > 0) { 
m_Factories[E_EventRateFactory]->minimumModeFraction(fraction); } if (m_Factories.count(E_MetricFactory) > 0) { m_Factories[E_MetricFactory]->minimumModeFraction(fraction); } } else if (propName == POPULATION_MODE_FRACTION_PROPERTY) { double fraction; if (core::CStringUtils::stringToType(propValue, fraction) == false || fraction < 0.0 || fraction > 1.0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } if (m_Factories.count(E_EventRatePopulationFactory) > 0) { m_Factories[E_EventRatePopulationFactory]->minimumModeFraction(fraction); } if (m_Factories.count(E_MetricPopulationFactory) > 0) { m_Factories[E_MetricPopulationFactory]->minimumModeFraction(fraction); } } else if (propName == COMPONENT_SIZE_PROPERTY) { int componentSize; if (core::CStringUtils::stringToType(propValue, componentSize) == false || componentSize < 0) { LOG_ERROR(<< "Invalid value of property " << propName << " : " << propValue); result = false; continue; } for (auto& factory : m_Factories) { factory.second->componentSize(componentSize); } } else if (propName == SAMPLE_COUNT_FACTOR_PROPERTY) { int factor; if (core::CStringUtils::stringToType(propValue, factor) == false || factor < 0) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } for (auto& factory : m_Factories) { factory.second->sampleCountFactor(factor); } } else if (propName == PRUNE_WINDOW_SCALE_MINIMUM) { double factor; if (core::CStringUtils::stringToType(propValue, factor) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } for (auto& factory : m_Factories) { factory.second->pruneWindowScaleMinimum(factor); } } else if (propName == PRUNE_WINDOW_SCALE_MAXIMUM) { double factor; if (core::CStringUtils::stringToType(propValue, factor) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } for 
(auto& factory : m_Factories) { factory.second->pruneWindowScaleMaximum(factor); } } else if (propName == AGGREGATION_STYLE_PARAMS) { core::CStringUtils::trimWhitespace(propValue); propValue = core::CStringUtils::normaliseWhitespace(propValue); TStrVec strings; std::string remainder; core::CStringUtils::tokenise(" ", propValue, strings, remainder); if (!remainder.empty()) { strings.push_back(remainder); } std::size_t n = model_t::NUMBER_AGGREGATION_STYLES * model_t::NUMBER_AGGREGATION_PARAMS; if (strings.size() != n) { LOG_ERROR(<< "Expected " << n << " values for " << propName); result = false; continue; } for (std::size_t j = 0, l = 0; j < model_t::NUMBER_AGGREGATION_STYLES; ++j) { for (std::size_t k = 0; k < model_t::NUMBER_AGGREGATION_PARAMS; ++k, ++l) { double value; if (core::CStringUtils::stringToType(strings[l], value) == false) { LOG_ERROR(<< "Unexpected value " << strings[l] << " in property " << propName); result = false; continue; } this->aggregationStyleParams( static_cast<model_t::EAggregationStyle>(j), static_cast<model_t::EAggregationParam>(k), value); } } } else if (propName == MAXIMUM_ANOMALOUS_PROBABILITY_PROPERTY) { double probability; if (core::CStringUtils::stringToType(propValue, probability) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } this->maximumAnomalousProbability(probability); } else if (propName == NOISE_PERCENTILE_PROPERTY) { double percentile; if (core::CStringUtils::stringToType(propValue, percentile) == false || this->noisePercentile(percentile) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } } else if (propName == NOISE_MULTIPLIER_PROPERTY) { double multiplier; if (core::CStringUtils::stringToType(propValue, multiplier) == false || this->noiseMultiplier(multiplier) == false) { LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); result = false; continue; } } 
else if (propName == NORMALIZED_SCORE_KNOT_POINTS) { core::CStringUtils::trimWhitespace(propValue); propValue = core::CStringUtils::normaliseWhitespace(propValue); TStrVec strings; std::string remainder; core::CStringUtils::tokenise(" ", propValue, strings, remainder); if (!remainder.empty()) { strings.push_back(remainder); } if (strings.empty() || (strings.size() % 2) != 0) { LOG_ERROR(<< "Expected even number of values for property " << propName << " " << strings); result = false; continue; } TDoubleDoublePrVec points; points.reserve(strings.size() / 2 + 2); points.emplace_back(0.0, 0.0); for (std::size_t j = 0; j < strings.size(); j += 2) { double rate; double score; if (core::CStringUtils::stringToType(strings[j], rate) == false) { LOG_ERROR(<< "Unexpected value " << strings[j] << " for rate in property " << propName); result = false; continue; } if (core::CStringUtils::stringToType(strings[j + 1], score) == false) { LOG_ERROR(<< "Unexpected value " << strings[j + 1] << " for score in property " << propName); result = false; continue; } points.emplace_back(rate, score); } points.emplace_back(100.0, 100.0); this->normalizedScoreKnotPoints(points); } else { LOG_WARN(<< "Ignoring unknown property " << propName); } } return result; } double CAnomalyDetectorModelConfig::bucketNormalizationFactor() const { return bucketNormalizationFactor(m_BucketLength); } } }