lib/api/CAnomalyJobConfig.cc (1,002 lines of code) (raw):

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #include <api/CAnomalyJobConfig.h> #include <core/CBoostJsonParser.h> #include <core/CLogger.h> #include <core/CStringUtils.h> #include <core/CTimeUtils.h> #include <core/Constants.h> #include <api/CAnomalyJobConfigReader.h> #include <model/CAnomalyDetectorModelConfig.h> #include <model/CLimits.h> #include <model/FunctionTypes.h> #include <boost/json.hpp> #include <random> namespace ml { namespace api { const std::string CAnomalyJobConfig::JOB_ID{"job_id"}; const std::string CAnomalyJobConfig::JOB_TYPE{"job_type"}; const std::string CAnomalyJobConfig::ANALYSIS_CONFIG{"analysis_config"}; const std::string CAnomalyJobConfig::ANALYSIS_LIMITS{"analysis_limits"}; const std::string CAnomalyJobConfig::DATA_DESCRIPTION{"data_description"}; const std::string CAnomalyJobConfig::BACKGROUND_PERSIST_INTERVAL{"background_persist_interval"}; const std::string CAnomalyJobConfig::MODEL_PLOT_CONFIG{"model_plot_config"}; const std::string CAnomalyJobConfig::FILTERS{"filters"}; const std::string CAnomalyJobConfig::EVENTS{"events"}; const core_t::TTime CAnomalyJobConfig::BASE_MAX_QUANTILE_INTERVAL{21600}; // 6 hours const core_t::TTime CAnomalyJobConfig::DEFAULT_BASE_PERSIST_INTERVAL{10800}; // 3 hours const std::string CAnomalyJobConfig::CAnalysisConfig::BUCKET_SPAN{"bucket_span"}; const std::string CAnomalyJobConfig::CAnalysisConfig::MODEL_PRUNE_WINDOW{"model_prune_window"}; const std::string CAnomalyJobConfig::CAnalysisConfig::SUMMARY_COUNT_FIELD_NAME{ "summary_count_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CATEGORIZATION_FIELD_NAME{ "categorization_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CATEGORIZATION_FILTERS{"categorization_filters"}; const std::string CAnomalyJobConfig::CAnalysisConfig::DETECTORS{"detectors"}; const std::string CAnomalyJobConfig::CAnalysisConfig::INFLUENCERS{"influencers"}; const std::string CAnomalyJobConfig::CAnalysisConfig::PER_PARTITION_CATEGORIZATION{ "per_partition_categorization"}; const std::string CAnomalyJobConfig::CAnalysisConfig::ENABLED{"enabled"}; const std::string CAnomalyJobConfig::CAnalysisConfig::STOP_ON_WARN{"stop_on_warn"}; const std::string CAnomalyJobConfig::CAnalysisConfig::LATENCY{"latency"}; const std::string CAnomalyJobConfig::CAnalysisConfig::MULTIVARIATE_BY_FIELDS{"multivariate_by_fields"}; const core_t::TTime CAnomalyJobConfig::CAnalysisConfig::DEFAULT_BUCKET_SPAN{300}; const core_t::TTime CAnomalyJobConfig::CAnalysisConfig::DEFAULT_LATENCY{0}; const std::string CAnomalyJobConfig::CAnalysisConfig::CLEAR{"clear"}; const char CAnomalyJobConfig::CAnalysisConfig::SUFFIX_SEPARATOR{'.'}; const std::string CAnomalyJobConfig::CAnalysisConfig::SCHEDULED_EVENT_PREFIX("scheduledevent."); const std::string CAnomalyJobConfig::CAnalysisConfig::DESCRIPTION_SUFFIX(".description"); const std::string CAnomalyJobConfig::CAnalysisConfig::RULES_SUFFIX(".rules"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES{"detector_rules"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION{"function"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FIELD_NAME{"field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::BY_FIELD_NAME{"by_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::OVER_FIELD_NAME{ "over_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::PARTITION_FIELD_NAME{ "partition_field_name"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_DESCRIPTION{ "detector_description"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_INDEX{"detector_index"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::EXCLUDE_FREQUENT{ "exclude_frequent"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::USE_NULL{"use_null"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::CUSTOM_RULES{"custom_rules"}; const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::ALL_TOKEN("all"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::BY_TOKEN("by"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::NONE_TOKEN("none"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::OVER_TOKEN("over"); // Event rate functions const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_COUNT("count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_COUNT_ABBREV("c"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_COUNT("low_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_COUNT_ABBREV("low_c"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_COUNT("high_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_COUNT_ABBREV("high_c"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_DISTINCT_COUNT("distinct_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_DISTINCT_COUNT_ABBREV("dc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_DISTINCT_COUNT("low_distinct_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_DISTINCT_COUNT_ABBREV("low_dc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_DISTINCT_COUNT("high_distinct_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_DISTINCT_COUNT_ABBREV("high_dc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_NON_ZERO_COUNT("non_zero_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_NON_ZERO_COUNT_ABBREV("nzc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_RARE_NON_ZERO_COUNT("rare_non_zero_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_RARE_NON_ZERO_COUNT_ABBREV("rnzc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_RARE("rare"); // No abbreviation for "rare" as "r" is a little too obscure const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_RARE_COUNT("rare_count"); // No abbreviation for "rare_count" as "rc" is sometimes used as an abbreviation // for "return code" const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_FREQ_RARE("freq_rare"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_FREQ_RARE_ABBREV("fr"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_FREQ_RARE_COUNT("freq_rare_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_FREQ_RARE_COUNT_ABBREV("frc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_NON_ZERO_COUNT("low_non_zero_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_NON_ZERO_COUNT_ABBREV("low_nzc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_NON_ZERO_COUNT("high_non_zero_count"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV("high_nzc"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_INFO_CONTENT("info_content"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_INFO_CONTENT("low_info_content"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_INFO_CONTENT("high_info_content"); // Metric functions const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_METRIC("metric"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_AVERAGE("avg"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MEAN("mean"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_MEAN("low_mean"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_MEAN("high_mean"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_AVERAGE("low_avg"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_AVERAGE("high_avg"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MEDIAN("median"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_MEDIAN("low_median"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_MEDIAN("high_median"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MIN("min"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MAX("max"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_VARIANCE("varp"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_VARIANCE("low_varp"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_VARIANCE("high_varp"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_SUM("sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_SUM("low_sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_SUM("high_sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_NON_NULL_SUM("non_null_sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_NON_NULL_SUM_ABBREV("nns"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_NON_NULL_SUM("low_non_null_sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LOW_NON_NULL_SUM_ABBREV("low_nns"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_NON_NULL_SUM("high_non_null_sum"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_HIGH_NON_NULL_SUM_ABBREV("high_nns"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_TIME_OF_DAY("time_of_day"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_TIME_OF_WEEK("time_of_week"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_LAT_LONG("lat_long"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MAX_VELOCITY("max_velocity"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MIN_VELOCITY("min_velocity"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_MEAN_VELOCITY("mean_velocity"); const std::string CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION_SUM_VELOCITY("sum_velocity"); const std::string CAnomalyJobConfig::CModelPlotConfig::ANNOTATIONS_ENABLED{"annotations_enabled"}; const std::string CAnomalyJobConfig::CModelPlotConfig::ENABLED{"enabled"}; const std::string CAnomalyJobConfig::CModelPlotConfig::TERMS{"terms"}; const std::string CAnomalyJobConfig::CAnalysisLimits::CATEGORIZATION_EXAMPLES_LIMIT{ "categorization_examples_limit"}; const std::string CAnomalyJobConfig::CAnalysisLimits::MODEL_MEMORY_LIMIT{"model_memory_limit"}; const std::size_t CAnomalyJobConfig::CAnalysisLimits::DEFAULT_MEMORY_LIMIT_BYTES{ 1024ULL * 1024 * 1024}; const std::string CAnomalyJobConfig::CDataDescription::TIME_FIELD{"time_field"}; const std::string CAnomalyJobConfig::CDataDescription::TIME_FORMAT{"time_format"}; const std::string CAnomalyJobConfig::CDataDescription::DEFAULT_TIME_FIELD{"time"}; const std::string CAnomalyJobConfig::CEventConfig::DESCRIPTION{"description"}; const std::string CAnomalyJobConfig::CEventConfig::RULES{"rules"}; const std::string CAnomalyJobConfig::CFilterConfig::FILTER_ID{"filter_id"}; const std::string CAnomalyJobConfig::CFilterConfig::ITEMS{"items"}; namespace { const std::string EMPTY_STRING; std::string toString(const json::value& value) { return json::serialize(value); }; const CAnomalyJobConfigReader FILTERS_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CFilterConfig::FILTER_ID, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CFilterConfig::ITEMS, CAnomalyJobConfigReader::E_RequiredParameter); return theReader; }()}; const CAnomalyJobConfigReader EVENTS_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CEventConfig::DESCRIPTION, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CEventConfig::RULES, CAnomalyJobConfigReader::E_RequiredParameter); return theReader; }()}; const CAnomalyJobConfigReader CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::JOB_ID, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::JOB_TYPE, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::ANALYSIS_CONFIG, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::ANALYSIS_LIMITS, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::MODEL_PLOT_CONFIG, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::DATA_DESCRIPTION, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::BACKGROUND_PERSIST_INTERVAL, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; const CAnomalyJobConfigReader ANALYSIS_CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::BUCKET_SPAN, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::MODEL_PRUNE_WINDOW, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::SUMMARY_COUNT_FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CATEGORIZATION_FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CATEGORIZATION_FILTERS, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::PER_PARTITION_CATEGORIZATION, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::DETECTORS, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::INFLUENCERS, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::LATENCY, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::MULTIVARIATE_BY_FIELDS, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; const CAnomalyJobConfigReader DETECTOR_CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FUNCTION, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::BY_FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::OVER_FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::PARTITION_FIELD_NAME, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_DESCRIPTION, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_INDEX, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::EXCLUDE_FREQUENT, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::USE_NULL, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::CUSTOM_RULES, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; const CAnomalyJobConfigReader CUSTOM_RULES_UPDATE_CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_INDEX, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::CUSTOM_RULES, CAnomalyJobConfigReader::E_RequiredParameter); return theReader; }()}; const CAnomalyJobConfigReader PPC_CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::ENABLED, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisConfig::STOP_ON_WARN, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; const CAnomalyJobConfigReader MODEL_PLOT_CONFIG_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CModelPlotConfig::ANNOTATIONS_ENABLED, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CModelPlotConfig::ENABLED, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CModelPlotConfig::TERMS, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; const CAnomalyJobConfigReader ANALYSIS_LIMITS_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CAnalysisLimits::CATEGORIZATION_EXAMPLES_LIMIT, CAnomalyJobConfigReader::E_OptionalParameter); theReader.addParameter(CAnomalyJobConfig::CAnalysisLimits::MODEL_MEMORY_LIMIT, CAnomalyJobConfigReader::E_RequiredParameter); return theReader; }()}; const CAnomalyJobConfigReader DATA_DESCRIPTION_READER{[] { CAnomalyJobConfigReader theReader; theReader.addParameter(CAnomalyJobConfig::CDataDescription::TIME_FIELD, CAnomalyJobConfigReader::E_RequiredParameter); theReader.addParameter(CAnomalyJobConfig::CDataDescription::TIME_FORMAT, CAnomalyJobConfigReader::E_OptionalParameter); return theReader; }()}; } bool CAnomalyJobConfig::initFromFile(const std::string& configFile) { std::string anomalyJobConfigJson; bool couldReadConfigFile; std::tie(anomalyJobConfigJson, couldReadConfigFile) = ml::core::CStringUtils::readFileToString(configFile); if (couldReadConfigFile == false) { LOG_ERROR(<< "Failed to read config file '" << configFile << "'"); return false; } if (this->parse(anomalyJobConfigJson) == false) { LOG_ERROR(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'"); return false; } return true; } bool CAnomalyJobConfig::readFile(const std::string& fileName, std::string& fileContents) { bool couldReadFile; std::tie(fileContents, couldReadFile) = ml::core::CStringUtils::readFileToString(fileName); if (couldReadFile == false) { LOG_ERROR(<< "Failed to read file '" << fileName << "'"); return false; } return true; } bool CAnomalyJobConfig::initFromFiles(const std::string& configFile, const std::string& filtersConfigFile, const std::string& eventsConfigFile) { std::string filtersConfigJson; if (filtersConfigFile.empty() == false && this->readFile(filtersConfigFile, filtersConfigJson)) { if (this->parseFilterConfig(filtersConfigJson) == false) { LOG_ERROR(<< "Failed to parse filters job config: '" << filtersConfigJson << "'"); return false; } } std::string eventsConfigJson; if (eventsConfigFile.empty() == false && this->readFile(eventsConfigFile, eventsConfigJson)) { if (this->parseEventConfig(eventsConfigJson) == false) { LOG_ERROR(<< "Failed to parse scheduled events config: '" << eventsConfigJson << "'"); return false; } } m_AnalysisConfig.init(m_RuleFilters, m_ScheduledEvents); std::string anomalyJobConfigJson; if (this->readFile(configFile, anomalyJobConfigJson) == false) { // error logged by readFile return false; } if (this->parse(anomalyJobConfigJson) == false) { LOG_ERROR(<< "Failed to parse anomaly job config: '" << anomalyJobConfigJson << "'"); return false; } return true; } bool CAnomalyJobConfig::parseEventConfig(const std::string& json) { json::value doc; bool ok = core::CBoostJsonParser::parse(json, doc); if (ok == false) { LOG_ERROR(<< "An error occurred while parsing scheduled event config from JSON: \"" << json << "\""); return false; } if (doc.is_object() == false) { LOG_ERROR(<< "An error occurred while parsing scheduled event config from JSON. " << "Expected JSON object but was: \"" << json << "\""); return false; } const json::object& obj = doc.as_object(); m_ScheduledEvents.clear(); if (obj.empty()) { return true; } try { if (obj.contains(EVENTS) == false || obj.at(EVENTS).is_array() == false) { LOG_ERROR(<< "Missing expected array field '" << EVENTS << "'. JSON: " << json); return false; } const json::value& value = obj.at(EVENTS); if (value.is_array() == false) { LOG_ERROR(<< "Expected JSON array but was: \"" << json::serialize(value) << "\""); } json::array arr = value.as_array(); m_Events.clear(); m_Events.resize(arr.size()); for (unsigned int i = 0; i < arr.size(); ++i) { if (arr[i].is_object() == false) { LOG_ERROR(<< "Could not parse scheduled events: expected events array to contain objects. JSON: " << json); return false; } m_Events[i].parse(arr[i], m_RuleFilters, m_ScheduledEvents); } } catch (CAnomalyJobConfigReader::CParseError& e) { LOG_ERROR(<< "Error parsing events config: " << e.what()); return false; } return true; } void CAnomalyJobConfig::CEventConfig::parse(const json::value& filterConfig, const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, TStrDetectionRulePrVec& scheduledEvents) { auto parameters = EVENTS_READER.read(filterConfig); m_Description = parameters[DESCRIPTION].as<std::string>(); auto eventRules = parameters[RULES].jsonObject(); if (eventRules != nullptr) { std::string errorString; CDetectionRulesJsonParser rulesParser(ruleFilters); if (rulesParser.parseRules(*eventRules, m_DetectionRules, errorString) == false) { LOG_ERROR(<< errorString << toString(*eventRules)); throw CAnomalyJobConfigReader::CParseError( "Error parsing scheduled event rules: " + toString(*eventRules)); } } if (m_DetectionRules.size() != 1) { throw CAnomalyJobConfigReader::CParseError( "Scheduled events must have exactly 1 rule: " + toString(*eventRules)); } scheduledEvents.emplace_back(m_Description, m_DetectionRules[0]); } bool CAnomalyJobConfig::parseFilterConfig(const std::string& jsonString) { json::value doc; bool ok = core::CBoostJsonParser::parse(jsonString, doc); if (ok == false) { LOG_ERROR(<< "An error occurred while parsing filter config from JSON: \"" << jsonString << "\""); return false; } if (doc.is_object() == false) { LOG_ERROR(<< "An error occurred while parsing filter config from JSON. " << "Expected JSON object but got \"" << jsonString << "\""); return false; } const json::object& obj = doc.as_object(); if (obj.empty()) { return true; } try { if (obj.contains(FILTERS) == false || obj.at(FILTERS).is_array() == false) { LOG_ERROR(<< "Missing expected array field '" << FILTERS << "'. JSON: " << jsonString); return false; } const json::array& arr = obj.at(FILTERS).as_array(); m_Filters.resize(arr.size()); for (unsigned int i = 0; i < arr.size(); ++i) { if (arr[i].is_object() == false) { LOG_ERROR(<< "Could not parse filters: expected filters array to contain objects. JSON: " << toString(arr[i])); return false; } m_Filters[i].parse(arr[i], m_RuleFilters); } } catch (CAnomalyJobConfigReader::CParseError& e) { LOG_ERROR(<< "Error parsing filter config: " << e.what()); return false; } return true; } void CAnomalyJobConfig::CFilterConfig::parse(const json::value& filterConfig, CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) { auto parameters = FILTERS_READER.read(filterConfig); m_FilterName = parameters[FILTER_ID].as<std::string>(); m_FilterList = parameters[ITEMS].fallback(TStrVec{}); core::CPatternSet& filter = ruleFilters[m_FilterName]; filter.clear(); if (filter.initFromPatternList(m_FilterList) == false) { throw CAnomalyJobConfigReader::CParseError("Error building filter rules: " + toString(filterConfig)); } } bool CAnomalyJobConfig::parse(const std::string& jsonStr) { json::value doc; bool ok = core::CBoostJsonParser::parse(jsonStr, doc); if (ok == false) { LOG_ERROR(<< "An error occurred while parsing anomaly job config from JSON: \"" << jsonStr << "\""); return false; } LOG_TRACE(<< "Received anomaly job configuration document: " << doc); try { auto parameters = CONFIG_READER.read(doc); m_JobId = parameters[JOB_ID].as<std::string>(); m_JobType = parameters[JOB_TYPE].fallback(EMPTY_STRING); auto analysisConfig = parameters[ANALYSIS_CONFIG].jsonObject(); if (analysisConfig != nullptr) { m_AnalysisConfig.parse(*analysisConfig); } auto analysisLimits = parameters[ANALYSIS_LIMITS].jsonObject(); if (analysisLimits != nullptr) { m_AnalysisLimits.parse(*analysisLimits); } auto description = parameters[DATA_DESCRIPTION].jsonObject(); if (description != nullptr) { m_DataDescription.parse(*description); } auto modelPlotConfig = parameters[MODEL_PLOT_CONFIG].jsonObject(); if (modelPlotConfig != nullptr) { m_ModelConfig.parse(*modelPlotConfig); } const core_t::TTime defaultBackgroundPersistInterval{ DEFAULT_BASE_PERSIST_INTERVAL + this->intervalStagger()}; const std::string& backgroundPersistIntervalString{ parameters[BACKGROUND_PERSIST_INTERVAL].fallback(EMPTY_STRING)}; if (backgroundPersistIntervalString.empty() == false) { m_BackgroundPersistInterval = CAnomalyJobConfig::CAnalysisConfig::durationSeconds( backgroundPersistIntervalString, defaultBackgroundPersistInterval); } else { m_BackgroundPersistInterval = defaultBackgroundPersistInterval; } m_MaxQuantilePersistInterval = BASE_MAX_QUANTILE_INTERVAL + this->intervalStagger(); } catch (CAnomalyJobConfigReader::CParseError& e) { LOG_ERROR(<< "Error parsing anomaly job config: " << e.what()); return false; } m_IsInitialized = true; return true; } core_t::TTime CAnomalyJobConfig::intervalStagger() { std::seed_seq seed(m_JobId.begin(), m_JobId.end()); std::mt19937 generator{seed}; std::uniform_int_distribution<> distribution{0, core::constants::HOUR - 1}; return distribution(generator); } void CAnomalyJobConfig::CModelPlotConfig::parse(const json::value& modelPlotConfig) { auto parameters = MODEL_PLOT_CONFIG_READER.read(modelPlotConfig); m_AnnotationsEnabled = parameters[ANNOTATIONS_ENABLED].fallback(false); m_Enabled = parameters[ENABLED].fallback(false); m_Terms = parameters[TERMS].fallback(EMPTY_STRING); } void CAnomalyJobConfig::CAnalysisLimits::parse(const json::value& analysisLimits) { auto parameters = ANALYSIS_LIMITS_READER.read(analysisLimits); m_CategorizationExamplesLimit = parameters[CATEGORIZATION_EXAMPLES_LIMIT].fallback( model::CLimits::DEFAULT_RESULTS_MAX_EXAMPLES); const std::string memoryLimitStr{parameters[MODEL_MEMORY_LIMIT].as<std::string>()}; m_ModelMemoryLimitMb = CAnomalyJobConfig::CAnalysisLimits::modelMemoryLimitMb(memoryLimitStr); } std::size_t CAnomalyJobConfig::CAnalysisLimits::modelMemoryLimitMb(const std::string& memoryLimitStr) { // We choose to ignore any errors here parsing the model memory limit string // as we assume that it has already been validated by ES. In the event that any // error _does_ occur an error is logged and a default value used. std::size_t memoryLimitBytes{0}; std::tie(memoryLimitBytes, std::ignore) = core::CStringUtils::memorySizeStringToBytes( memoryLimitStr, DEFAULT_MEMORY_LIMIT_BYTES); std::size_t memoryLimitMb{memoryLimitBytes / core::constants::BYTES_IN_MEGABYTES}; if (memoryLimitMb == 0) { LOG_ERROR(<< "Invalid limit value " << memoryLimitStr << ". Limit must have a minimum value of 1mb." << " Using default memory limit value " << DEFAULT_MEMORY_LIMIT_BYTES / core::constants::BYTES_IN_MEGABYTES); memoryLimitMb = DEFAULT_MEMORY_LIMIT_BYTES / core::constants::BYTES_IN_MEGABYTES; } return memoryLimitMb; } void CAnomalyJobConfig::CDataDescription::parse(const json::value& analysisLimits) { auto parameters = DATA_DESCRIPTION_READER.read(analysisLimits); m_TimeField = parameters[TIME_FIELD].fallback(DEFAULT_TIME_FIELD); m_TimeFormat = parameters[TIME_FORMAT].fallback(EMPTY_STRING); // Ignore } void CAnomalyJobConfig::CAnalysisConfig::parseDetectorsConfig(const json::value& detectorsConfig) { // The Job config has already been validated by Java before being passed to // the C++ backend. So we can safely assume that the detector config is a // non-null array - hence this check isn't strictly necessary. if (detectorsConfig.is_array()) { json::array arr = detectorsConfig.as_array(); m_Detectors.resize(arr.size()); int fallbackDetectorIndex{0}; for (std::size_t i = 0; i < arr.size(); ++i) { m_DetectorRules[fallbackDetectorIndex].clear(); m_Detectors[i].parse(arr[fallbackDetectorIndex], m_RuleFilters, (m_SummaryCountFieldName.empty() == false), fallbackDetectorIndex, m_DetectorRules[fallbackDetectorIndex]); if (m_PerPartitionCategorizationEnabled) { if (m_CategorizationFieldName.empty()) { throw CAnomalyJobConfigReader::CParseError( "per_partition_categorization enabled without a categorization field"); } if (m_Detectors[i].partitionFieldName().empty()) { throw CAnomalyJobConfigReader::CParseError( "per_partition_categorization enabled without a partition field"); } if (m_CategorizationPartitionFieldName.empty()) { m_CategorizationPartitionFieldName = m_Detectors[i].partitionFieldName(); } else { if (m_CategorizationPartitionFieldName != m_Detectors[i].partitionFieldName()) { throw CAnomalyJobConfigReader::CParseError( "per_partition_categorization enabled when partition " "field varies between detectors"); } } } ++fallbackDetectorIndex; } } } void CAnomalyJobConfig::CAnalysisConfig::parse(const json::value& analysisConfig) { auto parameters = ANALYSIS_CONFIG_READER.read(analysisConfig); // We choose to ignore any errors here parsing the time duration string as // we assume that it has already been validated by ES. In the event that any // error _does_ occur an error is logged and a default value used. const std::string& bucketSpanString{parameters[BUCKET_SPAN].fallback(EMPTY_STRING)}; m_BucketSpan = CAnomalyJobConfig::CAnalysisConfig::durationSeconds( bucketSpanString, DEFAULT_BUCKET_SPAN); m_SummaryCountFieldName = parameters[SUMMARY_COUNT_FIELD_NAME].fallback(EMPTY_STRING); m_CategorizationFieldName = parameters[CATEGORIZATION_FIELD_NAME].fallback(EMPTY_STRING); m_CategorizationFilters = parameters[CATEGORIZATION_FILTERS].fallback(TStrVec{}); const std::string& modelPruneWindowString{ parameters[MODEL_PRUNE_WINDOW].fallback(EMPTY_STRING)}; if (modelPruneWindowString.empty() == false) { m_ModelPruneWindow = CAnomalyJobConfig::CAnalysisConfig::durationSeconds( modelPruneWindowString, core_t::TTime{0}); // Ensure that the model prune window is never smaller than twice the bucket span. if (m_ModelPruneWindow < (2 * m_BucketSpan)) { LOG_WARN(<< "The value of configuration setting \"model_prune_window\" (" << m_ModelPruneWindow << ") is less than twice the value of \"bucket_span\" (" << m_BucketSpan << "). Setting \"model_prune_window\" to " << (2 * m_BucketSpan)); m_ModelPruneWindow = 2 * m_BucketSpan; } } auto ppc = parameters[PER_PARTITION_CATEGORIZATION].jsonObject(); if (ppc != nullptr) { auto ppcParameters = PPC_CONFIG_READER.read(*ppc); m_PerPartitionCategorizationEnabled = ppcParameters[ENABLED].fallback(false); m_PerPartitionCategorizationStopOnWarn = ppcParameters[STOP_ON_WARN].fallback(false); } auto detectorsConfig = parameters[DETECTORS].jsonObject(); if (detectorsConfig != nullptr) { this->parseDetectorsConfig(*detectorsConfig); } m_Influencers = parameters[INFLUENCERS].fallback(TStrVec{}); const std::string& latencyString{parameters[LATENCY].fallback(EMPTY_STRING)}; if (latencyString.empty() == false) { m_Latency = CAnomalyJobConfig::CAnalysisConfig::durationSeconds( latencyString, DEFAULT_LATENCY); } m_MultivariateByFields = parameters[MULTIVARIATE_BY_FIELDS].fallback(false); } bool CAnomalyJobConfig::CAnalysisConfig::parseRulesUpdate(const json::value& rulesUpdateConfig) { try { auto parameters = CUSTOM_RULES_UPDATE_CONFIG_READER.read(rulesUpdateConfig); int detectorIndex = parameters[CDetectorConfig::DETECTOR_INDEX].as<int>(); auto customRules = parameters[CDetectorConfig::CUSTOM_RULES].jsonObject(); if (customRules != nullptr) { m_DetectorRules[detectorIndex].clear(); if (this->parseRules(detectorIndex, *customRules) == false) { LOG_ERROR(<< "Failed to update detector rules for detector: " << detectorIndex); return false; } } } catch (CAnomalyJobConfigReader::CParseError& e) { LOG_ERROR(<< "Error parsing events config: " << e.what()); return false; } return true; } bool CAnomalyJobConfig::CAnalysisConfig::parseRules(int detectorIndex, const json::value& rules) { return parseRules(m_DetectorRules[detectorIndex], rules); } bool CAnomalyJobConfig::CAnalysisConfig::parseRules(CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules, const json::value& rules) { CDetectionRulesJsonParser rulesParser{m_RuleFilters}; std::string errorString; if (rulesParser.parseRules(rules, detectionRules, errorString) == false) { LOG_ERROR(<< "Error parsing detector rules: " << errorString); return false; } return true; } bool CAnomalyJobConfig::CAnalysisConfig::parseRules(int detectorIndex, const std::string& rules) { return parseRules(m_DetectorRules[detectorIndex], rules); } bool CAnomalyJobConfig::CAnalysisConfig::parseRules(CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules, const std::string& rules) { if (rules.empty()) { return true; } CDetectionRulesJsonParser rulesParser{m_RuleFilters}; return rulesParser.parseRules(rules, detectionRules); } ml::model::CAnomalyDetectorModelConfig CAnomalyJobConfig::CAnalysisConfig::makeModelConfig() const { model_t::ESummaryMode summaryMode{ m_SummaryCountFieldName.empty() ? model_t::E_None : model_t::E_Manual}; model::CAnomalyDetectorModelConfig modelConfig{model::CAnomalyDetectorModelConfig::defaultConfig( m_BucketSpan, summaryMode, m_SummaryCountFieldName, m_Latency, m_MultivariateByFields)}; modelConfig.detectionRules( model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(m_DetectorRules)); modelConfig.scheduledEvents( model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(m_ScheduledEvents)); modelConfig.modelPruneWindow(m_ModelPruneWindow); return modelConfig; } core_t::TTime CAnomalyJobConfig::CAnalysisConfig::durationSeconds(const std::string& durationString, core_t::TTime defaultDuration) { core_t::TTime durationSeconds{0}; std::tie(durationSeconds, std::ignore) = core::CTimeUtils::timeDurationStringToSeconds(durationString, defaultDuration); if (durationSeconds == 0) { LOG_ERROR(<< "Invalid duration value " << durationString << ". Duration must have a minimum value of 1s. " "Using default duration value " << defaultDuration); durationSeconds = defaultDuration; } return durationSeconds; } void CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::parse( const json::value& detectorConfig, const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters, bool haveSummaryCountField, int fallbackDetectorIndex, CDetectionRulesJsonParser::TDetectionRuleVec& detectionRules) { auto parameters = DETECTOR_CONFIG_READER.read(detectorConfig); m_FunctionName = parameters[FUNCTION].as<std::string>(); m_FieldName = parameters[FIELD_NAME].fallback(EMPTY_STRING); m_ByFieldName = parameters[BY_FIELD_NAME].fallback(EMPTY_STRING); m_OverFieldName = parameters[OVER_FIELD_NAME].fallback(EMPTY_STRING); m_ByFieldName = parameters[BY_FIELD_NAME].fallback(EMPTY_STRING); m_PartitionFieldName = parameters[PARTITION_FIELD_NAME].fallback(EMPTY_STRING); m_ExcludeFrequent = parameters[EXCLUDE_FREQUENT].fallback(EMPTY_STRING); m_DetectorDescription = parameters[DETECTOR_DESCRIPTION].fallback(EMPTY_STRING); // The detector index is of type int for historical reasons // and for consistency across the code base. m_DetectorIndex = parameters[DETECTOR_INDEX].fallback(fallbackDetectorIndex); auto customRules = parameters[CUSTOM_RULES].jsonObject(); if (customRules != nullptr) { std::string errorString; CDetectionRulesJsonParser rulesParser(ruleFilters); if (rulesParser.parseRules(*customRules, detectionRules, errorString) == false) { LOG_ERROR(<< errorString << toString(*customRules)); throw CAnomalyJobConfigReader::CParseError( "Error parsing custom rules: " + toString(*customRules)); } } m_UseNull = parameters[USE_NULL].fallback(false); if (this->determineFunction(haveSummaryCountField) == false) { throw CAnomalyJobConfigReader::CParseError("Error determining function"); } if (this->decipherExcludeFrequentSetting() == false) { throw CAnomalyJobConfigReader::CParseError("Error deciphering exclude frequent setting"); } } bool CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::determineFunction(bool haveSummaryCountField) { bool isPopulation{m_OverFieldName.empty() == false}; bool hasByField{m_ByFieldName.empty() == false}; // Some functions must take a field, some mustn't and for the rest it's // optional. Validate this based on the contents of these flags after // determining the function. Similarly for by fields. bool fieldRequired{false}; bool fieldInvalid{false}; bool byFieldRequired{false}; bool byFieldInvalid{false}; if (m_FunctionName.empty()) { LOG_ERROR(<< "No function specified"); return false; } if (m_FunctionName == FUNCTION_COUNT || m_FunctionName == FUNCTION_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationCount : model::function_t::E_IndividualRareCount; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_DISTINCT_COUNT || m_FunctionName == FUNCTION_DISTINCT_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationDistinctCount : model::function_t::E_IndividualDistinctCount; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_DISTINCT_COUNT || m_FunctionName == FUNCTION_LOW_DISTINCT_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationLowDistinctCount : model::function_t::E_IndividualLowDistinctCount; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_DISTINCT_COUNT || m_FunctionName == FUNCTION_HIGH_DISTINCT_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationHighDistinctCount : model::function_t::E_IndividualHighDistinctCount; fieldRequired = true; } else if (m_FunctionName == FUNCTION_NON_ZERO_COUNT || m_FunctionName == FUNCTION_NON_ZERO_COUNT_ABBREV) { m_Function = model::function_t::E_IndividualNonZeroCount; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_RARE_NON_ZERO_COUNT || m_FunctionName == FUNCTION_RARE_NON_ZERO_COUNT_ABBREV) { m_Function = model::function_t::E_IndividualRareNonZeroCount; fieldInvalid = true; byFieldRequired = true; } else if (m_FunctionName == FUNCTION_RARE) { m_Function = isPopulation ? model::function_t::E_PopulationRare : model::function_t::E_IndividualRare; fieldInvalid = true; byFieldRequired = true; } else if (m_FunctionName == FUNCTION_RARE_COUNT) { m_Function = model::function_t::E_PopulationRareCount; fieldInvalid = true; byFieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_COUNT || m_FunctionName == FUNCTION_LOW_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationLowCounts : model::function_t::E_IndividualLowCounts; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_HIGH_COUNT || m_FunctionName == FUNCTION_HIGH_COUNT_ABBREV) { m_Function = isPopulation ? model::function_t::E_PopulationHighCounts : model::function_t::E_IndividualHighCounts; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_LOW_NON_ZERO_COUNT || m_FunctionName == FUNCTION_LOW_NON_ZERO_COUNT_ABBREV) { m_Function = model::function_t::E_IndividualLowNonZeroCount; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_HIGH_NON_ZERO_COUNT || m_FunctionName == FUNCTION_HIGH_NON_ZERO_COUNT_ABBREV) { m_Function = model::function_t::E_IndividualHighNonZeroCount; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_FREQ_RARE || m_FunctionName == FUNCTION_FREQ_RARE_ABBREV) { m_Function = model::function_t::E_PopulationFreqRare; fieldInvalid = true; byFieldRequired = true; } else if (m_FunctionName == FUNCTION_FREQ_RARE_COUNT || m_FunctionName == FUNCTION_FREQ_RARE_COUNT_ABBREV) { m_Function = model::function_t::E_PopulationFreqRareCount; fieldInvalid = true; byFieldRequired = true; } else if (m_FunctionName == FUNCTION_INFO_CONTENT) { m_Function = isPopulation ? model::function_t::E_PopulationInfoContent : model::function_t::E_IndividualInfoContent; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_INFO_CONTENT) { m_Function = isPopulation ? model::function_t::E_PopulationLowInfoContent : model::function_t::E_IndividualLowInfoContent; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_INFO_CONTENT) { m_Function = isPopulation ? model::function_t::E_PopulationHighInfoContent : model::function_t::E_IndividualHighInfoContent; fieldRequired = true; } else if (m_FunctionName == FUNCTION_METRIC) { if (haveSummaryCountField) { LOG_ERROR(<< "Function " << m_FunctionName << "() cannot be used with a summary count field"); return false; } m_Function = isPopulation ? model::function_t::E_PopulationMetric : model::function_t::E_IndividualMetric; fieldRequired = true; } else if (m_FunctionName == FUNCTION_AVERAGE || m_FunctionName == FUNCTION_MEAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricMean : model::function_t::E_IndividualMetricMean; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_AVERAGE || m_FunctionName == FUNCTION_LOW_MEAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricLowMean : model::function_t::E_IndividualMetricLowMean; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_AVERAGE || m_FunctionName == FUNCTION_HIGH_MEAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricHighMean : model::function_t::E_IndividualMetricHighMean; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MEDIAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricMedian : model::function_t::E_IndividualMetricMedian; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_MEDIAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricLowMedian : model::function_t::E_IndividualMetricLowMedian; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_MEDIAN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricHighMedian : model::function_t::E_IndividualMetricHighMedian; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MIN) { m_Function = isPopulation ? model::function_t::E_PopulationMetricMin : model::function_t::E_IndividualMetricMin; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MAX) { m_Function = isPopulation ? model::function_t::E_PopulationMetricMax : model::function_t::E_IndividualMetricMax; fieldRequired = true; } else if (m_FunctionName == FUNCTION_VARIANCE) { m_Function = isPopulation ? model::function_t::E_PopulationMetricVariance : model::function_t::E_IndividualMetricVariance; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_VARIANCE) { m_Function = isPopulation ? model::function_t::E_PopulationMetricLowVariance : model::function_t::E_IndividualMetricLowVariance; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_VARIANCE) { m_Function = isPopulation ? model::function_t::E_PopulationMetricHighVariance : model::function_t::E_IndividualMetricHighVariance; fieldRequired = true; } else if (m_FunctionName == FUNCTION_SUM) { m_Function = isPopulation ? model::function_t::E_PopulationMetricSum : model::function_t::E_IndividualMetricSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_SUM) { m_Function = isPopulation ? model::function_t::E_PopulationMetricLowSum : model::function_t::E_IndividualMetricLowSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_SUM) { m_Function = isPopulation ? model::function_t::E_PopulationMetricHighSum : model::function_t::E_IndividualMetricHighSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_NON_NULL_SUM || m_FunctionName == FUNCTION_NON_NULL_SUM_ABBREV) { m_Function = model::function_t::E_IndividualMetricNonNullSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_LOW_NON_NULL_SUM || m_FunctionName == FUNCTION_LOW_NON_NULL_SUM_ABBREV) { m_Function = model::function_t::E_IndividualMetricLowNonNullSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_HIGH_NON_NULL_SUM || m_FunctionName == FUNCTION_HIGH_NON_NULL_SUM_ABBREV) { m_Function = model::function_t::E_IndividualMetricHighNonNullSum; fieldRequired = true; } else if (m_FunctionName == FUNCTION_TIME_OF_DAY) { m_Function = isPopulation ? model::function_t::E_PopulationTimeOfDay : model::function_t::E_IndividualTimeOfDay; fieldRequired = false; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_TIME_OF_WEEK) { m_Function = isPopulation ? model::function_t::E_PopulationTimeOfWeek : model::function_t::E_IndividualTimeOfWeek; fieldRequired = false; fieldInvalid = true; } else if (m_FunctionName == FUNCTION_LAT_LONG) { m_Function = isPopulation ? model::function_t::E_PopulationLatLong : model::function_t::E_IndividualLatLong; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MAX_VELOCITY) { m_Function = isPopulation ? model::function_t::E_PopulationMaxVelocity : model::function_t::E_IndividualMaxVelocity; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MIN_VELOCITY) { m_Function = isPopulation ? model::function_t::E_PopulationMinVelocity : model::function_t::E_IndividualMinVelocity; fieldRequired = true; } else if (m_FunctionName == FUNCTION_MEAN_VELOCITY) { m_Function = isPopulation ? model::function_t::E_PopulationMeanVelocity : model::function_t::E_IndividualMeanVelocity; fieldRequired = true; } else if (m_FunctionName == FUNCTION_SUM_VELOCITY) { m_Function = isPopulation ? model::function_t::E_PopulationSumVelocity : model::function_t::E_IndividualSumVelocity; fieldRequired = true; } else { LOG_ERROR(<< "Invalid function " << m_FunctionName << " specified"); return false; } // Validate if (model::function_t::isPopulation(m_Function) && isPopulation == false) { LOG_ERROR(<< "Function " << m_FunctionName << " requires an 'over' field"); return false; } if (isPopulation && model::function_t::isPopulation(m_Function) == false) { LOG_ERROR(<< "Function " << m_FunctionName << " cannot be used with an 'over' field"); return false; } if (byFieldRequired && hasByField == false) { LOG_ERROR(<< "Function " << m_FunctionName << " requires a 'by' field"); return false; } if (byFieldInvalid && hasByField) { LOG_ERROR(<< "Function " << m_FunctionName << " cannot be used with a 'by' field"); return false; } if (fieldRequired && m_FieldName.empty()) { LOG_ERROR(<< "Function " << m_FunctionName << " requires a field"); return false; } if (fieldInvalid && m_FieldName.empty() == false) { LOG_ERROR(<< "Function " << m_FunctionName << " does not work on a field"); return false; } return true; } model_t::EExcludeFrequent CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::excludeFrequent() const { if (m_OverHasExcludeFrequent) { if (m_ByHasExcludeFrequent) { return model_t::E_XF_Both; } else { return model_t::E_XF_Over; } } else { if (m_ByHasExcludeFrequent) { return model_t::E_XF_By; } } return model_t::E_XF_None; } bool CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::decipherExcludeFrequentSetting() { bool hasByField{m_ByFieldName.empty() == false}; bool isPopulation{m_OverFieldName.empty() == false}; if (m_ExcludeFrequent.empty() == false) { if (m_ExcludeFrequent == ALL_TOKEN) { m_ByHasExcludeFrequent = hasByField; m_OverHasExcludeFrequent = isPopulation; } else if (m_ExcludeFrequent == BY_TOKEN) { m_ByHasExcludeFrequent = hasByField; } else if (m_ExcludeFrequent == OVER_TOKEN) { m_OverHasExcludeFrequent = isPopulation; } else { if (m_ExcludeFrequent != NONE_TOKEN) { LOG_ERROR(<< "Unexpected exclude_frequent value = " << m_ExcludeFrequent); return false; } } } return true; } } }