include/model/CLimits.h

/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when * invoked by an Elasticsearch process with a license key installed that permits * use of machine learning features. You may not use this file except in * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ #ifndef INCLUDED_ml_model_CLimits_h #define INCLUDED_ml_model_CLimits_h #include <core/CLogger.h> #include <core/CStringUtils.h> #include <model/CResourceMonitor.h> #include <model/ImportExport.h> #include <boost/property_tree/ptree.hpp> #include <iosfwd> #include <memory> #include <string> namespace ml { namespace model { //! \brief //! Holds configurable limits for the models. //! //! DESCRIPTION:\n //! Holds limits that prevent Ml custom search commands from //! taking too long to run or using excessive amounts of memory. //! //! IMPLEMENTATION DECISIONS:\n //! Configuration of Ml's analytics commands is stored in config //! files which are similar in format to Windows .ini files but //! with hash as the comment character instead of semi-colon. //! //! Boost's property_tree package can parse such config files, as //! it accepts either hash or semi-colon as comment characters. //! Therefore, we use boost::property_tree::ini_parser to load the //! config file. //! //! To decouple the public interface from the config file format, //! the boost property_tree is copied into separate member //! variables. //! class MODEL_EXPORT CLimits { public: //! Default maximum number of distinct values of a single field before //! analysis of that field will be halted static const size_t DEFAULT_ANOMALY_MAX_FIELD_VALUES; //! Default maximum number of time buckets to process during anomaly //! detection before ceasing to output results static const size_t DEFAULT_ANOMALY_MAX_TIME_BUCKETS; //! Default number of examples to display in results tables static const size_t DEFAULT_RESULTS_MAX_EXAMPLES; //! Default threshold for unusual probabilities to be output even if //! nothing is anomalous on a whole-system basis static const double DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD; public: //! Default constructor explicit CLimits(bool persistenceInForeground = false, double byteLimitMargin = CResourceMonitor::DEFAULT_BYTE_LIMIT_MARGIN); //! Initialise from a config file. This overwrites current settings //! with any found in the config file. Settings that are not present //! in the config file will be reset to their default values. bool init(const std::string& configFile); //! Initialise with given values for the maximum number of //! (categorisation) examples and the model memory limit (in MB). //! All other settings take their default values. void init(std::size_t maxExamples, std::size_t modelMemoryLimitMB); //! Access to settings size_t anomalyMaxTimeBuckets() const; size_t maxExamples() const; double unusualProbabilityThreshold() const; size_t memoryLimitMB() const; //! Access to the resource monitor CResourceMonitor& resourceMonitor(); private: //! Helper method for init(). template<typename FIELDTYPE> static bool processSetting(const boost::property_tree::ptree& propTree, const std::string& iniPath, const FIELDTYPE& defaultValue, FIELDTYPE& value) { try { // This get() will throw an exception if the path isn't found std::string valueStr(propTree.template get<std::string>(iniPath)); // Use our own string-to-type conversion, because what's built // into the boost::property_tree is too lax if (core::CStringUtils::stringToType(valueStr, value) == false) { LOG_ERROR(<< "Invalid value for setting " << iniPath << " : " << valueStr); return false; } } catch (boost::property_tree::ptree_error&) { LOG_DEBUG(<< "Using default value (" << defaultValue << ") for unspecified setting " << iniPath); value = defaultValue; } return true; } private: //! Maximum number of time buckets to process during anomaly detection //! before ceasing to output results size_t m_AnomalyMaxTimeBuckets; //! How many examples should we display in results tables? size_t m_MaxExamples; //! Probability threshold for results to be output double m_UnusualProbabilityThreshold; //! Size of the memory limit for the resource monitor, in MB size_t m_MemoryLimitMB; //! Resource monitor instance CResourceMonitor m_ResourceMonitor; }; } } #endif // INCLUDED_ml_model_CLimits_h

include/model/CLimits.h (57 lines of code) (raw):