include/model/CLimits.h (57 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#ifndef INCLUDED_ml_model_CLimits_h
#define INCLUDED_ml_model_CLimits_h
#include <core/CLogger.h>
#include <core/CStringUtils.h>
#include <model/CResourceMonitor.h>
#include <model/ImportExport.h>
#include <boost/property_tree/ptree.hpp>
#include <iosfwd>
#include <memory>
#include <string>
namespace ml {
namespace model {
//! \brief
//! Holds configurable limits for the models.
//!
//! DESCRIPTION:\n
//! Holds limits that prevent Ml custom search commands from
//! taking too long to run or using excessive amounts of memory.
//!
//! IMPLEMENTATION DECISIONS:\n
//! Configuration of Ml's analytics commands is stored in config
//! files which are similar in format to Windows .ini files but
//! with hash as the comment character instead of semi-colon.
//!
//! Boost's property_tree package can parse such config files, as
//! it accepts either hash or semi-colon as comment characters.
//! Therefore, we use boost::property_tree::ini_parser to load the
//! config file.
//!
//! To decouple the public interface from the config file format,
//! the boost property_tree is copied into separate member
//! variables.
//!
class MODEL_EXPORT CLimits {
public:
//! Default maximum number of distinct values of a single field before
//! analysis of that field will be halted
static const size_t DEFAULT_ANOMALY_MAX_FIELD_VALUES;
//! Default maximum number of time buckets to process during anomaly
//! detection before ceasing to output results
static const size_t DEFAULT_ANOMALY_MAX_TIME_BUCKETS;
//! Default number of examples to display in results tables
static const size_t DEFAULT_RESULTS_MAX_EXAMPLES;
//! Default threshold for unusual probabilities to be output even if
//! nothing is anomalous on a whole-system basis
static const double DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD;
public:
//! Default constructor
explicit CLimits(bool persistenceInForeground = false,
double byteLimitMargin = CResourceMonitor::DEFAULT_BYTE_LIMIT_MARGIN);
//! Initialise from a config file. This overwrites current settings
//! with any found in the config file. Settings that are not present
//! in the config file will be reset to their default values.
bool init(const std::string& configFile);
//! Initialise with given values for the maximum number of
//! (categorisation) examples and the model memory limit (in MB).
//! All other settings take their default values.
void init(std::size_t maxExamples, std::size_t modelMemoryLimitMB);
//! Access to settings
size_t anomalyMaxTimeBuckets() const;
size_t maxExamples() const;
double unusualProbabilityThreshold() const;
size_t memoryLimitMB() const;
//! Access to the resource monitor
CResourceMonitor& resourceMonitor();
private:
//! Helper method for init().
template<typename FIELDTYPE>
static bool processSetting(const boost::property_tree::ptree& propTree,
const std::string& iniPath,
const FIELDTYPE& defaultValue,
FIELDTYPE& value) {
try {
// This get() will throw an exception if the path isn't found
std::string valueStr(propTree.template get<std::string>(iniPath));
// Use our own string-to-type conversion, because what's built
// into the boost::property_tree is too lax
if (core::CStringUtils::stringToType(valueStr, value) == false) {
LOG_ERROR(<< "Invalid value for setting " << iniPath << " : " << valueStr);
return false;
}
} catch (boost::property_tree::ptree_error&) {
LOG_DEBUG(<< "Using default value (" << defaultValue
<< ") for unspecified setting " << iniPath);
value = defaultValue;
}
return true;
}
private:
//! Maximum number of time buckets to process during anomaly detection
//! before ceasing to output results
size_t m_AnomalyMaxTimeBuckets;
//! How many examples should we display in results tables?
size_t m_MaxExamples;
//! Probability threshold for results to be output
double m_UnusualProbabilityThreshold;
//! Size of the memory limit for the resource monitor, in MB
size_t m_MemoryLimitMB;
//! Resource monitor instance
CResourceMonitor m_ResourceMonitor;
};
}
}
#endif // INCLUDED_ml_model_CLimits_h