lib/maths/analytics/CBoostedTreeHyperparameters.cc

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the following additional limitation. Functionality enabled by the
 * files subject to the Elastic License 2.0 may only be used in production when
 * invoked by an Elasticsearch process with a license key installed that permits
 * use of machine learning features. You may not use this file except in
 * compliance with the Elastic License 2.0 and the foregoing additional
 * limitation.
 */

#include <maths/analytics/CBoostedTreeHyperparameters.h>

#include <core/CLogger.h>
#include <core/CMemoryDef.h>
#include <core/CPersistUtils.h>
#include <core/CStopWatch.h>
#include <core/RestoreMacros.h>

#include <maths/analytics/CBoostedTreeImpl.h>
#include <maths/analytics/CDataFrameAnalysisInstrumentationInterface.h>

#include <maths/common/CBasicStatistics.h>
#include <maths/common/CBasicStatisticsPersist.h>
#include <maths/common/CBayesianOptimisation.h>
#include <maths/common/CLinearAlgebra.h>
#include <maths/common/CLowess.h>
#include <maths/common/CLowessDetail.h>

#include <cmath>
#include <limits>
#include <memory>

namespace ml {
namespace maths {
namespace analytics {
using namespace boosted_tree_detail;

namespace {
using TVector3x1 = CBoostedTreeHyperparameters::TVector3x1;

const std::size_t MIN_VALUE_INDEX{0};
const std::size_t MID_VALUE_INDEX{1};
const std::size_t MAX_VALUE_INDEX{2};
const double LINE_SEARCH_MINIMUM_RELATIVE_EI_TO_CONTINUE{0.01};

// clang-format off
const std::string BAYESIAN_OPTIMIZATION_TAG{"bayesian_optimization"};
const std::string BEST_FOREST_LOSS_GAP_TAG{"best_forest_loss_gap"};
const std::string BEST_FOREST_NUMBER_KEPT_NODES_TAG{"best_forest_number_kept_nodes"};
const std::string BEST_FOREST_NUMBER_NEW_NODES_TAG{"best_forest_number_new_nodes"};
const std::string BEST_FOREST_TEST_LOSS_TAG{"best_forest_test_loss"};
const std::string CURRENT_ROUND_TAG{"current_round"};
const std::string DEPTH_PENALTY_MULTIPLIER_TAG{"depth_penalty_multiplier"};
const std::string DOWNSAMPLE_FACTOR_TAG{"downsample_factor"};
const std::string EARLY_HYPERPARAMETER_OPTIMIZATION_STOPPING_ENABLED_TAG{"early_hyperparameter_optimization_stopping_enabled"};
const std::string ETA_GROWTH_RATE_PER_TREE_TAG{"eta_growth_rate_per_tree"};
const std::string ETA_TAG{"eta"};
const std::string FEATURE_BAG_FRACTION_TAG{"feature_bag_fraction"};
const std::string LINE_SEARCH_HYPERPARAMETER_LOSSES_TAG{"line_search_hyperparameters_losses"};
const std::string LINE_SEARCH_RELEVANT_PARAMETERS_TAG{"line_search_relevant_parameters"};
const std::string LEAF_WEIGHT_PENALTY_MULTIPLIER_TAG{"leaf_weight_penalty_multiplier"};
const std::string MAXIMUM_NUMBER_TREES_TAG{"maximum_number_trees"};
const std::string MAXIMUM_OPTIMISATION_ROUNDS_PER_HYPERPARAMETER_TAG{"maximum_optimisation_rounds_per_hyperparameter"};
const std::string MEAN_FOREST_SIZE_ACCUMULATOR_TAG{"mean_forest_size_accumulator"};
const std::string MEAN_TEST_LOSS_ACCUMULATOR_TAG{"mean_test_loss_accumulator"};
const std::string NUMBER_ROUNDS_TAG{"number_rounds"};
const std::string PREDICTION_CHANGE_COST_TAG{"prediction_change_cost"};
const std::string RETRAINED_TREE_ETA_TAG{"retrained_tree_eta"};
const std::string SOFT_TREE_DEPTH_LIMIT_TAG{"soft_tree_depth_limit"};
const std::string SOFT_TREE_DEPTH_TOLERANCE_TAG{"soft_tree_depth_tolerance"};
const std::string STOP_HYPERPARAMETER_OPTIMIZATION_EARLY_TAG{"stop_hyperparameter_optimization_early"};
const std::string
TREE_SIZE_PENALTY_MULTIPLIER_TAG{"tree_size_penalty_multiplier"}; const std::string TREE_TOPOLOGY_CHANGE_PENALTY_TAG{"tree_topology_change_penalty"}; // clang-format on } CBoostedTreeHyperparameters::CBoostedTreeHyperparameters() { this->saveCurrent(); this->initializeTunableHyperparameters(); } double CBoostedTreeHyperparameters::penaltyForDepth(std::size_t depth) const { return std::exp((static_cast<double>(depth) / m_SoftTreeDepthLimit.value() - 1.0) / m_SoftTreeDepthTolerance.value()); } double CBoostedTreeHyperparameters::etaForTreeAtPosition(std::size_t index) const { return std::min(m_Eta.value() * common::CTools::stable( std::pow(m_EtaGrowthRatePerTree.value(), static_cast<double>(index))), 1.0); } void CBoostedTreeHyperparameters::maximumOptimisationRoundsPerHyperparameter(std::size_t rounds) { m_MaximumOptimisationRoundsPerHyperparameter = rounds; } void CBoostedTreeHyperparameters::stopHyperparameterOptimizationEarly(bool enable) { m_EarlyHyperparameterOptimizationStoppingEnabled = enable; } void CBoostedTreeHyperparameters::bayesianOptimisationRestarts(std::size_t restarts) { m_BayesianOptimisationRestarts = restarts; } std::size_t CBoostedTreeHyperparameters::numberToTune() const { std::size_t result((m_DepthPenaltyMultiplier.fixed() ? 0 : 1) + (m_TreeSizePenaltyMultiplier.fixed() ? 0 : 1) + (m_LeafWeightPenaltyMultiplier.fixed() ? 0 : 1) + (m_SoftTreeDepthLimit.fixed() ? 0 : 1) + (m_SoftTreeDepthTolerance.fixed() ? 0 : 1) + (m_DownsampleFactor.fixed() ? 0 : 1) + (m_FeatureBagFraction.fixed() ? 0 : 1) + (m_Eta.fixed() ? 0 : 1) + (m_EtaGrowthRatePerTree.fixed() ? 0 : 1)); if (m_IncrementalTraining) { result += (m_TreeTopologyChangePenalty.fixed() ? 0 : 1) + (m_PredictionChangeCost.fixed() ? 0 : 1) + (m_RetrainedTreeEta.fixed() ? 0 : 1); } return result; } void CBoostedTreeHyperparameters::resetFineTuneSearch() { m_CurrentRound = 0; m_StopHyperparameterOptimizationEarly = false; m_BestForestTestLoss = INF; m_BestForestNumberKeptNodes = 0; m_BestForestNumberNewNodes = 0; m_MeanForestSizeAccumulator = TMeanAccumulator{}; m_MeanTestLossAccumulator = TMeanAccumulator{}; this->initializeTunableHyperparameters(); } CBoostedTreeHyperparameters::TOptionalDoubleSizePr CBoostedTreeHyperparameters::initializeFineTuneSearchInterval(const CInitializeFineTuneArguments& args, TDoubleParameter& parameter) const { if (parameter.valid(args.maxValue())) { double maxValue{parameter.toSearchValue(args.maxValue())}; double minValue{maxValue - parameter.toSearchValue(args.searchInterval())}; double meanValue{(minValue + maxValue) / 2.0}; LOG_TRACE(<< "mean value = " << meanValue); core::CStopWatch watch{true}; TVector3x1 fallback; fallback(MIN_VALUE_INDEX) = minValue; fallback(MID_VALUE_INDEX) = meanValue; fallback(MAX_VALUE_INDEX) = maxValue; auto[maybeNullInterval, lossGap, forestSize] = this->testLossLineSearch(args, minValue, maxValue); auto interval = maybeNullInterval.value_or(fallback); args.truncateParameter()(interval); LOG_TRACE(<< "search interval = [" << interval.toDelimited() << "]"); LOG_TRACE(<< "search took = " << watch.lap() << "ms"); parameter.fixToRange(parameter.fromSearchValue(interval(MIN_VALUE_INDEX)), parameter.fromSearchValue(interval(MAX_VALUE_INDEX))); parameter.set(parameter.fromSearchValue(interval(MID_VALUE_INDEX))); return TOptionalDoubleSizePr{std::in_place, lossGap, forestSize}; } parameter.fix(); return {}; } CBoostedTreeHyperparameters::TOptionalVector3x1DoubleSizeTr CBoostedTreeHyperparameters::testLossLineSearch(const CInitializeFineTuneArguments& args, double 
                                                 intervalLeftEnd,
                                                 double intervalRightEnd) const {

    // This has the following steps:
    //   1. Search the interval [intervalLeftEnd, intervalRightEnd] using fixed
    //      steps,
    //   2. Fine tune, via Bayesian Optimisation targeting expected improvement,
    //      and stop if the expected improvement is small compared to the current
    //      minimum test loss,
    //   3. Fit a LOWESS model to the test losses and compute the minimum.

    TDoubleDoubleDoubleSizeTupleVec testLosses;
    this->initialTestLossLineSearch(args, intervalLeftEnd, intervalRightEnd, testLosses);
    if (testLosses.empty()) {
        return {{}, 0.0, 0};
    }
    this->fineTuneTestLoss(args, intervalLeftEnd, intervalRightEnd, testLosses);
    return this->minimizeTestLoss(intervalLeftEnd, intervalRightEnd, std::move(testLosses));
}

void CBoostedTreeHyperparameters::initialTestLossLineSearch(
    const CInitializeFineTuneArguments& args,
    double intervalLeftEnd,
    double intervalRightEnd,
    TDoubleDoubleDoubleSizeTupleVec& testLosses) const {

    auto& tree = args.tree();

    testLosses.reserve(maxLineSearchIterations());

    double minTestLoss{std::numeric_limits<double>::max()};

    for (auto parameter :
         {intervalLeftEnd, (2.0 * intervalLeftEnd + intervalRightEnd) / 3.0,
          (intervalLeftEnd + 2.0 * intervalRightEnd) / 3.0, intervalRightEnd}) {

        if (args.updateParameter()(tree, parameter) == false) {
            tree.m_TrainingProgress.increment(
                (maxLineSearchIterations() - testLosses.size()) *
                tree.m_Hyperparameters.maximumNumberTrees().value());
            break;
        }

        auto result = tree.trainForest(args.frame(), tree.m_TrainingRowMasks[0],
                                       tree.m_TestingRowMasks[0],
                                       tree.m_TrainingProgress, minTestLoss);

        tree.hyperparameters().captureHyperparametersAndLoss(result.s_TestLoss);

        double meanTestLoss{common::CBasicStatistics::mean(result.s_TestLoss)};
        testLosses.emplace_back(parameter, meanTestLoss, result.s_LossGap,
                                result.s_Forest.size());
        minTestLoss = std::min(minTestLoss, meanTestLoss);
    }
}

void CBoostedTreeHyperparameters::fineTuneTestLoss(const CInitializeFineTuneArguments& args,
                                                   double intervalLeftEnd,
                                                   double intervalRightEnd,
                                                   TDoubleDoubleDoubleSizeTupleVec& testLosses) const {

    using TMinAccumulator = common::CBasicStatistics::SMin<double>::TAccumulator;

    TMinAccumulator minTestLoss;
    for (const auto& testLoss : testLosses) {
        minTestLoss.add(std::get<1>(testLoss));
    }

    auto boptVector = [](double parameter) {
        return common::SConstant<common::CBayesianOptimisation::TVector>::get(1, parameter);
    };
    auto adjustTestLoss = [minTestLoss, &args](double parameter, double testLoss) {
        return args.adjustLoss()(parameter, minTestLoss[0], testLoss);
    };

    common::CBayesianOptimisation bopt{{{intervalLeftEnd, intervalRightEnd}}};
    for (auto& [parameter, testLoss, lossGap, size] : testLosses) {
        double adjustedTestLoss{adjustTestLoss(parameter, testLoss)};
        bopt.add(boptVector(parameter), adjustedTestLoss, 0.0);
        testLoss = adjustedTestLoss;
    }

    auto& tree = args.tree();

    // Ensure we choose one value based on expected improvement.
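    // The loop below alternates between asking the Gaussian Process for the
    // parameter value with maximum expected improvement (EI) and retraining a
    // forest at that value. It stops when the line search iteration budget is
    // exhausted, when the parameter update is rejected, or, once at least
    // minNumberTestLosses values have been tried, when the EI falls below
    // LINE_SEARCH_MINIMUM_RELATIVE_EI_TO_CONTINUE (1%) of the lowest test loss
    // seen so far, i.e. further evaluations are unlikely to pay for themselves.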
std::size_t minNumberTestLosses{6}; while (testLosses.size() < maxLineSearchIterations()) { common::CBayesianOptimisation::TVector parameter; common::CBayesianOptimisation::TOptionalDouble EI; std::tie(parameter, EI) = bopt.maximumExpectedImprovement(); double threshold{LINE_SEARCH_MINIMUM_RELATIVE_EI_TO_CONTINUE * minTestLoss[0]}; LOG_TRACE(<< "EI = " << EI << " threshold to continue = " << threshold); if ((testLosses.size() >= minNumberTestLosses && EI != std::nullopt && *EI < threshold) || args.updateParameter()(tree, parameter(0)) == false) { tree.m_TrainingProgress.increment( (maxLineSearchIterations() - testLosses.size()) * tree.m_Hyperparameters.maximumNumberTrees().value()); break; } auto result = tree.trainForest(args.frame(), tree.m_TrainingRowMasks[0], tree.m_TestingRowMasks[0], tree.m_TrainingProgress, minTestLoss[0]); minTestLoss.add(common::CBasicStatistics::mean(result.s_TestLoss)); double adjustedTestLoss{adjustTestLoss( parameter(0), common::CBasicStatistics::mean(result.s_TestLoss))}; bopt.add(parameter, adjustedTestLoss, 0.0); tree.hyperparameters().captureHyperparametersAndLoss(result.s_TestLoss); testLosses.emplace_back(parameter(0), adjustedTestLoss, result.s_LossGap, result.s_Forest.size()); } std::sort(testLosses.begin(), testLosses.end()); LOG_TRACE(<< "test losses = " << testLosses); } CBoostedTreeHyperparameters::TOptionalVector3x1DoubleSizeTr CBoostedTreeHyperparameters::minimizeTestLoss(double intervalLeftEnd, double intervalRightEnd, TDoubleDoubleDoubleSizeTupleVec testLosses) const { double minLossValue{std::get<0>(*std::min_element( testLosses.begin(), testLosses.end(), [](const auto& lhs, const auto& rhs) { return std::get<1>(lhs) < std::get<1>(rhs); }))}; double minLossGap{std::numeric_limits<double>::max()}; double minSize{std::numeric_limits<double>::max()}; common::CLowess<2>::TDoubleDoublePrVec testLossCurveValues; common::CLowess<2>::TDoubleDoublePrVec lossGapCurveValues; common::CLowess<2>::TDoubleDoublePrVec forestSizeCurveValues; testLossCurveValues.reserve(testLosses.size()); lossGapCurveValues.reserve(testLosses.size()); forestSizeCurveValues.reserve(testLosses.size()); for (const auto & [ parameter, testLoss, lossGap, size ] : testLosses) { minLossGap = std::min(minLossGap, lossGap); minSize = std::min(minSize, static_cast<double>(size)); testLossCurveValues.emplace_back(parameter, testLoss); lossGapCurveValues.emplace_back(parameter, lossGap); forestSizeCurveValues.emplace_back(parameter, static_cast<double>(size)); } std::size_t numberFolds{testLosses.size()}; common::CLowess<2> testLossCurve; common::CLowess<2> lossGapCurve; common::CLowess<2> forestSizeCurve; testLossCurve.fit(std::move(testLossCurveValues), numberFolds); lossGapCurve.fit(std::move(lossGapCurveValues), numberFolds); forestSizeCurve.fit(std::move(forestSizeCurveValues), numberFolds); double bestParameter; double bestParameterTestLoss; std::tie(bestParameter, bestParameterTestLoss) = testLossCurve.minimum(); double lossGap{std::max(lossGapCurve.predict(bestParameter), minLossGap)}; double forestSize{std::max(forestSizeCurve.predict(bestParameter), minSize)}; LOG_TRACE(<< "best parameter = " << bestParameter << ", test loss = " << bestParameterTestLoss << ", loss gap = " << lossGap << ", forest size = " << forestSize); double width{(intervalRightEnd - intervalLeftEnd) / static_cast<double>(maxLineSearchIterations())}; intervalLeftEnd = std::min(bestParameter - width, minLossValue); intervalRightEnd = std::max(bestParameter + width, minLossValue); LOG_TRACE(<< "interval = [" << 
              intervalLeftEnd << "," << intervalRightEnd << "]");

    return {TVector3x1{{intervalLeftEnd, bestParameter, intervalRightEnd}},
            lossGap, static_cast<std::size_t>(std::ceil(forestSize))};
}

void CBoostedTreeHyperparameters::initializeFineTuneSearch(double lossGap,
                                                           std::size_t numberTrees) {

    // We need sensible bounds for the region we'll search for optimal values.
    // For all parameters where we have initial estimates we use bounds of the
    // form a * initial and b * initial for a < 1 < b. For other parameters we
    // use a fixed range.
    //
    // We also parameterise so the probability any subinterval contains a good
    // value is proportional to its length. For parameters whose difference is
    // naturally measured as a ratio, i.e. diff(p_1, p_0) = p_1 / p_0 for p_0
    // less than p_1, this translates to using log parameter values.

    // If we decided to fix any parameter we should just exclude all losses
    // which don't match the fixed parameter value.
    m_LineSearchHyperparameterLosses.erase(
        std::remove_if(m_LineSearchHyperparameterLosses.begin(),
                       m_LineSearchHyperparameterLosses.end(),
                       [this](const auto& loss) {
                           bool result{false};
                           this->foreachTunableParameter([&](std::size_t i, const auto& parameter) {
                               result |= parameter.fixed() &&
                                         std::get<0>(loss)(i) != parameter.value();
                           });
                           return result;
                       }),
        m_LineSearchHyperparameterLosses.end());

    auto recordedHyperparameters = m_TunableHyperparameters;
    this->initializeTunableHyperparameters();

    m_LineSearchRelevantParameters.reserve(m_TunableHyperparameters.size());
    for (auto parameter : m_TunableHyperparameters) {
        auto coordinate = std::find(recordedHyperparameters.begin(),
                                    recordedHyperparameters.end(), parameter);
        if (coordinate != recordedHyperparameters.end()) {
            m_LineSearchRelevantParameters.push_back(
                coordinate - recordedHyperparameters.begin());
        }
    }

    common::CBayesianOptimisation::TDoubleDoublePrVec boundingBox;
    boundingBox.reserve(m_TunableHyperparameters.size());
    this->foreachTunableParameter([&](std::size_t, const auto& parameter) {
        boundingBox.push_back(parameter.searchRange());
    });
    LOG_TRACE(<< "hyperparameter search bounding box = " << boundingBox);

    m_BayesianOptimization = std::make_unique<common::CBayesianOptimisation>(
        std::move(boundingBox),
        m_BayesianOptimisationRestarts.value_or(common::CBayesianOptimisation::RESTARTS));
    m_CurrentRound = 0;
    m_NumberRounds = m_MaximumOptimisationRoundsPerHyperparameter *
                     m_TunableHyperparameters.size();

    this->checkIfCanSkipFineTuneSearch();

    // We purposely don't record the test loss because it isn't comparable with the
    // value computed in fine tune. However, an estimate of the test train loss gap
    // is needed if we incrementally train.
    m_BestForestLossGap = lossGap;

    // The stored number of trees is used for final train when we train on
    // all the data and so can't measure when to stop.
    std::size_t numberTreesToRestore{m_MaximumNumberTrees.value()};
    m_MaximumNumberTrees.set(numberTrees);
    this->saveCurrent();
    m_MaximumNumberTrees.set(numberTreesToRestore);
}

void CBoostedTreeHyperparameters::checkIfCanSkipFineTuneSearch() {
    if (m_EarlyHyperparameterOptimizationStoppingEnabled && m_IncrementalTraining == false) {

        auto toRestore = std::make_unique<common::CBayesianOptimisation>(
            m_BayesianOptimization->boundingBox(), m_BayesianOptimization->restarts());
        std::swap(toRestore, m_BayesianOptimization);

        // Add information about observed line search training runs to the GP.
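        // Each stored loss was recorded against the full tunable parameter
        // vector at line search time; m_LineSearchRelevantParameters maps those
        // coordinates onto the (possibly smaller) set of parameters which are
        // still tunable, so the GP is trained in the same coordinate system
        // that fine-tune search will use.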
for (auto & [ parameters, meanTestLoss, testLossVariance ] : m_LineSearchHyperparameterLosses) { TVector parameters_{static_cast<TVector::TIndexType>( m_LineSearchRelevantParameters.size())}; for (std::size_t i = 0; i < m_LineSearchRelevantParameters.size(); ++i) { parameters_(i) = parameters(m_LineSearchRelevantParameters[i]); } m_BayesianOptimization->add(std::move(parameters_), meanTestLoss, testLossVariance); } // Perform three rounds of kernel parameter optimization. m_BayesianOptimization->maximumLikelihoodKernel(3); m_StopHyperparameterOptimizationEarly = this->optimisationMakingNoProgress(); if (m_StopHyperparameterOptimizationEarly) { LOG_INFO(<< "Skipping fine-tune hyperparameters"); } else { // Only reset Bayesian optimisation if we are going to fine tune or // else we won't be able to compute hyperparameter importances. std::swap(m_BayesianOptimization, toRestore); } } } bool CBoostedTreeHyperparameters::optimisationMakingNoProgress() const { if (m_EarlyHyperparameterOptimizationStoppingEnabled == false) { return false; } if (m_StopHyperparameterOptimizationEarly) { return true; } double cov{m_BayesianOptimization->excessCoefficientOfVariation()}; LOG_TRACE(<< "excessCoefficientOfVariation " << cov); return cov < 1e-3; } void CBoostedTreeHyperparameters::startFineTuneSearch() { std::size_t dimension{m_TunableHyperparameters.size()}; std::size_t n{m_NumberRounds / 3 + 1}; common::CSampling::sobolSequenceSample(dimension, n, m_HyperparameterSamples); } void CBoostedTreeHyperparameters::addRoundStats(const TMeanAccumulator& meanForestSizeAccumulator, double meanTestLoss) { m_MeanForestSizeAccumulator += meanForestSizeAccumulator; m_MeanTestLossAccumulator.add(meanTestLoss); } bool CBoostedTreeHyperparameters::selectNext(const TMeanVarAccumulator& testLossMoments, double explainedVariance) { TVector parameters{this->currentParametersVector()}; double meanTestLoss{common::CBasicStatistics::mean(testLossMoments)}; double testLossVariance{common::CBasicStatistics::variance(testLossMoments)}; LOG_TRACE(<< "round = " << m_CurrentRound << ", loss = " << meanTestLoss << ", total variance = " << testLossVariance << ", explained variance = " << explainedVariance); LOG_TRACE(<< "parameters = " << this->print()); m_BayesianOptimization->add(parameters, meanTestLoss, testLossVariance); // One fold might have examples which are harder to predict on average than // another fold, particularly if the sample size is small. What we really care // about is the variation between fold loss values after accounting for any // systematic effect due to sampling. Running for multiple rounds allows us // to estimate this effect and we remove it when characterising the uncertainty // in the loss values in the Gaussian Process. m_BayesianOptimization->explainedErrorVariance(explainedVariance); TVector minBoundary; TVector maxBoundary; std::tie(minBoundary, maxBoundary) = m_BayesianOptimization->boundingBox(); // We don't stop early for incremental training because iterations are fast. 
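    // A rough sketch of the schedule, assuming for illustration five tunable
    // hyperparameters and m_NumberRounds = 60: the first 60 / 3 + 1 = 21 rounds
    // evaluate Sobol quasi-random samples mapped into the bounding box (pure
    // exploration), and every remaining round asks the Gaussian Process for the
    // point with maximum expected improvement (exploitation). Early stopping is
    // only considered after round 3 and never for incremental training.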
bool canStopEarly{m_CurrentRound > 3 && m_IncrementalTraining == false}; if (m_CurrentRound < m_HyperparameterSamples.size()) { std::copy(m_HyperparameterSamples[m_CurrentRound].begin(), m_HyperparameterSamples[m_CurrentRound].end(), parameters.data()); parameters = minBoundary + parameters.cwiseProduct(maxBoundary - minBoundary); if (canStopEarly) { m_BayesianOptimization->maximumLikelihoodKernel(3); } } else { std::tie(parameters, std::ignore) = m_BayesianOptimization->maximumExpectedImprovement(3); } if (canStopEarly && this->optimisationMakingNoProgress()) { m_StopHyperparameterOptimizationEarly = true; return false; } this->setHyperparameterValues(std::move(parameters)); return true; } bool CBoostedTreeHyperparameters::captureBest(const TMeanVarAccumulator& testLossMoments, double meanLossGap, double numberKeptNodes, double numberNewNodes, std::size_t numberTrees) { // We capture the parameters with the lowest error at one standard // deviation above the mean. If the mean error improvement is marginal // we prefer the solution with the least variation across the folds. double testLoss{common::CBasicStatistics::mean(testLossMoments)}; double penalizedTestLoss{testLoss + this->modelSizePenalty(numberKeptNodes, numberNewNodes)}; double bestPenalizedTestLoss{ m_BestForestTestLoss + this->modelSizePenalty(m_BestForestNumberKeptNodes, m_BestForestNumberNewNodes)}; if (penalizedTestLoss < bestPenalizedTestLoss) { m_BestForestTestLoss = testLoss; m_BestForestNumberKeptNodes = numberKeptNodes; m_BestForestNumberNewNodes = numberNewNodes; m_BestForestLossGap = meanLossGap; // During hyperparameter search we have a fixed upper bound on // the number of trees which we use for every round and we stop // adding trees early based on cross-validation loss. The stored // number of trees is used for final train when we train on all // the data and so can't measure when to stop. std::size_t numberTreesToRestore{m_MaximumNumberTrees.value()}; m_MaximumNumberTrees.set(numberTrees); this->saveCurrent(); m_MaximumNumberTrees.set(numberTreesToRestore); return true; } return false; } double CBoostedTreeHyperparameters::modelSizePenalty(double numberKeptNodes, double numberNewNodes) const { // eps * "forest number nodes" * E[GP] / "average forest number nodes" to meanLoss. return (m_MeanForestSizeAccumulator.s_Count != 0 && m_MeanTestLossAccumulator.s_Count != 0) ? 
RELATIVE_SIZE_PENALTY * (numberKeptNodes + numberNewNodes) / (numberKeptNodes + common::CBasicStatistics::mean(m_MeanForestSizeAccumulator)) * common::CBasicStatistics::mean(m_MeanTestLossAccumulator) : 0.0; } CBoostedTreeHyperparameters::THyperparameterImportanceVec CBoostedTreeHyperparameters::importances() const { THyperparameterImportanceVec importances; importances.reserve(NUMBER_HYPERPARAMETERS); auto anovaMainEffects = m_BayesianOptimization->anovaMainEffects(); for (std::size_t i = 0; i < static_cast<std::size_t>(NUMBER_HYPERPARAMETERS); ++i) { auto hyperparameter = static_cast<EHyperparameter>(i); double hyperparameterValue; SHyperparameterImportance::EType hyperparameterType{SHyperparameterImportance::E_Double}; bool skip{false}; switch (hyperparameter) { case E_Alpha: hyperparameterValue = m_DepthPenaltyMultiplier.value(); break; case E_DownsampleFactor: hyperparameterValue = m_DownsampleFactor.value(); break; case E_Eta: hyperparameterValue = m_Eta.value(); break; case E_EtaGrowthRatePerTree: hyperparameterValue = m_EtaGrowthRatePerTree.value(); break; case E_FeatureBagFraction: hyperparameterValue = m_FeatureBagFraction.value(); break; case E_Gamma: hyperparameterValue = m_TreeSizePenaltyMultiplier.value(); break; case E_Lambda: hyperparameterValue = m_LeafWeightPenaltyMultiplier.value(); break; case E_MaximumNumberTrees: hyperparameterValue = static_cast<double>(m_MaximumNumberTrees.value()); hyperparameterType = SHyperparameterImportance::E_Uint64; break; case E_SoftTreeDepthLimit: hyperparameterValue = m_SoftTreeDepthLimit.value(); break; case E_SoftTreeDepthTolerance: hyperparameterValue = m_SoftTreeDepthTolerance.value(); break; case E_PredictionChangeCost: hyperparameterValue = m_PredictionChangeCost.value(); skip = (m_IncrementalTraining == false); break; case E_RetrainedTreeEta: hyperparameterValue = m_RetrainedTreeEta.value(); skip = (m_IncrementalTraining == false); break; case E_TreeTopologyChangePenalty: hyperparameterValue = m_TreeTopologyChangePenalty.value(); skip = (m_IncrementalTraining == false); break; } if (skip) { continue; } double absoluteImportance{0.0}; double relativeImportance{0.0}; bool supplied{true}; auto tunableIndex = std::distance(m_TunableHyperparameters.begin(), std::find(m_TunableHyperparameters.begin(), m_TunableHyperparameters.end(), i)); if (static_cast<std::size_t>(tunableIndex) < m_TunableHyperparameters.size()) { supplied = false; std::tie(absoluteImportance, relativeImportance) = anovaMainEffects[tunableIndex]; } importances.push_back({hyperparameter, hyperparameterValue, absoluteImportance, relativeImportance, supplied, hyperparameterType}); } return importances; } void CBoostedTreeHyperparameters::recordHyperparameters( CDataFrameTrainBoostedTreeInstrumentationInterface& instrumentation) const { auto& hyperparameters = instrumentation.hyperparameters(); hyperparameters.s_Eta = m_Eta.value(); hyperparameters.s_RetrainedTreeEta = m_RetrainedTreeEta.value(); hyperparameters.s_DepthPenaltyMultiplier = m_DepthPenaltyMultiplier.value(); hyperparameters.s_SoftTreeDepthLimit = m_SoftTreeDepthLimit.value(); hyperparameters.s_SoftTreeDepthTolerance = m_SoftTreeDepthTolerance.value(); hyperparameters.s_TreeSizePenaltyMultiplier = m_TreeSizePenaltyMultiplier.value(); hyperparameters.s_LeafWeightPenaltyMultiplier = m_LeafWeightPenaltyMultiplier.value(); hyperparameters.s_TreeTopologyChangePenalty = m_TreeTopologyChangePenalty.value(); hyperparameters.s_DownsampleFactor = m_DownsampleFactor.value(); hyperparameters.s_MaxTrees = 
m_MaximumNumberTrees.value(); hyperparameters.s_FeatureBagFraction = m_FeatureBagFraction.value(); hyperparameters.s_PredictionChangeCost = m_PredictionChangeCost.value(); hyperparameters.s_EtaGrowthRatePerTree = m_EtaGrowthRatePerTree.value(); hyperparameters.s_MaxOptimizationRoundsPerHyperparameter = m_MaximumOptimisationRoundsPerHyperparameter; } void CBoostedTreeHyperparameters::checkSearchInvariants() const { if (m_BayesianOptimization == nullptr) { HANDLE_FATAL(<< "Internal error: must supply an optimizer. Please report this problem."); } } void CBoostedTreeHyperparameters::checkRestoredInvariants(bool expectOptimizerInitialized) const { if (expectOptimizerInitialized) { VIOLATES_INVARIANT_NO_EVALUATION(m_BayesianOptimization, ==, nullptr); } VIOLATES_INVARIANT(m_CurrentRound, >, m_NumberRounds); for (const auto& samples : m_HyperparameterSamples) { VIOLATES_INVARIANT(m_TunableHyperparameters.size(), !=, samples.size()); } } std::size_t CBoostedTreeHyperparameters::estimateMemoryUsage() const { std::size_t numberToTune{this->numberToTune()}; return sizeof(*this) + numberToTune * sizeof(int) + // m_TunableHyperparameters (m_NumberRounds / 3 + 1) * numberToTune * sizeof(double) + // m_HyperparameterSamples common::CBayesianOptimisation::estimateMemoryUsage(numberToTune, m_NumberRounds) + numberToTune * sizeof(std::size_t) + // m_LineSearchRelevantParameters numberToTune * maxLineSearchIterations() * // m_LineSearchHyperparameterLosses (sizeof(TVectorDoubleDoubleTr) + numberToTune * sizeof(double)); } std::size_t CBoostedTreeHyperparameters::memoryUsage() const { std::size_t mem{core::memory::dynamicSize(m_TunableHyperparameters)}; mem += core::memory::dynamicSize(m_HyperparameterSamples); mem += core::memory::dynamicSize(m_BayesianOptimization); mem += core::memory::dynamicSize(m_LineSearchRelevantParameters); mem += core::memory::dynamicSize(m_LineSearchHyperparameterLosses); return mem; } void CBoostedTreeHyperparameters::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persistIfNotNull(BAYESIAN_OPTIMIZATION_TAG, m_BayesianOptimization, inserter); core::CPersistUtils::persist(BEST_FOREST_LOSS_GAP_TAG, m_BestForestLossGap, inserter); core::CPersistUtils::persist(BEST_FOREST_TEST_LOSS_TAG, m_BestForestTestLoss, inserter); core::CPersistUtils::persist(BEST_FOREST_NUMBER_KEPT_NODES_TAG, m_BestForestNumberKeptNodes, inserter); core::CPersistUtils::persist(BEST_FOREST_NUMBER_NEW_NODES_TAG, m_BestForestNumberNewNodes, inserter); core::CPersistUtils::persist(CURRENT_ROUND_TAG, m_CurrentRound, inserter); core::CPersistUtils::persist(DEPTH_PENALTY_MULTIPLIER_TAG, m_DepthPenaltyMultiplier, inserter); core::CPersistUtils::persist(DOWNSAMPLE_FACTOR_TAG, m_DownsampleFactor, inserter); core::CPersistUtils::persist(ETA_GROWTH_RATE_PER_TREE_TAG, m_EtaGrowthRatePerTree, inserter); core::CPersistUtils::persist(ETA_TAG, m_Eta, inserter); core::CPersistUtils::persist(FEATURE_BAG_FRACTION_TAG, m_FeatureBagFraction, inserter); core::CPersistUtils::persist(LEAF_WEIGHT_PENALTY_MULTIPLIER_TAG, m_LeafWeightPenaltyMultiplier, inserter); core::CPersistUtils::persist(LINE_SEARCH_HYPERPARAMETER_LOSSES_TAG, m_LineSearchHyperparameterLosses, inserter); core::CPersistUtils::persist(LINE_SEARCH_RELEVANT_PARAMETERS_TAG, m_LineSearchRelevantParameters, inserter); core::CPersistUtils::persist(MAXIMUM_NUMBER_TREES_TAG, m_MaximumNumberTrees, inserter); core::CPersistUtils::persist(MAXIMUM_OPTIMISATION_ROUNDS_PER_HYPERPARAMETER_TAG, m_MaximumOptimisationRoundsPerHyperparameter, 
inserter); core::CPersistUtils::persist(MEAN_FOREST_SIZE_ACCUMULATOR_TAG, m_MeanForestSizeAccumulator, inserter); core::CPersistUtils::persist(MEAN_TEST_LOSS_ACCUMULATOR_TAG, m_MeanTestLossAccumulator, inserter); core::CPersistUtils::persist(NUMBER_ROUNDS_TAG, m_NumberRounds, inserter); core::CPersistUtils::persist(PREDICTION_CHANGE_COST_TAG, m_PredictionChangeCost, inserter); core::CPersistUtils::persist(RETRAINED_TREE_ETA_TAG, m_RetrainedTreeEta, inserter); core::CPersistUtils::persist(SOFT_TREE_DEPTH_LIMIT_TAG, m_SoftTreeDepthLimit, inserter); core::CPersistUtils::persist(SOFT_TREE_DEPTH_TOLERANCE_TAG, m_SoftTreeDepthTolerance, inserter); core::CPersistUtils::persist(EARLY_HYPERPARAMETER_OPTIMIZATION_STOPPING_ENABLED_TAG, m_EarlyHyperparameterOptimizationStoppingEnabled, inserter); core::CPersistUtils::persist(STOP_HYPERPARAMETER_OPTIMIZATION_EARLY_TAG, m_StopHyperparameterOptimizationEarly, inserter); core::CPersistUtils::persist(TREE_SIZE_PENALTY_MULTIPLIER_TAG, m_TreeSizePenaltyMultiplier, inserter); core::CPersistUtils::persist(TREE_TOPOLOGY_CHANGE_PENALTY_TAG, m_TreeTopologyChangePenalty, inserter); // m_TunableHyperparameters is not persisted explicitly, it is re-generated // from overriden hyperparameters. // m_HyperparameterSamples is not persisted explicitly, it is re-generated. } bool CBoostedTreeHyperparameters::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name{traverser.name()}; RESTORE_NO_ERROR(BAYESIAN_OPTIMIZATION_TAG, m_BayesianOptimization = std::make_unique<common::CBayesianOptimisation>(traverser)) RESTORE(BEST_FOREST_LOSS_GAP_TAG, core::CPersistUtils::restore(BEST_FOREST_LOSS_GAP_TAG, m_BestForestLossGap, traverser)) RESTORE(BEST_FOREST_TEST_LOSS_TAG, core::CPersistUtils::restore(BEST_FOREST_TEST_LOSS_TAG, m_BestForestTestLoss, traverser)) RESTORE(BEST_FOREST_NUMBER_KEPT_NODES_TAG, core::CPersistUtils::restore(BEST_FOREST_NUMBER_KEPT_NODES_TAG, m_BestForestNumberKeptNodes, traverser)) RESTORE(BEST_FOREST_NUMBER_NEW_NODES_TAG, core::CPersistUtils::restore(BEST_FOREST_NUMBER_NEW_NODES_TAG, m_BestForestNumberNewNodes, traverser)) RESTORE(CURRENT_ROUND_TAG, core::CPersistUtils::restore(CURRENT_ROUND_TAG, m_CurrentRound, traverser)) RESTORE(DEPTH_PENALTY_MULTIPLIER_TAG, core::CPersistUtils::restore(DEPTH_PENALTY_MULTIPLIER_TAG, m_DepthPenaltyMultiplier, traverser)) RESTORE(DOWNSAMPLE_FACTOR_TAG, core::CPersistUtils::restore(DOWNSAMPLE_FACTOR_TAG, m_DownsampleFactor, traverser)) RESTORE(EARLY_HYPERPARAMETER_OPTIMIZATION_STOPPING_ENABLED_TAG, core::CPersistUtils::restore( EARLY_HYPERPARAMETER_OPTIMIZATION_STOPPING_ENABLED_TAG, m_EarlyHyperparameterOptimizationStoppingEnabled, traverser)) RESTORE(ETA_TAG, core::CPersistUtils::restore(ETA_TAG, m_Eta, traverser)) RESTORE(ETA_GROWTH_RATE_PER_TREE_TAG, core::CPersistUtils::restore(ETA_GROWTH_RATE_PER_TREE_TAG, m_EtaGrowthRatePerTree, traverser)) RESTORE(FEATURE_BAG_FRACTION_TAG, core::CPersistUtils::restore(FEATURE_BAG_FRACTION_TAG, m_FeatureBagFraction, traverser)) RESTORE(LINE_SEARCH_HYPERPARAMETER_LOSSES_TAG, core::CPersistUtils::restore(LINE_SEARCH_HYPERPARAMETER_LOSSES_TAG, m_LineSearchHyperparameterLosses, traverser)) RESTORE(LINE_SEARCH_RELEVANT_PARAMETERS_TAG, core::CPersistUtils::restore(LINE_SEARCH_RELEVANT_PARAMETERS_TAG, m_LineSearchRelevantParameters, traverser)) RESTORE(LEAF_WEIGHT_PENALTY_MULTIPLIER_TAG, core::CPersistUtils::restore(LEAF_WEIGHT_PENALTY_MULTIPLIER_TAG, m_LeafWeightPenaltyMultiplier, traverser)) RESTORE(MAXIMUM_OPTIMISATION_ROUNDS_PER_HYPERPARAMETER_TAG, 
core::CPersistUtils::restore( MAXIMUM_OPTIMISATION_ROUNDS_PER_HYPERPARAMETER_TAG, m_MaximumOptimisationRoundsPerHyperparameter, traverser)) RESTORE(MAXIMUM_NUMBER_TREES_TAG, core::CPersistUtils::restore(MAXIMUM_NUMBER_TREES_TAG, m_MaximumNumberTrees, traverser)) RESTORE(MEAN_FOREST_SIZE_ACCUMULATOR_TAG, core::CPersistUtils::restore(MEAN_FOREST_SIZE_ACCUMULATOR_TAG, m_MeanForestSizeAccumulator, traverser)) RESTORE(MEAN_TEST_LOSS_ACCUMULATOR_TAG, core::CPersistUtils::restore(MEAN_TEST_LOSS_ACCUMULATOR_TAG, m_MeanTestLossAccumulator, traverser)) RESTORE(NUMBER_ROUNDS_TAG, core::CPersistUtils::restore(NUMBER_ROUNDS_TAG, m_NumberRounds, traverser)) RESTORE(PREDICTION_CHANGE_COST_TAG, core::CPersistUtils::restore(PREDICTION_CHANGE_COST_TAG, m_PredictionChangeCost, traverser)) RESTORE(RETRAINED_TREE_ETA_TAG, core::CPersistUtils::restore(RETRAINED_TREE_ETA_TAG, m_RetrainedTreeEta, traverser)) RESTORE(SOFT_TREE_DEPTH_LIMIT_TAG, core::CPersistUtils::restore(SOFT_TREE_DEPTH_LIMIT_TAG, m_SoftTreeDepthLimit, traverser)) RESTORE(SOFT_TREE_DEPTH_TOLERANCE_TAG, core::CPersistUtils::restore(SOFT_TREE_DEPTH_TOLERANCE_TAG, m_SoftTreeDepthTolerance, traverser)) RESTORE(STOP_HYPERPARAMETER_OPTIMIZATION_EARLY_TAG, core::CPersistUtils::restore(STOP_HYPERPARAMETER_OPTIMIZATION_EARLY_TAG, m_StopHyperparameterOptimizationEarly, traverser)) RESTORE(TREE_SIZE_PENALTY_MULTIPLIER_TAG, core::CPersistUtils::restore(TREE_SIZE_PENALTY_MULTIPLIER_TAG, m_TreeSizePenaltyMultiplier, traverser)) RESTORE(TREE_TOPOLOGY_CHANGE_PENALTY_TAG, core::CPersistUtils::restore(TREE_TOPOLOGY_CHANGE_PENALTY_TAG, m_TreeTopologyChangePenalty, traverser)) } while (traverser.next()); this->initializeTunableHyperparameters(); return true; } std::uint64_t CBoostedTreeHyperparameters::checksum(std::uint64_t seed) const { seed = common::CChecksum::calculate(seed, m_BayesianOptimization); seed = common::CChecksum::calculate(seed, m_BestForestLossGap); seed = common::CChecksum::calculate(seed, m_BestForestTestLoss); seed = common::CChecksum::calculate(seed, m_BestForestNumberKeptNodes); seed = common::CChecksum::calculate(seed, m_BestForestNumberNewNodes); seed = common::CChecksum::calculate(seed, m_CurrentRound); seed = common::CChecksum::calculate(seed, m_DepthPenaltyMultiplier); seed = common::CChecksum::calculate(seed, m_DownsampleFactor); seed = common::CChecksum::calculate(seed, m_EarlyHyperparameterOptimizationStoppingEnabled); seed = common::CChecksum::calculate(seed, m_Eta); seed = common::CChecksum::calculate(seed, m_EtaGrowthRatePerTree); seed = common::CChecksum::calculate(seed, m_FeatureBagFraction); seed = common::CChecksum::calculate(seed, m_HyperparameterSamples); seed = common::CChecksum::calculate(seed, m_LeafWeightPenaltyMultiplier); seed = common::CChecksum::calculate(seed, m_LineSearchRelevantParameters); seed = common::CChecksum::calculate(seed, m_LineSearchHyperparameterLosses); seed = common::CChecksum::calculate(seed, m_MaximumNumberTrees); seed = common::CChecksum::calculate(seed, m_MaximumOptimisationRoundsPerHyperparameter); seed = common::CChecksum::calculate(seed, m_MeanForestSizeAccumulator); seed = common::CChecksum::calculate(seed, m_MeanTestLossAccumulator); seed = common::CChecksum::calculate(seed, m_NumberRounds); seed = common::CChecksum::calculate(seed, m_PredictionChangeCost); seed = common::CChecksum::calculate(seed, m_RetrainedTreeEta); seed = common::CChecksum::calculate(seed, m_SoftTreeDepthLimit); seed = common::CChecksum::calculate(seed, m_SoftTreeDepthTolerance); seed = common::CChecksum::calculate(seed, 
m_StopHyperparameterOptimizationEarly); seed = common::CChecksum::calculate(seed, m_TreeSizePenaltyMultiplier); seed = common::CChecksum::calculate(seed, m_TreeTopologyChangePenalty); return common::CChecksum::calculate(seed, m_TunableHyperparameters); } std::string CBoostedTreeHyperparameters::print() const { return "(\ndepth penalty multiplier = " + m_DepthPenaltyMultiplier.print() + "\nsoft depth limit = " + m_SoftTreeDepthLimit.print() + "\nsoft depth tolerance = " + m_SoftTreeDepthTolerance.print() + "\ntree size penalty multiplier = " + m_TreeSizePenaltyMultiplier.print() + "\nleaf weight penalty multiplier = " + m_LeafWeightPenaltyMultiplier.print() + "\ntree topology change penalty = " + m_TreeTopologyChangePenalty.print() + "\ndownsample factor = " + m_DownsampleFactor.print() + "\nfeature bag fraction = " + m_FeatureBagFraction.print() + "\neta = " + m_Eta.print() + "\neta growth rate per tree = " + m_EtaGrowthRatePerTree.print() + "\nretrained tree eta = " + m_RetrainedTreeEta.print() + "\nprediction change cost = " + m_PredictionChangeCost.print() + "\nmaximum number trees = " + m_MaximumNumberTrees.print() + "\n)"; } CBoostedTreeHyperparameters::TStrVec CBoostedTreeHyperparameters::names() { return {DOWNSAMPLE_FACTOR_TAG, ETA_TAG, ETA_GROWTH_RATE_PER_TREE_TAG, RETRAINED_TREE_ETA_TAG, FEATURE_BAG_FRACTION_TAG, PREDICTION_CHANGE_COST_TAG, DEPTH_PENALTY_MULTIPLIER_TAG, TREE_SIZE_PENALTY_MULTIPLIER_TAG, LEAF_WEIGHT_PENALTY_MULTIPLIER_TAG, SOFT_TREE_DEPTH_LIMIT_TAG, SOFT_TREE_DEPTH_TOLERANCE_TAG, TREE_TOPOLOGY_CHANGE_PENALTY_TAG}; } void CBoostedTreeHyperparameters::initializeTunableHyperparameters() { m_TunableHyperparameters.clear(); m_TunableHyperparameters.reserve(NUMBER_HYPERPARAMETERS); auto addTrainHyperparameter = [&](EHyperparameter type, const TDoubleParameter& parameter) { if (parameter.fixed() == false) { m_TunableHyperparameters.push_back(type); } }; auto addIncrementalHyperparameter = [&](EHyperparameter type, const TDoubleParameter& parameter) { if (m_IncrementalTraining && (parameter.fixed() == false)) { m_TunableHyperparameters.push_back(type); } }; for (int i = 0; i < static_cast<int>(NUMBER_HYPERPARAMETERS); ++i) { switch (static_cast<EHyperparameter>(i)) { // Train hyperparameters. 
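        // Note: eta growth rate per tree is only tunable when eta itself is being
        // tuned, the prediction change cost, retrained tree eta and tree topology
        // change penalty are only tunable for incremental training, and the
        // maximum number of trees is never tuned directly (it is estimated from
        // the loss curves instead).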
case E_DownsampleFactor: addTrainHyperparameter(E_DownsampleFactor, m_DownsampleFactor); break; case E_Alpha: addTrainHyperparameter(E_Alpha, m_DepthPenaltyMultiplier); break; case E_Lambda: addTrainHyperparameter(E_Lambda, m_LeafWeightPenaltyMultiplier); break; case E_Gamma: addTrainHyperparameter(E_Gamma, m_TreeSizePenaltyMultiplier); break; case E_SoftTreeDepthLimit: addTrainHyperparameter(E_SoftTreeDepthLimit, m_SoftTreeDepthLimit); break; case E_SoftTreeDepthTolerance: addTrainHyperparameter(E_SoftTreeDepthTolerance, m_SoftTreeDepthTolerance); break; case E_Eta: addTrainHyperparameter(E_Eta, m_Eta); break; case E_EtaGrowthRatePerTree: if ((m_Eta.fixed() || m_EtaGrowthRatePerTree.fixed()) == false) { m_TunableHyperparameters.push_back(E_EtaGrowthRatePerTree); } break; case E_FeatureBagFraction: addTrainHyperparameter(E_FeatureBagFraction, m_FeatureBagFraction); break; case E_PredictionChangeCost: addIncrementalHyperparameter(E_PredictionChangeCost, m_PredictionChangeCost); break; case E_RetrainedTreeEta: addIncrementalHyperparameter(E_RetrainedTreeEta, m_RetrainedTreeEta); break; case E_TreeTopologyChangePenalty: addIncrementalHyperparameter(E_TreeTopologyChangePenalty, m_TreeTopologyChangePenalty); break; case E_MaximumNumberTrees: // Maximum number trees is not tuned directly and estimated from // loss curves. break; } } } void CBoostedTreeHyperparameters::saveCurrent() { m_DepthPenaltyMultiplier.save(); m_TreeSizePenaltyMultiplier.save(); m_LeafWeightPenaltyMultiplier.save(); m_SoftTreeDepthLimit.save(); m_SoftTreeDepthTolerance.save(); m_TreeTopologyChangePenalty.save(); m_DownsampleFactor.save(); m_FeatureBagFraction.save(); m_Eta.save(); m_EtaGrowthRatePerTree.save(); m_RetrainedTreeEta.save(); m_PredictionChangeCost.save(); m_MaximumNumberTrees.save(); } void CBoostedTreeHyperparameters::restoreBest() { m_DepthPenaltyMultiplier.load(); m_TreeSizePenaltyMultiplier.load(); m_LeafWeightPenaltyMultiplier.load(); m_SoftTreeDepthLimit.load(); m_SoftTreeDepthTolerance.load(); m_TreeTopologyChangePenalty.load(); m_DownsampleFactor.load(); m_FeatureBagFraction.load(); m_Eta.load(); m_EtaGrowthRatePerTree.load(); m_RetrainedTreeEta.load(); m_PredictionChangeCost.load(); m_MaximumNumberTrees.load(); LOG_TRACE(<< "loss* = " << m_BestForestTestLoss); LOG_TRACE(<< "parameters* = " << this->print()); } void CBoostedTreeHyperparameters::captureScale() { m_DepthPenaltyMultiplier.captureScale(); m_TreeSizePenaltyMultiplier.captureScale(); m_LeafWeightPenaltyMultiplier.captureScale(); m_SoftTreeDepthLimit.captureScale(); m_SoftTreeDepthTolerance.captureScale(); m_TreeTopologyChangePenalty.captureScale(); m_DownsampleFactor.captureScale(); m_FeatureBagFraction.captureScale(); m_Eta.captureScale(); m_EtaGrowthRatePerTree.captureScale(); m_RetrainedTreeEta.captureScale(); m_PredictionChangeCost.captureScale(); m_MaximumNumberTrees.captureScale(); } void CBoostedTreeHyperparameters::captureHyperparametersAndLoss(const TMeanVarAccumulator& testLoss) { if (m_EarlyHyperparameterOptimizationStoppingEnabled) { m_LineSearchHyperparameterLosses.emplace_back( this->currentParametersVector(), common::CBasicStatistics::mean(testLoss), common::CBasicStatistics::variance(testLoss)); } } CBoostedTreeHyperparameters::TVector CBoostedTreeHyperparameters::currentParametersVector() const { TVector parameters{m_TunableHyperparameters.size()}; this->foreachTunableParameter([&](std::size_t i, const auto& parameter) { parameters(i) = parameter.toSearchValue(); }); return parameters; } void 
CBoostedTreeHyperparameters::setHyperparameterValues(TVector parameters) { TVector minBoundary; TVector maxBoundary; std::tie(minBoundary, maxBoundary) = m_BayesianOptimization->boundingBox(); // Downsampling directly affects the loss terms: it multiplies the sums over // gradients and Hessians in expectation by the downsample factor. To preserve // the same effect for regularisers we need to scale these terms by the same // multiplier. double scale{1.0}; if (m_ScalingDisabled == false && m_DownsampleFactor.fixed() == false) { auto i = std::distance(m_TunableHyperparameters.begin(), std::find(m_TunableHyperparameters.begin(), m_TunableHyperparameters.end(), E_DownsampleFactor)); if (static_cast<std::size_t>(i) < m_TunableHyperparameters.size()) { scale = std::min( 1.0, 2.0 * m_DownsampleFactor.fromSearchValue(parameters(i)) / (m_DownsampleFactor.fromSearchValue(minBoundary(i)) + m_DownsampleFactor.fromSearchValue(maxBoundary(i)))); } } // Write parameters for next round. for (std::size_t i = 0; i < m_TunableHyperparameters.size(); ++i) { switch (m_TunableHyperparameters[i]) { case E_Alpha: m_DepthPenaltyMultiplier .set(m_DepthPenaltyMultiplier.fromSearchValue(parameters(i))) .scale(scale); break; case E_DownsampleFactor: m_DownsampleFactor.set(m_DownsampleFactor.fromSearchValue(parameters(i))); break; case E_Eta: m_Eta.set(m_Eta.fromSearchValue(parameters(i))); break; case E_EtaGrowthRatePerTree: m_EtaGrowthRatePerTree.set( m_EtaGrowthRatePerTree.fromSearchValue(parameters(i))); break; case E_FeatureBagFraction: m_FeatureBagFraction.set(m_FeatureBagFraction.fromSearchValue(parameters(i))); break; case E_Gamma: m_TreeSizePenaltyMultiplier .set(m_TreeSizePenaltyMultiplier.fromSearchValue(parameters(i))) .scale(scale); break; case E_Lambda: m_LeafWeightPenaltyMultiplier .set(m_LeafWeightPenaltyMultiplier.fromSearchValue(parameters(i))) .scale(scale); break; case E_SoftTreeDepthLimit: m_SoftTreeDepthLimit.set(m_SoftTreeDepthLimit.fromSearchValue(parameters(i))); break; case E_SoftTreeDepthTolerance: m_SoftTreeDepthTolerance.set( m_SoftTreeDepthTolerance.fromSearchValue(parameters(i))); break; case E_PredictionChangeCost: m_PredictionChangeCost.set( m_PredictionChangeCost.fromSearchValue(parameters(i))); break; case E_RetrainedTreeEta: m_RetrainedTreeEta.set(m_RetrainedTreeEta.fromSearchValue(parameters(i))); break; case E_TreeTopologyChangePenalty: m_TreeTopologyChangePenalty .set(m_TreeTopologyChangePenalty.fromSearchValue(parameters(i))) .scale(scale); break; case E_MaximumNumberTrees: break; } } } template<typename F> void CBoostedTreeHyperparameters::foreachTunableParameter(const F& f) const { for (std::size_t i = 0; i < m_TunableHyperparameters.size(); ++i) { switch (m_TunableHyperparameters[i]) { case E_Alpha: f(i, m_DepthPenaltyMultiplier); break; case E_DownsampleFactor: f(i, m_DownsampleFactor); break; case E_Eta: f(i, m_Eta); break; case E_EtaGrowthRatePerTree: f(i, m_EtaGrowthRatePerTree); break; case E_FeatureBagFraction: f(i, m_FeatureBagFraction); break; case E_Gamma: f(i, m_TreeSizePenaltyMultiplier); break; case E_Lambda: f(i, m_LeafWeightPenaltyMultiplier); break; case E_SoftTreeDepthLimit: f(i, m_SoftTreeDepthLimit); break; case E_SoftTreeDepthTolerance: f(i, m_SoftTreeDepthTolerance); break; case E_PredictionChangeCost: f(i, m_PredictionChangeCost); break; case E_RetrainedTreeEta: f(i, m_RetrainedTreeEta); break; case E_TreeTopologyChangePenalty: f(i, m_TreeTopologyChangePenalty); break; case E_MaximumNumberTrees: break; } } } } } }
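// A minimal sketch (not part of the library) of how a driver such as
// CBoostedTreeImpl might use the fine-tune interface above; the loop bound and
// the sources of testLossMoments, lossGap etc. are assumptions for illustration:
//
//   hyperparameters.resetFineTuneSearch();
//   hyperparameters.initializeFineTuneSearch(lossGap, numberTrees);
//   hyperparameters.startFineTuneSearch();
//   for (std::size_t round = 0; round < numberRounds; ++round) {
//       // ... cross-validated training with the current parameter values ...
//       hyperparameters.addRoundStats(meanForestSizeAccumulator, meanTestLoss);
//       hyperparameters.captureBest(testLossMoments, meanLossGap,
//                                   numberKeptNodes, numberNewNodes, numberTrees);
//       if (hyperparameters.selectNext(testLossMoments, explainedVariance) == false) {
//           break; // expected improvement no longer justifies more rounds
//       }
//   }
//   hyperparameters.restoreBest();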