in src/io/config.cpp [259:392]
void Config::CheckParamConflict() {
// check if objective, metric, and num_class match
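// e.g. objective="multiclass" requires num_class > 1, while any non-multiclass
// objective (e.g. "binary") requires num_class == 1 when training.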
int num_class_check = num_class;
bool objective_type_multiclass = CheckMultiClassObjective(objective) || (objective == std::string("custom") && num_class_check > 1);
if (objective_type_multiclass) {
if (num_class_check <= 1) {
Log::Fatal("Number of classes should be specified and greater than 1 for multiclass training");
}
} else {
if (task == TaskType::kTrain && num_class_check != 1) {
Log::Fatal("Number of classes must be 1 for non-multiclass training");
}
}
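// Each metric must match the objective's multiclass-ness: with objective="multiclass",
// metric="multi_logloss" or "auc_mu" is accepted, but metric="binary_logloss" is fatal.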
for (const std::string& metric_type : metric) {
bool metric_type_multiclass = (CheckMultiClassObjective(metric_type)
|| metric_type == std::string("multi_logloss")
|| metric_type == std::string("multi_error")
|| metric_type == std::string("auc_mu")
|| (metric_type == std::string("custom") && num_class_check > 1));
if ((objective_type_multiclass && !metric_type_multiclass)
|| (!objective_type_multiclass && metric_type_multiclass)) {
Log::Fatal("Multiclass objective and metrics don't match");
}
}
if (num_machines > 1) {
is_parallel = true;
} else {
is_parallel = false;
tree_learner = "serial";
}
bool is_single_tree_learner = tree_learner == std::string("serial");
if (is_single_tree_learner) {
is_parallel = false;
num_machines = 1;
}
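// Net effect: num_machines <= 1 forces the serial tree learner, and the serial
// learner in turn forces num_machines back to 1, so is_parallel, num_machines,
// and tree_learner stay mutually consistent.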
if (is_single_tree_learner || tree_learner == std::string("feature")) {
is_data_based_parallel = false;
} else if (tree_learner == std::string("data")
|| tree_learner == std::string("voting")) {
is_data_based_parallel = true;
if (histogram_pool_size >= 0
&& tree_learner == std::string("data")) {
Log::Warning("Histogram LRU queue was enabled (histogram_pool_size=%f).\n"
"Will disable this to reduce communication costs",
histogram_pool_size);
// Change pool size to -1 (no limit) when using data parallel to reduce communication costs
histogram_pool_size = -1;
}
}
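// e.g. tree_learner="data" with num_machines=2 and histogram_pool_size=1024 ends up
// data-parallel with histogram_pool_size=-1 (no limit) after the adjustment above.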
if (is_data_based_parallel) {
if (!forcedsplits_filename.empty()) {
Log::Fatal("Don't support forcedsplits in %s tree learner",
tree_learner.c_str());
}
}
// Check max_depth and num_leaves
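// Example (assuming the default num_leaves, kDefaultNumLeaves, is 31): max_depth=3 gives
// 2^3 = 8 < 31, so num_leaves is capped to 8 below; max_depth=7 gives 2^7 = 128 > 31,
// which only triggers the warning when num_leaves was left at its default.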
if (max_depth > 0) {
double full_num_leaves = std::pow(2, max_depth);
if (full_num_leaves > num_leaves
&& num_leaves == kDefaultNumLeaves) {
Log::Warning("Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves."
" (num_leaves=%d).",
num_leaves);
}
if (full_num_leaves < num_leaves) {
// Fits in an int, and is more restrictive than the current num_leaves
num_leaves = static_cast<int>(full_num_leaves);
}
}
// force col-wise for gpu & CUDA
if (device_type == std::string("gpu") || device_type == std::string("cuda")) {
force_col_wise = true;
force_row_wise = false;
if (deterministic) {
Log::Warning("Although \"deterministic\" is set, the results ran by GPU may be non-deterministic.");
}
}
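// Note: this overrides a user-supplied force_row_wise=true; column-wise histogram
// construction is always forced on GPU and CUDA devices.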
// force gpu_use_dp for CUDA
if (device_type == std::string("cuda") && !gpu_use_dp) {
Log::Warning("CUDA currently requires double precision calculations.");
gpu_use_dp = true;
}
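// e.g. device_type="cuda" with gpu_use_dp=false is corrected to gpu_use_dp=true above,
// while device_type="gpu" (the OpenCL backend) keeps the user's gpu_use_dp setting.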
// linear tree learner must be serial type and run on CPU device
if (linear_tree) {
if (device_type != std::string("cpu")) {
device_type = "cpu";
Log::Warning("Linear tree learner only works with CPU.");
}
if (tree_learner != std::string("serial")) {
tree_learner = "serial";
Log::Warning("Linear tree learner must be serial.");
}
if (zero_as_missing) {
Log::Fatal("zero_as_missing must be false when fitting linear trees.");
}
if (objective == std::string("regresson_l1")) {
Log::Fatal("Cannot use regression_l1 objective when fitting linear trees.");
}
}
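// Example: linear_tree=true with device_type="gpu" and tree_learner="data" is coerced to
// the CPU device and the serial learner (with warnings), whereas zero_as_missing=true or
// objective="regression_l1" aborts with a fatal error.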
// min_data_in_leaf must be at least 2 if path smoothing is active. This is because, when a split is evaluated,
// the leaf count is estimated from the proportion of the hessian in the leaf, rounded up to the nearest int,
// so it can be 1 even when the leaf actually contains no data. In rare cases this causes a bug: with path
// smoothing, the calculated split gain can be positive even with zero gradient and hessian.
if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
min_data_in_leaf = 2;
Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");
}
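// e.g. path_smooth=1.0 with min_data_in_leaf=1 is bumped to min_data_in_leaf=2 here.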
if (is_parallel && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
// In distributed mode, a local node doesn't hold histograms for all features, so it cannot apply the
// "intermediate" or "advanced" monotone constraint methods.
Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints in distributed learning; falling back to the \"basic\" method.");
monotone_constraints_method = "basic";
}
if (feature_fraction_bynode != 1.0 && (monotone_constraints_method == std::string("intermediate") || monotone_constraints_method == std::string("advanced"))) {
// "intermediate" monotone constraints need to recompute splits. If the features are sampled when computing the
// split initially, then the sampling needs to be recorded or done once again, which is currently not supported
Log::Warning("Cannot use \"intermediate\" or \"advanced\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method.");
monotone_constraints_method = "basic";
}
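// Both fallbacks above warn and continue with the "basic" method rather than abort,
// e.g. num_machines=2 with monotone_constraints_method="advanced" still trains.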
if (max_depth > 0 && monotone_penalty >= max_depth) {
Log::Warning("Monotone penalty greater than tree depth. Monotone features won't be used.");
}
if (min_data_in_leaf <= 0 && min_sum_hessian_in_leaf <= kEpsilon) {
Log::Warning(
"Cannot set both min_data_in_leaf and min_sum_hessian_in_leaf to 0. "
"Will set min_data_in_leaf to 1.");
min_data_in_leaf = 1;
}
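// e.g. min_data_in_leaf=0 with min_sum_hessian_in_leaf=0.0 would allow empty leaves,
// so min_data_in_leaf is reset to 1 above.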
}