in src/autotrain/trainers/tabular/utils.py [0:0]
def get_params(trial, model, task):
    """Return an Optuna-sampled hyperparameter dict for ``model`` on ``task``.

    Args:
        trial: An ``optuna.Trial`` (or compatible object) whose
            ``suggest_float`` / ``suggest_int`` / ``suggest_categorical``
            methods are used to sample each hyperparameter.
        model: Model identifier string, e.g. ``"xgboost"``,
            ``"random_forest"``, ``"svm"``.
        task: Task name; for task-dependent models it must be a member of
            ``CLASSIFICATION_TASKS`` or ``REGRESSION_TASKS``.

    Returns:
        dict: Keyword arguments suitable for the matching estimator's
        constructor (plus ``early_stopping_rounds``/``n_estimators`` style
        fit-time settings for xgboost).

    Raises:
        ValueError: If ``task`` is not supported for the given model, or if
            ``model`` is not a recognized model name.
    """
    if model == "xgboost":
        # Task-agnostic: the same space works for both classification and
        # regression objectives.
        params = {
            "learning_rate": trial.suggest_float("learning_rate", 1e-2, 0.25, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 100.0, log=True),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 100.0, log=True),
            "subsample": trial.suggest_float("subsample", 0.1, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.1, 1.0),
            "max_depth": trial.suggest_int("max_depth", 1, 9),
            "early_stopping_rounds": trial.suggest_int("early_stopping_rounds", 100, 500),
            # Large caps are intentional: early stopping trims the actual count.
            "n_estimators": trial.suggest_categorical("n_estimators", [7000, 15000, 20000]),
            "tree_method": "hist",
            "random_state": 42,
        }
        return params
    if model == "logistic_regression":
        if task in CLASSIFICATION_TASKS:
            params = {
                "C": trial.suggest_float("C", 1e-8, 1e3, log=True),
                "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
                # liblinear and saga both support the l1 and l2 penalties below.
                "solver": trial.suggest_categorical("solver", ["liblinear", "saga"]),
                "penalty": trial.suggest_categorical("penalty", ["l1", "l2"]),
                "n_jobs": -1,
            }
            return params
        raise ValueError("Task not supported")
    if model == "random_forest":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 10000),
            "max_depth": trial.suggest_int("max_depth", 2, 15),
            # NOTE(review): "auto" was deprecated in scikit-learn 1.1 and
            # removed in 1.3 -- confirm the pinned sklearn version accepts it.
            "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2", None]),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
            "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
            "n_jobs": -1,
        }
        # Only the split criterion depends on the task type.
        if task in CLASSIFICATION_TASKS:
            params["criterion"] = trial.suggest_categorical("criterion", ["gini", "entropy"])
            return params
        if task in REGRESSION_TASKS:
            params["criterion"] = trial.suggest_categorical(
                "criterion", ["squared_error", "absolute_error", "poisson"]
            )
            return params
        raise ValueError("Task not supported")
    if model == "extra_trees":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 10000),
            "max_depth": trial.suggest_int("max_depth", 2, 15),
            # NOTE(review): "auto" was removed in scikit-learn 1.3 -- see
            # random_forest note above.
            "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2", None]),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
            "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
            "n_jobs": -1,
        }
        if task in CLASSIFICATION_TASKS:
            params["criterion"] = trial.suggest_categorical("criterion", ["gini", "entropy"])
            return params
        if task in REGRESSION_TASKS:
            params["criterion"] = trial.suggest_categorical("criterion", ["squared_error", "absolute_error"])
            return params
        raise ValueError("Task not supported")
    if model == "decision_tree":
        params = {
            "max_depth": trial.suggest_int("max_depth", 1, 15),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
            # NOTE(review): "auto" was removed in scikit-learn 1.3 -- see
            # random_forest note above.
            "max_features": trial.suggest_categorical("max_features", ["auto", "sqrt", "log2", None]),
            "splitter": trial.suggest_categorical("splitter", ["best", "random"]),
        }
        if task in CLASSIFICATION_TASKS:
            params["criterion"] = trial.suggest_categorical("criterion", ["gini", "entropy"])
            return params
        if task in REGRESSION_TASKS:
            params["criterion"] = trial.suggest_categorical(
                "criterion", ["squared_error", "absolute_error", "friedman_mse", "poisson"]
            )
            return params
        raise ValueError("Task not supported")
    if model == "linear_regression":
        if task in REGRESSION_TASKS:
            params = {
                "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            }
            return params
        raise ValueError("Task not supported")
    if model == "svm":
        if task in CLASSIFICATION_TASKS:
            params = {
                "C": trial.suggest_float("C", 1e-8, 1e3, log=True),
                "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
                "penalty": "l2",
                "max_iter": trial.suggest_int("max_iter", 1000, 10000),
            }
            return params
        if task in REGRESSION_TASKS:
            params = {
                "C": trial.suggest_float("C", 1e-8, 1e3, log=True),
                "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
                "loss": trial.suggest_categorical("loss", ["epsilon_insensitive", "squared_epsilon_insensitive"]),
                "epsilon": trial.suggest_float("epsilon", 1e-8, 1e-1, log=True),
                "max_iter": trial.suggest_int("max_iter", 1000, 10000),
            }
            return params
        raise ValueError("Task not supported")
    if model == "ridge":
        # Identical search space for Ridge and RidgeClassifier, so a single
        # combined membership check suffices (mirrors the knn branch below).
        params = {
            "alpha": trial.suggest_float("alpha", 1e-8, 1e3, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "max_iter": trial.suggest_int("max_iter", 1000, 10000),
        }
        if task in CLASSIFICATION_TASKS or task in REGRESSION_TASKS:
            return params
        raise ValueError("Task not supported")
    if model == "lasso":
        if task in REGRESSION_TASKS:
            params = {
                "alpha": trial.suggest_float("alpha", 1e-8, 1e3, log=True),
                "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
                "max_iter": trial.suggest_int("max_iter", 1000, 10000),
            }
            return params
        raise ValueError("Task not supported")
    if model == "knn":
        params = {
            "n_neighbors": trial.suggest_int("n_neighbors", 1, 25),
            "weights": trial.suggest_categorical("weights", ["uniform", "distance"]),
            "algorithm": trial.suggest_categorical("algorithm", ["ball_tree", "kd_tree", "brute"]),
            "leaf_size": trial.suggest_int("leaf_size", 1, 100),
            "p": trial.suggest_categorical("p", [1, 2]),
            "metric": trial.suggest_categorical("metric", ["minkowski", "euclidean", "manhattan"]),
        }
        if task in CLASSIFICATION_TASKS or task in REGRESSION_TASKS:
            return params
        raise ValueError("Task not supported")
    # Fix: the original *returned* the exception instance instead of raising
    # it, so callers silently received a ValueError object as "params".
    raise ValueError("Invalid model")