Project-BasicAlgorithm/core/training/xgboost.py (26 lines of code) (raw):

# Licensed to Apache Software Foundation (ASF) under one or more contributor # license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright # ownership. Apache Software Foundation (ASF) licenses this file to you under # the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import mlflow from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline from xgboost import XGBClassifier from core.metrics import eval_classification_metrics from core.utils import get_oridinal_encoder, train_model from .params import XGBoostParams def train_xgboost( train_x, train_y, test_x, test_y, param_file=None, params=None, search_params=None ): pipeline_mods = [] pipeline_mods.append(("oridinal_encoder", get_oridinal_encoder())) pipeline = Pipeline(steps=pipeline_mods) train_x = pipeline.fit_transform(train_x) params = XGBoostParams( XGBClassifier, param_file=param_file, param_str=params, use_label_encoder=True, search_params=search_params, ) model = train_model(XGBClassifier, params, train_x, train_y) pipeline.steps.append(("model", model)) y_pred = pipeline.predict(test_x) metrics = eval_classification_metrics(test_y, y_pred) return pipeline, metrics