core/maxframe/learn/contrib/xgboost/classifier.py

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional, Union

import numpy as np

from .... import tensor as mt
from ....tensor.merge.vstack import _vstack
from ..utils import make_import_error_func
from .core import XGBScikitLearnBase, xgboost
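
# When xgboost is not installed, ``make_import_error_func`` supplies a stub
# so that importing this module succeeds and the error surfaces only when
# the classifier is actually used.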

if not xgboost:
    XGBClassifier = make_import_error_func("xgboost")
else:
    from xgboost.sklearn import XGBClassifierBase

    from .core import wrap_evaluation_matrices
    from .predict import predict
    from .train import train

    class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
        """
        Implementation of the scikit-learn API for XGBoost classification.
        """

        def __init__(
            self,
            xgb_model: Optional[Union[xgboost.XGBClassifier, xgboost.Booster]] = None,
            **kwargs,
        ):
            super().__init__(**kwargs)
            self._set_model(xgb_model)

        def fit(
            self,
            X,
            y,
            sample_weight=None,
            base_margin=None,
            eval_set=None,
            sample_weight_eval_set=None,
            base_margin_eval_set=None,
            num_class=None,
            **kw,
        ):
            session = kw.pop("session", None)
            run_kwargs = kw.pop("run_kwargs", None) or dict()
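            # Bundle X/y and any evaluation sets into the DMatrix-style
            # structures consumed by the train() operator below.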
            dtrain, evals = wrap_evaluation_matrices(
                None,
                X,
                y,
                sample_weight,
                base_margin,
                eval_set,
                sample_weight_eval_set,
                base_margin_eval_set,
            )
            params = self.get_xgb_params()
            self._n_features_in = X.shape[1]
            self.n_classes_ = num_class or 1
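            # Choose the objective from the class count: multi:softprob emits
            # one probability column per class, binary:logistic a single
            # positive-class probability.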
            if self.n_classes_ > 2:
                params["objective"] = "multi:softprob"
                params["num_class"] = self.n_classes_
            else:
                params["objective"] = "binary:logistic"
            self.evals_result_ = dict()
            result = train(
                params,
                dtrain,
                num_boost_round=self.get_num_boosting_rounds(),
                evals=evals,
                evals_result=self.evals_result_,
                num_class=num_class,
                session=session,
                run_kwargs=run_kwargs,
            )
            self._Booster = result
            return self

        def predict(self, data, **kw):
            prob = self.predict_proba(data, flag=True, **kw)
            if prob.ndim > 1:
                # multi-class: take the class with the highest probability
                prediction = mt.argmax(prob, axis=1)
            else:
                # binary: threshold the positive-class probability at 0.5
                prediction = (prob > 0.5).astype(np.int64)
            return prediction

        def predict_proba(self, data, ntree_limit=None, flag=False, **kw):
            if ntree_limit is not None:
                raise NotImplementedError("ntree_limit is not currently supported")
            prediction = predict(self.get_booster(), data, flag=flag, **kw)
            if len(prediction.shape) == 2 and prediction.shape[1] == self.n_classes_:
                # multi-class: already one probability column per class
                return prediction
            if (
                len(prediction.shape) == 2
                and self.n_classes_ == 2
                and prediction.shape[1] >= self.n_classes_
            ):
                # multi-label
                return prediction
            # binary logistic objective: the booster yields P(y=1); stack
            # P(y=0) = 1 - P(y=1) beside it to form an (n_samples, 2) matrix
            classone_probs = prediction
            classzero_probs = 1.0 - classone_probs
            return mt.transpose(_vstack((classzero_probs, classone_probs)))
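
        # Mirrors scikit-learn's convention: class labels are the consecutive
        # integers 0..n_classes_ - 1, matching the argmax indices produced by
        # predict() above.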
        @property
        def classes_(self) -> np.ndarray:
            return np.arange(self.n_classes_)