Project-AutoML/automl/mod/mod

# Licensed to Apache Software Foundation (ASF) under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Apache Software Foundation (ASF) licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pickle

import numpy as np
from autosklearn.classification import AutoSklearnClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder

from automl.metrics import eval_classification_metrics
from automl.mod.tool import BasePredictor, Tool
from automl.params import Params


class AutoSklearn(Tool):
    """auto-sklearn implementation of the AutoML ``Tool`` interface.

    A trained model is a two-step scikit-learn ``Pipeline``: an
    ``OrdinalEncoder`` (step 0) followed by the fitted
    ``AutoSklearnClassifier`` (step 1).
    """

    # Presumably the file name the pickled model is stored under; it is
    # only declared here and consumed outside this file.
    model_path = "autosklearn.pkl"

    # Conda environment specification shipped with the model.
    # NOTE(review): "click==8.0." ends with a dangling dot and does not look
    # like a valid version pin -- confirm the intended version.
    conda_env = {
        "dependencies": [
            "python=3.8.10",
            "pip",
            {
                "pip": [
                    "mlflow",
                    "click==8.0.",
                    "scikit-learn==0.24.2",
                    "boto3==1.22.2",
                    "pandas>=1.0.0",
                    "setuptools<59.6.0",
                    "auto-sklearn==0.14.6",
                    "flaml==1.0.1",
                ],
            },
        ],
        "name": "MLflow-AutoML",
    }

    @staticmethod
    def train_automl(train_x, train_y, other_params=None, **kwargs):
        """Fit an ordinal encoder plus an ``AutoSklearnClassifier``.

        Args:
            train_x: Training features. Must expose ``.dtypes`` whose items
                have a ``.name`` (e.g. a pandas ``DataFrame``).
            train_y: Training targets, passed to the classifier's ``fit``.
            other_params: Forwarded to ``Params`` as ``param_str``.
            **kwargs: Forwarded to ``Params``.

        Returns:
            A ``Pipeline`` whose steps are ``("oridinal_encoder", encoder)``
            and ``("classifier", fitted AutoSklearnClassifier)``.
        """
        params = Params(param_str=other_params, **kwargs)
        print(params)

        # NOTE(review): the encoder is fitted on every column of train_x, not
        # only the object/category ones -- confirm this is intended.
        encoder = OrdinalEncoder(
            unknown_value=np.nan, handle_unknown="use_encoded_value"
        )
        # The step name (including its spelling) is kept as-is so that
        # previously pickled pipelines stay consistent.
        pipeline = Pipeline(steps=[("oridinal_encoder", encoder)])

        # Column kinds must be derived from the original dtypes, i.e. before
        # train_x is replaced by the encoder output below.
        feat_type = [
            "Categorical" if x.name in {"object", "category"} else "Numerical"
            for x in train_x.dtypes
        ]

        train_x = pipeline.fit_transform(train_x)
        classifier = AutoSklearnClassifier(**params.input_params)
        classifier.fit(train_x, train_y, feat_type=feat_type)

        # The classifier is appended after fitting so that the pipeline's own
        # fit_transform above only involved the encoder.
        pipeline.steps.append(("classifier", classifier))
        return pipeline

    @staticmethod
    def eval(pipeline: Pipeline, test_x, test_y, task="classification"):
        """Evaluate a pipeline produced by ``train_automl``.

        Args:
            pipeline: Two-step pipeline (encoder, classifier).
            test_x: Raw test features; encoded here with step 0.
            test_y: Ground-truth targets.
            task: ``"classification"`` computes the classification metrics;
                any other value is delegated to ``Tool.eval_automl``.

        Returns:
            The value returned by ``eval_classification_metrics`` or by
            ``Tool.eval_automl``.
        """
        oridinal_encoder = pipeline.steps[0][1]
        classifier = pipeline.steps[1][1]
        test_x = oridinal_encoder.transform(test_x)

        if task == "classification":
            y_pred = classifier.predict(test_x)
            return eval_classification_metrics(test_y, y_pred)

        # Zero-argument super() cannot be used inside a staticmethod, and the
        # original referenced an undefined name ``automl``; call the base
        # implementation explicitly with the fitted estimator instead.
        # NOTE(review): assumes Tool.eval_automl accepts
        # (estimator, test_x, test_y) with already-encoded features -- confirm
        # against the base class.
        return Tool.eval_automl(classifier, test_x, test_y)

    @staticmethod
    def save_automl(classifier: AutoSklearnClassifier, save_path: str):
        """Pickle ``classifier`` to ``save_path``.

        NOTE(review): ``Predictor.load_automl`` expects the unpickled object
        to be the whole pipeline (it reads ``.steps``), so callers presumably
        pass the ``Pipeline`` returned by ``train_automl`` here despite the
        parameter name and annotation -- confirm.
        """
        with open(save_path, "wb") as w_f:
            pickle.dump(classifier, w_f)


class Predictor(BasePredictor):
    """Serve predictions from a pickled encoder + auto-sklearn pipeline."""

    def load_automl(self, model_path):
        """Load the pickled pipeline and cache its two steps.

        Sets ``self.pipeline``, ``self.oridinal_encoder`` (step 0) and
        ``self.automl`` (step 1).
        """
        with open(model_path, "rb") as r_f:
            # SECURITY: unpickling executes arbitrary code; model_path must
            # point to a trusted file.
            self.pipeline: Pipeline = pickle.load(r_f)
        self.oridinal_encoder = self.pipeline.steps[0][1]
        self.automl = self.pipeline.steps[1][1]

    def predict(self, inputs):
        """Predict for raw (un-encoded) ``inputs``.

        Returns a list of ``{"label", "confidence"}`` dicts when the loaded
        estimator is an ``AutoSklearnClassifier``; otherwise the estimator's
        plain ``predict`` output.
        """
        if isinstance(self.automl, AutoSklearnClassifier):
            return self.predict_classification(inputs)
        # The estimator was fitted on encoder output, so the fallback path has
        # to encode as well (as AutoSklearn.eval does).
        return self.automl.predict(self.oridinal_encoder.transform(inputs))

    def predict_classification(self, inputs):
        """Return the top label and its probability for every input row.

        Returns:
            A list with one ``{"label": ..., "confidence": float}`` dict per
            row of ``inputs``.
        """
        inputs = self.oridinal_encoder.transform(inputs)
        # load_automl stores the classifier as ``self.automl``; no
        # ``self.classifier`` attribute is ever set in this class.
        pred_proba = self.automl.predict_proba(inputs)
        label_indexes = pred_proba.argmax(axis=1)
        # Pick, for each row, the probability of its arg-max class.
        probs = pred_proba[np.arange(pred_proba.shape[0]), label_indexes]
        # Map class indexes back to labels through auto-sklearn's own target
        # validator.
        target_validator = self.automl.automl_.InputValidator.target_validator
        labels = target_validator.inverse_transform(label_indexes)
        return [
            {"label": label, "confidence": float(pro)}
            for label, pro in zip(labels, probs)
        ]

Project-AutoML/automl/mod/mod_autosklearn.py (93 lines of code) (raw):