ml/classifiers.py

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""
Credit Risk Models

The module contains model definitions of various tested models
for credit assessment
"""
import logging
from typing import Any, Dict, List, Optional

import joblib
import pandas as pd
from django.core.exceptions import BadRequest
from sklearn.preprocessing import LabelEncoder

log = logging.getLogger(__name__)


class Classifier(object):
    """
    Basic Scorecard Model

    Warning: This class should not be used directly.
    Use derived classes instead.

    Parameters
    ----------
    model:
        Estimator object; ``predict`` calls its ``predict_proba`` method.
    categorical: list, optional
        Names of the categorical columns. The name ``'risk'`` is skipped
        during preprocessing.
    label_encoders: dict, optional
        Mapping of categorical column name to the encoder whose
        ``transform`` is applied to that column.
    """

    # Number of categorical columns that may fail to encode before
    # ``preprocessing`` rejects the request with ``BadRequest``.
    MAX_FAILED_ENCODINGS = 3

    def __init__(self,
                 model=None,
                 categorical: Optional[List[str]] = None,
                 label_encoders: Optional[Dict[str, LabelEncoder]] = None):
        self.model = model
        # ``None`` sentinels avoid sharing one mutable default object
        # between every instance created without these arguments.
        self.categorical = [] if categorical is None else categorical
        self.label_encoders = (
            {} if label_encoders is None else label_encoders
        )

    def preprocessing(self, data: Dict[str, Any]) -> pd.DataFrame:
        """
        Preprocess python dict object for prediction

        Parameters
        ----------
        data: dict
            dictionary of data to predict

        Returns
        -------
        pandas.DataFrame
            Frame built from ``data`` with the categorical columns replaced
            by the output of their encoder's ``transform``.

        Raises
        ------
        BadRequest
            When ``MAX_FAILED_ENCODINGS`` or more categorical columns raise
            ``KeyError`` while being encoded (column or encoder missing).
        """
        categorical = [name for name in self.categorical if name != 'risk']

        frame = pd.DataFrame(data, index=[0])
        # Rows containing a missing value are dropped before encoding.
        frame = frame.dropna()

        # Collected across ALL categories so the threshold below can
        # actually be reached.
        failed_trials = []
        for category in categorical:
            try:
                encoder = self.label_encoders[category]
                frame[category] = encoder.transform(frame[category])
            except KeyError as e:
                failed_trials.append(e)
                log.debug(f"An error occured: {str(e)}")
                if len(failed_trials) >= self.MAX_FAILED_ENCODINGS:
                    raise BadRequest(failed_trials) from e
                # Keep the column present (empty) for the failed category.
                frame[category] = None

        return frame

    def predict(self, data):
        """
        Predict scorecard model

        Args:
            data: array
                Data to perform prediction on.

        Returns:
            The result of ``self.model.predict_proba(data)``.
        """
        return self.model.predict_proba(data)

    def postprocessing(self, prediction):
        """
        Convert one prediction into a labelled result

        Args:
            prediction: indexable
                Only element ``1`` is read; it is reported as the
                probability.

        Returns:
            dict with keys ``"probability"`` (``prediction[1]``) and
            ``"label"`` (``"good"`` when the probability is above 0.5,
            otherwise ``"bad"``).
        """
        probability = prediction[1]
        label = "good" if probability > 0.5 else "bad"
        return {"probability": probability, "label": label}

    def compute_prediction(self, data: Dict[str, Any]):
        """
        Run preprocessing, prediction and postprocessing on ``data``

        Args:
            data: dict
                dictionary of data to predict

        Returns:
            The dict produced by ``postprocessing`` for the first row of
            the prediction output.

        Raises:
            BadRequest: for any exception raised by one of the three
                steps; the original exception is attached as the cause.
        """
        try:
            input_data = self.preprocessing(data)
            prediction = self.predict(input_data)[0]
            prediction = self.postprocessing(prediction)
        except Exception as e:
            log.debug(f'An error occured: {str(e)}')
            # Chain the cause so the original traceback is not lost.
            raise BadRequest(str(e)) from e

        return prediction


# NOTE: in every subclass below the ``joblib.load`` defaults are evaluated
# once, when the class body is executed (i.e. when this module is imported),
# and the loaded objects are shared by all instances that rely on the
# defaults. The artifact files must therefore exist at import time.


class RandomForestClassifier(Classifier):
    """Classifier whose default model is loaded from ``rf_classifier.joblib``."""

    def __init__(
            self,
            model=joblib.load('zoo/models/german/rf_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        super().__init__(model, categorical, label_encoders)


class SVC(Classifier):
    """Classifier whose default model is loaded from ``svc_classifier.joblib``."""

    def __init__(
            self,
            model=joblib.load('zoo/models/german/svc_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        super().__init__(model, categorical, label_encoders)


class MLP(Classifier):
    """Classifier whose default model is loaded from ``mlp_classifier.joblib``."""

    def __init__(
            self,
            model=joblib.load('zoo/models/german/mlp_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        super().__init__(model, categorical, label_encoders)


class GradientBoostClassifier(Classifier):
    """Classifier whose default model is loaded from ``gb_classifier.joblib``."""

    def __init__(
            self,
            model=joblib.load('zoo/models/german/gb_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        super().__init__(model, categorical, label_encoders)

ml/classifiers.py (81 lines of code) (raw):