# Project-BasicAlgorithm/core/utils.py (21 lines of code) (raw):
# Licensed to Apache Software Foundation (ASF) under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Apache Software Foundation (ASF) licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
def get_onehot_encoder(sparse=False, handle_unknown="ignore"):
    """
    build an unfitted OneHotEncoder

    :param sparse: whether the encoder should emit a sparse matrix; forwarded
        as ``sparse_output`` (or as ``sparse`` on older scikit-learn releases)
    :param handle_unknown: forwarded unchanged to ``OneHotEncoder``
    :return: a new, unfitted ``OneHotEncoder`` instance
    """
    try:
        # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4
        # removed the old spelling, so the current keyword is tried first.
        return OneHotEncoder(sparse_output=sparse, handle_unknown=handle_unknown)
    except TypeError:
        # Releases before 1.2 only accept the ``sparse`` keyword.
        return OneHotEncoder(sparse=sparse, handle_unknown=handle_unknown)
def get_oridinal_encoder(unknown_value=np.nan, handle_unknown="use_encoded_value"):
    """
    build an unfitted OrdinalEncoder

    :param unknown_value: forwarded unchanged to ``OrdinalEncoder``
    :param handle_unknown: forwarded unchanged to ``OrdinalEncoder``
    :return: a new, unfitted ``OrdinalEncoder`` instance
    """
    encoder_options = {
        "unknown_value": unknown_value,
        "handle_unknown": handle_unknown,
    }
    return OrdinalEncoder(**encoder_options)
def train_model(model_cls, params, train_x, train_y):
    """
    train model directly, or train model with searching params

    :param model_cls: estimator class, instantiated as
        ``model_cls(**params.input_params)``
    :param params: object exposing ``input_params`` (constructor kwargs) and
        ``search_params`` (grid handed to ``GridSearchCV``; falsy skips the search)
    :param train_x: training features passed to ``fit``
    :param train_y: training targets passed to ``fit``
    :return: the fitted model; when a search ran, its ``best_estimator_``
    """
    model = model_cls(**params.input_params)

    # No grid configured: fit the plain model and stop here.
    if not params.search_params:
        model.fit(train_x, train_y)
        return model

    search = GridSearchCV(estimator=model, param_grid=params.search_params)
    search.fit(train_x, train_y)
    best_model = search.best_estimator_

    # Report every candidate with its mean cross-validation score. Dedicated
    # local names are used so the ``params`` argument is never rebound.
    cv_results = search.cv_results_
    for candidate, score in zip(cv_results['params'], cv_results['mean_test_score']):
        print(candidate, score)

    return best_model