in stats/statistical_scoring.py [0:0]
def prepare_data(data):
    """Encode, standardize and split off the last row of ``data``.

    The frame is modified in place:

    * the ``'job'`` column is cast to ``int``;
    * every non-numeric column is replaced by integer codes produced by
      ``LabelEncoder``;
    * every column that was numeric before encoding is standardized to
      zero mean and unit population standard deviation (``ddof=0``).
      Label-encoded columns are left as plain codes.

    Args:
        data: DataFrame that contains at least the columns ``'job'`` and
            ``'risk'``.

    Returns:
        A tuple ``(data, input_data)`` where ``data`` is the same (now
        transformed) DataFrame object that was passed in, and
        ``input_data`` is a one-row DataFrame (index ``[0]``) holding the
        last row of the transformed frame without the ``'risk'`` column.

    Raises:
        KeyError: if ``'job'`` or ``'risk'`` is missing.
        IndexError: if ``data`` has no rows.
    """
    data['job'] = data['job'].astype('int')

    # Partition the columns once, in frame order, through the public dtype
    # check; the numeric list is fixed *before* encoding so that the
    # label-encoded columns are not standardized afterwards.
    numeric, categorical = [], []
    for name, dtype in data.dtypes.items():
        if pd.api.types.is_numeric_dtype(dtype):
            numeric.append(name)
        else:
            categorical.append(name)

    # A fresh encoder per column: the codes of one column never depend on
    # the classes fitted for another.
    for name in categorical:
        data[name] = LabelEncoder().fit_transform(data[name])

    for name in numeric:
        column = data[name]
        centered = column - np.mean(column)
        spread = np.std(column)
        # A constant column (or a single-row frame) has zero spread;
        # dividing would turn the whole column into NaN, so it is only
        # centered and left unscaled.
        data[name] = centered if spread == 0 else centered / spread

    # Going through a dict of scalars with index=[0] yields a one-row
    # frame indexed from 0, regardless of the original row label.
    last_row = data.iloc[-1].to_dict()
    input_data = pd.DataFrame(last_row, index=[0]).drop(columns=['risk'])
    return data, input_data