def prepare_data()

in stats/statistical_scoring.py [0:0]


def prepare_data(data: "pd.DataFrame") -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Encode and standardise ``data`` in place and extract its last row.

    Processing steps, in order:

    1. The ``job`` column is cast to ``int``.
    2. Every non-numeric column is replaced by integer codes produced by a
       ``LabelEncoder`` fitted on that column.
    3. Every column that was numeric *before* step 2 (this includes ``job``)
       is standardised to zero mean and unit population standard deviation
       (``ddof=0``). A column with zero standard deviation is only centred,
       so it becomes all zeros instead of NaN.
    4. The last row of the transformed frame is copied into a one-row
       DataFrame (index ``0``) with the ``risk`` column removed.

    Args:
        data: Frame to transform. It must contain the columns ``job`` and
            ``risk``. The frame is modified in place.

    Returns:
        A tuple ``(data, input_data)`` where ``data`` is the same object
        that was passed in (now transformed) and ``input_data`` is the
        one-row frame described in step 4.

    Raises:
        KeyError: If ``job`` or ``risk`` is missing.
        IndexError: If ``data`` has no rows, because there is no last row
            to extract.
    """
    data['job'] = data['job'].astype('int')

    # Classify columns once, up front: label encoding turns categorical
    # columns into integers, and those codes must not be standardised.
    categorical = []
    numeric = []
    for col in data.columns:
        if pd.api.types.is_numeric_dtype(data[col]):
            numeric.append(col)
        else:
            categorical.append(col)

    for col in categorical:
        # A fresh encoder per column; each column has its own label set.
        data[col] = LabelEncoder().fit_transform(data[col])

    for col in numeric:
        centered = data[col] - np.mean(data[col])
        spread = np.std(data[col])
        # A constant column has zero spread; dividing would yield NaN for
        # every row, so keep the centred (all-zero) values instead.
        if pd.notna(spread) and spread == 0:
            data[col] = centered
        else:
            data[col] = centered / spread

    # Round-trip through a dict so the row becomes a one-row frame whose
    # index is 0 rather than the original row label.
    last_row = data.iloc[-1].to_dict()
    input_data = pd.DataFrame(last_row, index=[0]).drop(columns=['risk'])

    return data, input_data