def compute_drift()

in src/drift_detector.py [0:0]


def compute_drift(train_df, infer_df):
    """Build a per-column drift report comparing ``infer_df`` against ``train_df``.

    Every column of ``train_df`` is looked up by the same name in
    ``infer_df``, so ``infer_df`` must contain all of those columns.

    Each report row always carries the keys ``'Feature'``, ``'Column Type'``,
    ``'drift_score'``, ``'NaN % Diff'``, ``'is_nan_signif'`` and
    ``'jensenshannon'``.  Depending on the column type it additionally holds:

    * ``ColType.NUMERICAL``: ``'KS'``, ``'p-value'`` (from ``ks_2samp``) and
      ``'wasserstein_distance'``, all computed on the output of
      ``utils.normalize``.
    * ``ColType.CATEGORICAL``: ``'chisquare'``, ``'p-value'`` (from
      ``chisquare``) and ``'Unique Count Drift'``.

    Columns of any other type get only the common keys.

    Args:
        train_df: Reference data set; its columns drive the iteration.
        infer_df: Data set to compare against the reference.

    Returns:
        A ``pandas.DataFrame`` with one row per column of ``train_df``.
    """
    col_types = utils.get_column_types(train_df)
    infer_count = len(infer_df)

    drift_data = []

    for col in train_df.columns:
        # Resolve the column type once; it is needed by every step below.
        kind = col_types[col]
        drift_col = {'Feature': col, 'Column Type': kind}

        train_col, infer_col = train_df[col], infer_df[col]
        train_prob, infer_prob, compute_unique_count_drift = \
            utils.get_prob_dist_func(train_col, infer_col, kind)

        drift_col['drift_score'] = utils.compute_drift_score(train_prob, infer_prob)

        drift_col['NaN % Diff'], drift_col['is_nan_signif'] = \
            utils.compute_nan_stats(train_col, infer_col, kind)

        if kind == ColType.NUMERICAL:
            train_norm_col, infer_norm_col = utils.normalize(train_col, infer_col)
            drift_col['KS'], drift_col['p-value'] = ks_2samp(train_norm_col, infer_norm_col)
            drift_col['wasserstein_distance'] = wasserstein_distance(train_norm_col, infer_norm_col)
        elif kind == ColType.CATEGORICAL:
            # chisquare works on frequencies (ideally each at least 5), so both
            # distributions are scaled by the size of the inference data set.
            # The scaled values are kept as floats on purpose: truncating them
            # with int() makes the two totals disagree, which chisquare rejects
            # (its sums must match within a tight relative tolerance), and it
            # rounds rare categories down to zero.
            # Assumes train_prob and infer_prob sum to the same total
            # (presumably 1.0) -- TODO confirm against utils.get_prob_dist_func.
            train_freq = [p * infer_count for p in train_prob]
            infer_freq = [p * infer_count for p in infer_prob]
            # NOTE(review): train is passed as f_obs and inference as f_exp;
            # for a "does inference follow train?" test the roles would
            # normally be the other way round -- confirm the intended direction.
            drift_col['chisquare'], drift_col['p-value'] = chisquare(train_freq, infer_freq)

            drift_col['Unique Count Drift'] = compute_unique_count_drift

        drift_col['jensenshannon'] = jensenshannon(train_prob, infer_prob)

        drift_data.append(drift_col)

    return pd.DataFrame(drift_data)