def get_stats()

in python/vmaf/core/train_test_model.py [0:0]
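The snippet relies on numpy and on the perf-metric classes imported at the top of the file; the exact module path below is an assumption based on the vmaf package layout:

    import numpy as np

    from vmaf.core.perf_metric import RmsePerfMetric, SrccPerfMetric, PccPerfMetric, \
        KendallPerfMetric, AucPerfMetric, ResolvingPowerPerfMetric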


    @classmethod
    def get_stats(cls, ys_label, ys_label_pred, **kwargs):

        # cannot have None
        assert all(x is not None for x in ys_label)
        assert all(x is not None for x in ys_label_pred)

        # RMSE
        rmse = RmsePerfMetric(ys_label, ys_label_pred) \
            .evaluate(enable_mapping=True)['score']

        # spearman
        srcc = SrccPerfMetric(ys_label, ys_label_pred) \
            .evaluate(enable_mapping=True)['score']

        # pearson
        pcc = PccPerfMetric(ys_label, ys_label_pred) \
            .evaluate(enable_mapping=True)['score']

        # kendall
        kendall = KendallPerfMetric(ys_label, ys_label_pred) \
            .evaluate(enable_mapping=True)['score']

        stats = {'RMSE': rmse,
                 'SRCC': srcc,
                 'PCC': pcc,
                 'KENDALL': kendall,
                 'ys_label': list(ys_label),
                 'ys_label_pred': list(ys_label_pred)}

        # create per-metric distributions (SRCC, PCC, RMSE) across models,
        # if predictions from multiple models are passed in via kwargs
        if 'ys_label_pred_all_models' in kwargs:

            ys_label_pred_all_models = kwargs['ys_label_pred_all_models']

            srcc_all_models = []
            pcc_all_models = []
            rmse_all_models = []

            for ys_label_pred_some_model in ys_label_pred_all_models:
                srcc_some_model = SrccPerfMetric(ys_label, ys_label_pred_some_model) \
                    .evaluate(enable_mapping=True)['score']
                pcc_some_model = PccPerfMetric(ys_label, ys_label_pred_some_model) \
                    .evaluate(enable_mapping=True)['score']
                rmse_some_model = RmsePerfMetric(ys_label, ys_label_pred_some_model) \
                    .evaluate(enable_mapping=True)['score']
                srcc_all_models.append(srcc_some_model)
                pcc_all_models.append(pcc_some_model)
                rmse_all_models.append(rmse_some_model)

            stats['SRCC_across_model_distribution'] = srcc_all_models
            stats['PCC_across_model_distribution'] = pcc_all_models
            stats['RMSE_across_model_distribution'] = rmse_all_models

        split_test_indices_for_perf_ci = kwargs.get('split_test_indices_for_perf_ci', False)

        ys_label_raw = kwargs.get('ys_label_raw', None)

        if ys_label_raw is not None:

            # normalize raw labels: each entry may be a dict of per-subject scores or a plain list
            if isinstance(ys_label_raw[0], dict):
                ys_label_raw_list = [list(d.values()) for d in ys_label_raw]
            else:
                ys_label_raw_list = ys_label_raw

            try:
                # AUC
                result = AucPerfMetric(ys_label_raw_list, ys_label_pred).evaluate()
                stats['AUC_DS'] = result['AUC_DS']
                stats['AUC_BW'] = result['AUC_BW']
            except TypeError:
                stats['AUC_DS'] = float('nan')
                stats['AUC_BW'] = float('nan')

            try:
                # ResPow
                respow = ResolvingPowerPerfMetric(ys_label_raw_list, ys_label_pred) \
                    .evaluate(enable_mapping=False)['score']
                stats['ResPow'] = respow
            except (TypeError, AssertionError):
                stats['ResPow'] = float('nan')

            try:
                # ResPow, normalized (enable_mapping=True)
                respow_norm = ResolvingPowerPerfMetric(ys_label_raw_list, ys_label_pred) \
                    .evaluate(enable_mapping=True)['score']
                stats['ResPowNormalized'] = respow_norm
            except (TypeError, AssertionError):
                stats['ResPowNormalized'] = float('nan')

        if 'ys_label_stddev' in kwargs and kwargs['ys_label_stddev'] is not None:
            stats['ys_label_stddev'] = kwargs['ys_label_stddev']

        if split_test_indices_for_perf_ci:

            # ensure labels and predictions are arrays
            if not isinstance(ys_label, np.ndarray):
                ys_label = np.asarray(ys_label)
            if not isinstance(ys_label_pred, np.ndarray):
                ys_label_pred = np.asarray(ys_label_pred)

            # replicate logic of BootstrapVmafQualityRunner
            sample_size = len(ys_label)
            n_splits_test_indices = kwargs['n_splits_test_indices'] if 'n_splits_test_indices' in kwargs \
                else cls.DEFAULT_N_SPLITS_TEST_INDICES

            srcc_distribution = []
            pcc_distribution = []
            rmse_distribution = []

            for i_test_split in range(n_splits_test_indices):

                np.random.seed(i_test_split)  # deterministic seed per split, for reproducibility
                # bootstrap: random sample with replacement
                idxs = np.random.choice(range(sample_size), size=sample_size, replace=True)

                ys_label_resampled = ys_label[idxs]
                ys_label_pred_resampled = ys_label_pred[idxs]

                srcc_distribution.append(
                    SrccPerfMetric(ys_label_resampled, ys_label_pred_resampled).evaluate(enable_mapping=True)['score']
                )

                pcc_distribution.append(
                    PccPerfMetric(ys_label_resampled, ys_label_pred_resampled).evaluate(enable_mapping=True)['score']
                )

                rmse_distribution.append(
                    RmsePerfMetric(ys_label_resampled, ys_label_pred_resampled).evaluate(enable_mapping=True)['score']
                )

            stats['SRCC_across_test_splits_distribution'] = srcc_distribution
            stats['PCC_across_test_splits_distribution'] = pcc_distribution
            stats['RMSE_across_test_splits_distribution'] = rmse_distribution

        return stats
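
For context, here is a minimal usage sketch. It assumes get_stats is reachable as a classmethod on RegressorMixin in this module (an assumption about the enclosing class) and uses made-up labels and predictions:

    from vmaf.core.train_test_model import RegressorMixin  # assumed enclosing class

    # made-up ground-truth labels (e.g. MOS) and model predictions, for illustration only
    ys_label = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0]
    ys_label_pred = [12.0, 18.0, 33.0, 41.0, 48.0, 62.0, 68.0, 81.0]

    stats = RegressorMixin.get_stats(ys_label, ys_label_pred)
    print(stats['SRCC'], stats['PCC'], stats['RMSE'], stats['KENDALL'])

    # bootstrap the test indices to also obtain per-metric distributions
    stats_ci = RegressorMixin.get_stats(
        ys_label, ys_label_pred,
        split_test_indices_for_perf_ci=True,
        n_splits_test_indices=10)
    print(len(stats_ci['SRCC_across_test_splits_distribution']))  # 10

Passing ys_label_raw (per-subject raw scores) additionally populates the AUC and ResPow entries, and ys_label_pred_all_models adds the across-model distributions.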