# compute_ranks()
#
# From: prediction_generation/original-project/analysis/scripts/rank_common.py


def compute_ranks(results, keep_methods=None, higher_better=True):
    """Compute per-dataset and average ranks for a set of methods.

    Parameters
    ----------
    results : dict
        Mapping from dataset name to dict, where each dict in turn is a map
        from method name to a score value.

    keep_methods : list, optional
        Methods to include in the ranks. If None, all methods present in the
        datasets are used. Every dataset must contain exactly this set of
        methods, otherwise a ValueError is raised.

    higher_better : bool
        Whether a higher or a lower value is considered better (rank 1 is
        always the best method).

    Returns
    -------
    avg_ranks : dict
        Map from method name to average rank over all (non-skipped) datasets.
        Empty if no dataset could be ranked.

    all_ranks : dict
        Map from dataset name to dictionary, which is in turn a map from
        method name to rank for that dataset and that method. Datasets with
        NaN scores are skipped and omitted from this map.

    Raises
    ------
    ValueError
        If a dataset's methods do not match ``keep_methods`` (or, when
        ``keep_methods`` is None, do not match the other datasets).
    """
    vec_ranks = []
    all_ranks = {}
    # Canonical (sorted) method order shared by all datasets; fixed by the
    # first dataset (or by keep_methods) so the final zip is well-defined
    # even though the loop may skip datasets.
    method_order = None

    for dset in results:
        methods = sorted(results[dset].keys())

        if keep_methods is not None:
            methods = [m for m in methods if m in keep_methods]
            # Every requested method must be present in every dataset.
            if set(methods) != set(keep_methods):
                raise ValueError(
                    "Dataset %r does not contain all of keep_methods "
                    "(has %r, expected %r)" % (dset, methods, sorted(keep_methods))
                )

        if method_order is None:
            method_order = methods
        elif methods != method_order:
            raise ValueError(
                "Dataset %r has methods %r, expected %r"
                % (dset, methods, method_order)
            )

        # rankdata assigns rank 1 to the smallest value, so negate scores
        # when higher is better.
        if higher_better:
            values = [-results[dset][m] for m in methods]
        else:
            values = [results[dset][m] for m in methods]

        if any(np.isnan(v) for v in values):
            print(
                "Skipping dataset %s because of nans" % dset, file=sys.stderr
            )
            continue

        ranks = rankdata(values, method="average")

        vec_ranks.append(ranks)
        all_ranks[dset] = {m: ranks[i] for i, m in enumerate(methods)}

    # No dataset yielded ranks (empty input, or everything skipped for NaNs):
    # avoid np.mean([]) (NaN + warning) and the loop-variable leak.
    if not vec_ranks:
        return {}, all_ranks

    avg_ranks = np.mean(vec_ranks, axis=0)
    avg_ranks = {m: r for m, r in zip(method_order, avg_ranks)}
    return avg_ranks, all_ranks