def average_results()

in prediction_generation/original-project/analysis/scripts/make_table.py [0:0]


def average_results(results):
    """Average the results

    NOTE: This function filters out some methods/datasets for which we have
    insufficient results.
    """
    experiment = list(set(r.experiment for r in results))[0]

    expected_methods = list(Method)

    # keep only expected methods
    results = list(filter(lambda r: r.method in expected_methods, results))

    # remove RBOCPDMS for 'best', because it fails too often
    if experiment == Experiment.best:
        warning(
            "\nWarning: Removing RBOCPDMS (experiment = %s) due to insufficient results\n"
            % experiment
        )
        results = list(filter(lambda r: r.method != Method.rbocpdms, results))
        expected_methods.remove(Method.rbocpdms)

    # remove datasets for which we do not have complete results
    to_remove = []
    for dataset in set(r.dataset for r in results):
        dset_results = filter(lambda r: r.dataset == dataset, results)
        if any(r.score is None for r in dset_results):
            to_remove.append(dataset)
    if to_remove:
        warning(
            "\nWarning: Filtering out datasets: %r due to incomplete results for some detectors.\n"
            % to_remove
        )
    results = list(filter(lambda r: r.dataset not in to_remove, results))

    # check that we are now complete: for all datasets and all methods in the
    # remaining results, we have a non-None score.
    assert all(r.score is not None for r in results)

    # compute the average per method
    methods = set(r.method for r in results)
    avg = {}
    for method in methods:
        method_scores = [r.score for r in results if r.method == method]
        avg_score = sum(method_scores) / len(method_scores)
        avg[method.name] = avg_score

    return avg
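

For context, the snippet below sketches how average_results() might be called. The Result container, the Method and Experiment enums, and the warning helper are hypothetical stand-ins for the project's own definitions in make_table.py and its imports; the scores are made-up values.


from collections import namedtuple
from enum import Enum
from logging import warning  # stand-in for the module's own warning helper

class Method(Enum):
    bocpd = "bocpd"
    rbocpdms = "rbocpdms"

class Experiment(Enum):
    default = "default"
    best = "best"

Result = namedtuple("Result", ["experiment", "dataset", "method", "score"])

results = [
    Result(Experiment.default, "nile", Method.bocpd, 0.81),
    Result(Experiment.default, "nile", Method.rbocpdms, 0.75),
    Result(Experiment.default, "bee_waggle", Method.bocpd, 0.64),
    Result(Experiment.default, "bee_waggle", Method.rbocpdms, None),
]

# 'bee_waggle' has a missing score, so it is dropped with a warning and only
# the 'nile' results contribute to the per-method averages.
print(average_results(results))
# e.g. {'bocpd': 0.81, 'rbocpdms': 0.75}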