def get_metrics_evaluator()

in src/alpaca_eval/analyze.py


import numpy as np


def get_metrics_evaluator(analyzer, df_crossannotations, evaluator_name=None):
    """Gets the metrics for an annotator given its cross-annotations."""

    all_metrics = dict()
    all_metrics["Human agreement"] = (
        analyzer.agreement_of_annotations(annotations_1=df_crossannotations, n_majority_vote_1=1)["score"] * 100
    )

    all_metrics["Price [$/1000 examples]"] = df_crossannotations["price_per_example"].mean() * 1000
    all_metrics["Time [seconds/1000 examples]"] = df_crossannotations["time_per_example"].mean() * 1000

    correlations = analyzer.estimate_correlations(df_crossannotations)
    all_metrics["Spearman corr."] = correlations["spearman"]
    all_metrics["Pearson corr."] = correlations["pearson"]

    if evaluator_name == "humans":
        all_metrics["Bias"] = 0
        all_metrics["Variance"] = analyzer.estimate_variance(df_crossannotations) * 100
    else:
        try:
            all_metrics["Bias"] = analyzer.estimate_bias(df_crossannotations) * 100
        except:
            all_metrics["Bias"] = np.nan

        try:
            all_metrics["Variance"] = analyzer.estimate_variance(df_crossannotations) * 100
        except:
            all_metrics["Variance"] = np.nan

    all_metrics["Proba. prefer longer"] = analyzer.get_length_biases(df_crossannotations)["probability_prefer_longer"]
    all_metrics["Proba. prefer lists"] = analyzer.get_list_biases(df_crossannotations)["probability_prefer_list"]
    all_metrics["Proba. prefer 1"] = 2 - df_crossannotations["preference"].mean()
    all_metrics["# parsed"] = len(df_crossannotations.preference.dropna())
    return all_metrics
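

A minimal usage sketch, assuming an Analyzer from the same module and a cross-annotations DataFrame loaded by the caller; the constructor arguments and the file path below are illustrative, not taken from the source.

import pandas as pd

from alpaca_eval import analyze

# Assumption: Analyzer() with default arguments loads the packaged human cross-annotations.
analyzer = analyze.Analyzer()

# Hypothetical path; the DataFrame must contain the columns used above,
# e.g. "preference", "price_per_example", and "time_per_example".
df_crossannotations = pd.read_json("evaluator_crossannotations.json")

metrics = analyze.get_metrics_evaluator(analyzer, df_crossannotations, evaluator_name="my_evaluator")
print(metrics["Human agreement"], metrics["Price [$/1000 examples]"])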