in src/alpaca_eval/analyze.py
import numpy as np


def get_metrics_evaluator(analyzer, df_crossannotations, evaluator_name=None):
    """Gets the metrics for an annotator given its cross-annotations."""
    all_metrics = dict()

    # Agreement with the majority vote of human annotators, in percent.
    all_metrics["Human agreement"] = (
        analyzer.agreement_of_annotations(annotations_1=df_crossannotations, n_majority_vote_1=1)["score"] * 100
    )

    # Average cost and latency, scaled to 1000 examples.
    all_metrics["Price [$/1000 examples]"] = df_crossannotations["price_per_example"].mean() * 1000
    all_metrics["Time [seconds/1000 examples]"] = df_crossannotations["time_per_example"].mean() * 1000

    # Spearman and Pearson correlations with the human-based results.
    correlations = analyzer.estimate_correlations(df_crossannotations)
    all_metrics["Spearman corr."] = correlations["spearman"]
    all_metrics["Pearson corr."] = correlations["pearson"]

    if evaluator_name == "humans":
        # Humans are the reference, so their bias is zero by definition.
        all_metrics["Bias"] = 0
        all_metrics["Variance"] = analyzer.estimate_variance(df_crossannotations) * 100
    else:
        # Bias and variance estimation can fail (e.g. too few cross-annotations); fall back to NaN.
        try:
            all_metrics["Bias"] = analyzer.estimate_bias(df_crossannotations) * 100
        except Exception:
            all_metrics["Bias"] = np.nan
        try:
            all_metrics["Variance"] = analyzer.estimate_variance(df_crossannotations) * 100
        except Exception:
            all_metrics["Variance"] = np.nan

    # Stylistic biases: preference for longer outputs, for lists, and for the first output.
    all_metrics["Proba. prefer longer"] = analyzer.get_length_biases(df_crossannotations)["probability_prefer_longer"]
    all_metrics["Proba. prefer lists"] = analyzer.get_list_biases(df_crossannotations)["probability_prefer_list"]
    # Preferences are coded 1 or 2, so 2 - mean(preference) is the probability of preferring output 1.
    all_metrics["Proba. prefer 1"] = 2 - df_crossannotations["preference"].mean()

    # Number of annotations that were successfully parsed into a preference.
    all_metrics["# parsed"] = len(df_crossannotations.preference.dropna())
    return all_metrics
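
For context, the function only depends on a small analyzer interface (agreement_of_annotations, estimate_correlations, estimate_bias, estimate_variance, get_length_biases, get_list_biases) and on three DataFrame columns (preference, price_per_example, time_per_example). The sketch below illustrates a call against that interface; the stub analyzer and toy data are hypothetical stand-ins for alpaca_eval's real Analyzer and cross-annotation files, not the library's API.

import pandas as pd


class _StubAnalyzer:
    """Hypothetical stand-in exposing only the methods get_metrics_evaluator calls."""

    def agreement_of_annotations(self, annotations_1, n_majority_vote_1=1):
        return {"score": 0.65}

    def estimate_correlations(self, df):
        return {"spearman": 0.90, "pearson": 0.88}

    def estimate_bias(self, df):
        return 0.30

    def estimate_variance(self, df):
        return 0.20

    def get_length_biases(self, df):
        return {"probability_prefer_longer": 0.60}

    def get_list_biases(self, df):
        return {"probability_prefer_list": 0.55}


# Toy cross-annotations with the columns the function reads.
df = pd.DataFrame(
    {
        "preference": [1, 2, 2, 1],
        "price_per_example": [0.003, 0.003, 0.003, 0.003],
        "time_per_example": [1.2, 1.1, 1.3, 1.2],
    }
)

metrics = get_metrics_evaluator(_StubAnalyzer(), df, evaluator_name="gpt4")
print(metrics["Human agreement"], metrics["Proba. prefer 1"], metrics["# parsed"])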