in src/alpaca_eval/analyze.py [0:0]
def _select_n_annotations(self, df, n_annotators=None, is_rm_less_than: bool = True):
    """Select the examples that have at least `n_annotators` annotations.

    Rows are grouped by `self.keys`. The returned frame has two extra columns:
    `index`, the position of the row within its group (0-based), and
    `n_annotated`, the number of non-null `self.annotation_key` values kept for
    that group. The input frame is never modified.

    Args:
        df: Annotations to filter. Must contain the `self.keys` columns and the
            `self.annotation_key` column. An existing `n_annotated` column is
            discarded and recomputed.
        n_annotators: Minimum number of annotations a group needs to be kept.
            If None, the smallest group count found in `df` is used, so every
            group is kept.
        is_rm_less_than: If True and `n_annotators` is given, each group is
            first truncated to its first `n_annotators` rows, so kept groups end
            up with at most `n_annotators` rows.

    Returns:
        A new DataFrame with the selected rows and the `index` and
        `n_annotated` columns.
    """
    # `drop` and `assign` both return new frames, so the caller's `df` is left untouched.
    df = df.drop(columns="n_annotated", errors="ignore")
    df = df.assign(index=df.groupby(self.keys)[self.annotation_key].cumcount())

    # Only truncate when a target is given: comparing against None would filter
    # out every row (or raise), making the `counts.min()` fallback unreachable.
    if is_rm_less_than and n_annotators is not None:
        # drop the annotations beyond the first n_annotators of each group
        df = df[df["index"] < n_annotators]

    # number of (non-null) annotations remaining per group
    counts = df.groupby(self.keys)[self.annotation_key].count()
    counts.name = "n_annotated"
    if n_annotators is None:
        n_annotators = counts.min()

    # keep only the groups that reach the threshold; the inner merge drops the rest
    counts = counts[counts >= n_annotators].reset_index()
    df_selected = df.merge(counts, on=self.keys)

    return df_selected.copy()