in src/alpaca_eval/annotators/base.py [0:0]
def _annotate(self, df_to_annotate: pd.DataFrame, **decoding_kwargs) -> pd.DataFrame:
    """Annotate the examples.

    Each entry of ``self.annotators`` (name -> callable) is applied to the rows
    whose ``self.annotator_column`` equals its name. When the
    ``self.annotation_key`` column is present, only rows where it is NaN (i.e.
    not annotated yet) are selected. The result of every annotator is folded
    into a copy of the input through ``self._merge_annotations``.

    When ``self.is_reapply_parsing`` is true, the annotators additionally
    receive the ``self.other_output_keys_to_keep`` columns that already exist
    in the input, and any ``dict`` found in ``"raw_completion"`` is serialized
    back to a JSON string so that it can be parsed again.

    Args:
        df_to_annotate: Examples to annotate. It is never modified.
        **decoding_kwargs: Forwarded unchanged to every annotator call.

    Returns:
        A new DataFrame holding the input rows merged with the annotations.
    """
    df_annotated = df_to_annotate.copy()

    # Everything below up to the loop depends only on the input's columns and
    # on the configuration read from ``self``, so it is computed a single time
    # instead of once per annotator.
    existing_output_keys = [k for k in self.other_output_keys_to_keep if k in df_to_annotate.columns]

    columns_to_annotate = self.available_fields_to_format
    df_inputs = df_to_annotate
    if self.is_reapply_parsing:
        # add other_output_keys_to_keep to columns_to_annotate
        columns_to_annotate = columns_to_annotate + existing_output_keys
        # if "raw_completion" is a dict, put it back to a json string so that you can reparse it
        # TODO: this is for backward compatibility, remove in the future
        if "raw_completion" in df_to_annotate.columns:
            # ``assign`` returns a new frame: the caller's DataFrame keeps its
            # original dict values instead of being rewritten in place.
            df_inputs = df_to_annotate.assign(
                raw_completion=df_to_annotate["raw_completion"].apply(
                    lambda x: json.dumps(x) if isinstance(x, dict) else x
                )
            )

    for annotator_name, annotator in self.annotators.items():
        # only annotate examples that have not been annotated yet
        curr_idcs = df_to_annotate[self.annotator_column] == annotator_name
        if self.annotation_key in df_to_annotate.columns:
            curr_idcs &= df_to_annotate[self.annotation_key].isna()

        # drop the output keys that you will be adding
        for k in existing_output_keys:
            df_annotated.loc[curr_idcs, k] = None

        logging.info(f"Annotating {curr_idcs.sum()} examples with {annotator_name}")

        # actual annotation
        curr_annotated = annotator(
            df_inputs.loc[curr_idcs, columns_to_annotate],
            **decoding_kwargs,
        )

        df_annotated = self._merge_annotations(df_annotated, curr_annotated)

    return df_annotated