in src/alpaca_eval/annotators/base.py [0:0]
def __call__(self, df_to_annotate: pd.DataFrame, **decoding_kwargs) -> pd.DataFrame:
    """Annotate the given examples.

    The input is copied, preprocessed, turned into prompts, sent to
    `fn_completions`, and the resulting completions are parsed into
    annotations that are stored in `self.annotation_column`.

    If `self.completion_column` is already present after preprocessing and
    `self.batch_size == 1`, the method runs in "reparsing" mode: only rows
    whose stored completion is missing are sent to `fn_completions`, the new
    completions are merged back, and parsing is (re)applied to every row.

    Parameters
    ----------
    df_to_annotate : pd.DataFrame
        Examples to annotate. It is not modified in place.

    decoding_kwargs :
        Additional arguments to pass to `fn_completions`.

    Returns
    -------
    pd.DataFrame
        The output of `self._postprocess` applied to the annotated examples.
        For an empty input, a copy of the input with an empty
        `self.annotation_column` is returned without any further processing.

    Raises
    ------
    ValueError
        If preprocessing leaves no example to annotate and there are no
        stored completions to reparse.
    """
    df_to_annotate = df_to_annotate.copy()  # avoid in place modifications

    if df_to_annotate.empty:
        df_to_annotate[self.annotation_column] = []
        return df_to_annotate

    df_to_annotate = self._preprocess(df_to_annotate)

    # Reparsing mode reuses raw completions that were already stored. It requires batch_size=1 because
    # stored completions are per row. The decision is made once, here, so that the filtering below and
    # the merge further down always agree (previously the merge could run without the filtering having
    # happened, which failed for batch_size != 1).
    is_reparsing = self.batch_size == 1 and self.completion_column in df_to_annotate.columns

    main_df_to_annotate = None
    idx_not_completed = None
    if is_reparsing:
        # keep only the rows that have not been annotated yet
        main_df_to_annotate = df_to_annotate
        idx_not_completed = df_to_annotate[self.completion_column].isna()
        df_to_annotate = df_to_annotate[idx_not_completed].copy()

    completions = None
    if not df_to_annotate.empty:
        # prompts and completions here will not be the same length as the dataframe due to batching
        prompts, df_to_annotate = self._make_prompts(df_to_annotate)
        completions = self.fn_completions(prompts=prompts, **self.completions_kwargs, **decoding_kwargs)

        for k, v in completions.items():
            if k == "completions":
                continue
            if self.batch_size != 1 and len(df_to_annotate) == len(v) * self.batch_size:
                # one value per batch => repeat it for every example of the batch
                v = [el for el in v for _ in range(self.batch_size)]
            df_to_annotate[k] = v
            if "per_example" in k:
                # the value was reported for a whole batch => split it across its examples
                df_to_annotate[k] = df_to_annotate[k] / self.batch_size

    # the following is only needed if you want to only reapply the parsing
    if is_reparsing and self.completion_column in df_to_annotate.columns:
        if not df_to_annotate.empty:
            df_to_annotate[self.completion_column] = completions[self.completion_key]  # only works for bs 1
        main_df_to_annotate[idx_not_completed] = df_to_annotate  # puts back all the new completions
        df_to_annotate = main_df_to_annotate
        completions_to_parse = df_to_annotate[self.completion_column]
    else:
        if completions is None:
            # nothing was sent to `fn_completions` and there are no stored completions to fall back on
            raise ValueError("No examples left to annotate after preprocessing.")
        completions_to_parse = completions[self.completion_key]

    # note: reparsing only works if you use the same completion_key
    annotations_to_save, completions_to_save = self._parse_completions(completions=completions_to_parse)
    df_to_annotate[self.annotation_column] = annotations_to_save
    if self.completion_column is not None:
        df_to_annotate[self.completion_column] = completions_to_save

    return self._postprocess(df_to_annotate)