def _annotate()

in src/alpaca_eval/annotators/base.py [0:0]


    def _annotate(self, df_to_annotate: pd.DataFrame, **decoding_kwargs) -> pd.DataFrame:
        """Annotate the examples."""

        df_annotated = df_to_annotate.copy()
        for annotator in self.annotators.keys():
            # only annotate examples that have not been annotated yet
            curr_idcs = df_to_annotate[self.annotator_column] == annotator
            if self.annotation_key in df_to_annotate.columns:
                curr_idcs &= df_to_annotate[self.annotation_key].isna()

            # drop the output keys that you will be adding
            for k in self.other_output_keys_to_keep:
                if k in df_to_annotate.columns:
                    df_annotated.loc[curr_idcs, k] = None

            logging.info(f"Annotating {curr_idcs.sum()} examples with {annotator}")

            # actual annotation
            columns_to_annotate = self.available_fields_to_format
            if self.is_reapply_parsing:
                # add other_output_keys_to_keep to columns_to_annotate
                columns_to_annotate = columns_to_annotate + [
                    c for c in self.other_output_keys_to_keep if c in df_to_annotate.columns
                ]
                # if df_to_annotate "raw_completion" is a dict, put it back to a json string so that you can reparse it
                # TODO: this is for backward compatibility, remove in the future
                if "raw_completion" in df_to_annotate.columns:
                    df_to_annotate["raw_completion"] = df_to_annotate["raw_completion"].apply(
                        lambda x: json.dumps(x) if isinstance(x, dict) else x
                    )

            curr_annotated = self.annotators[annotator](
                df_to_annotate.loc[curr_idcs, columns_to_annotate],
                **decoding_kwargs,
            )

            df_annotated = self._merge_annotations(df_annotated, curr_annotated)

        return df_annotated