# src/alpaca_eval/annotators/pairwise_evaluator.py
# Method excerpt from the pairwise annotator class, shown with the
# module-level imports it relies on (the real file may import more).
import logging

import numpy as np
import pandas as pd

from .. import utils


def _preprocess(self, to_annotate: utils.AnyData) -> pd.DataFrame:
    # Same as the parent's _preprocess, plus optional random label noising
    # and handling of equality (identical outputs).
    df_to_annotate = super()._preprocess(to_annotate)

    # 1. add random noise => avoids annotating examples that will be noised out.
    if self.p_label_flip:
        logging.info(f"Adding random noise to the labels p_label_flip={self.p_label_flip}.")
        # a 25% chance of flipping the label corresponds to a 50% chance of selecting a random label
        # note that the noise is always binary (1 or 2), even when the annotation is a float (e.g. when using logprobs)
        p_noise = self.p_label_flip * 2
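        # Worked example (illustrative numbers, not from the source): with
        # p_label_flip = 0.25 we get p_noise = 0.5, i.e. weights [0.5, 0.25, 0.25]
        # below. A uniformly random label agrees with the true label half the
        # time, so drawing one with probability 2p gives an effective flip rate of p.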
        noisy_preference = df_to_annotate.apply(
            # the "noisy_preference" prefix keeps the seeds ~independent between tasks
            lambda x: utils.random_seeded_choice(  # seed on inputs for reproducibility
                seed="noisy_preference" + "".join(x[self.random_seed_keys]) + str(self.seed),
                choices=[np.nan, 1, 2],
                weights=[1 - p_noise, self.p_label_flip, self.p_label_flip],
            ),
            axis=1,
        )
df_to_annotate["is_noisy_label"] = ~noisy_preference.isna()
# keeps previously annotated examples when you did not add noise
df_to_annotate[self.annotation_key] = np.where(
df_to_annotate["is_noisy_label"],
noisy_preference,
df_to_annotate[self.annotation_key],
)
    # 2. deal with equality
    idcs_is_same_outputs = df_to_annotate["output_1"] == df_to_annotate["output_2"]
    df_to_annotate.loc[idcs_is_same_outputs, self.annotation_key] = 1.5

    # backward compatibility: 0 used to mean "same output" => replace with 1.5
    df_to_annotate[self.annotation_key] = df_to_annotate[self.annotation_key].replace({0: 1.5})
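    # i.e. identical outputs and legacy 0 annotations both end up as 1.5,
    # which downstream code can presumably read as a tie (half a win each).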

    return df_to_annotate
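

# ---------------------------------------------------------------------------
# A minimal sketch of what `utils.random_seeded_choice` might look like, so the
# excerpt above is self-explanatory. The signature mirrors the call site, but
# the body is an assumption, not alpaca_eval's actual implementation: it derives
# a stable integer seed from the seed string (crc32 rather than the built-in
# hash(), which is salted per process) and draws once without touching the
# global numpy random state.
import zlib


def random_seeded_choice_sketch(seed, choices, weights=None):
    """Deterministic weighted choice: the same seed always yields the same draw."""
    if isinstance(seed, str):
        seed = zlib.crc32(seed.encode("utf-8"))
    return np.random.default_rng(seed).choice(choices, p=weights)


# Illustrative usage mirroring the call in `_preprocess` (made-up inputs):
# with p_label_flip = 0.25, the draw is NaN (keep the real annotation) half the
# time, and a random label (1 or 2) a quarter of the time each.
_noised = random_seeded_choice_sketch(
    seed="noisy_preference" + "Some instruction." + "0",
    choices=[np.nan, 1, 2],
    weights=[0.5, 0.25, 0.25],
)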