def get_list_biases()

in src/alpaca_eval/analyze.py [0:0]
24 lines of code
6 McCabe index (conditional complexity)

    def get_list_biases(self, annotations: Union[pd.DataFrame, str]) -> dict[str, float]:
        """Estimate how much the annotations favour outputs that contain a list.

        The annotations are de-duplicated on ``self.keys``. Each row's value in the
        ``self.annotation_key`` column decides which of ``output_1`` / ``output_2`` is
        treated as the preferred ("best") and the rejected ("worse") output:

        * value in ``[1, 1.5)``  -> ``output_1`` is best, ``output_2`` is worse;
        * value in ``(1.5, 2]``  -> ``output_2`` is best, ``output_1`` is worse;
        * anything else (e.g. exactly ``1.5``) -> ``output_2`` is used for both, so
          the row can never count as "exactly one side has a list".

        Whether an output contains a list is decided by ``utils.contains_list``.

        Args:
            annotations: Annotations to analyse. The body calls ``drop_duplicates``
                on it directly, so a DataFrame is expected here.
                NOTE(review): the signature also advertises ``str``; a string would
                end up in the NaN fallback below -- confirm whether that is intended.

        Returns:
            A dict with two entries:

            * ``probability_prefer_list``: among rows where exactly one of the two
              outputs contains a list, the fraction in which the best output is the
              one with the list.
            * ``percentage_list``: ``(p_best - p_worse) / p_worse`` where ``p_best``
              and ``p_worse`` are the mean list indicators over the best and worse
              outputs respectively.

            If anything raises while computing them (including a division by zero
            when no row has exactly one list), a warning is logged and both values
            are ``np.nan``.
        """
        try:
            frame = annotations.drop_duplicates(subset=self.keys).copy()
            preference = frame[self.annotation_key]

            # Half-open intervals on each side of 1.5 keep an exact tie out of both masks.
            output_1_preferred = preference.between(1, 1.5, inclusive="left")
            output_1_rejected = preference.between(1.5, 2, inclusive="right")
            frame["best_output"] = np.where(output_1_preferred, frame.output_1, frame.output_2)
            frame["worse_output"] = np.where(output_1_rejected, frame.output_1, frame.output_2)

            # Per-row flags: does the preferred / rejected output contain a list?
            best_has_list = frame["best_output"].apply(utils.contains_list)
            worse_has_list = frame["worse_output"].apply(utils.contains_list)

            # Rows where exactly one of the two outputs has a list (XOR).
            only_one_has_list = best_has_list ^ worse_has_list

            # Row counts are plain Python ints on purpose: an empty denominator raises
            # ZeroDivisionError and is routed to the NaN fallback below.
            n_best_is_the_list = len(frame[only_one_has_list & best_has_list])
            n_only_one_has_list = len(frame[only_one_has_list])
            probability_prefer_list = n_best_is_the_list / n_only_one_has_list

            # Relative excess of lists among preferred outputs compared to rejected ones.
            share_best_with_list = best_has_list.mean()
            share_worse_with_list = worse_has_list.mean()
            percentage_list = (share_best_with_list - share_worse_with_list) / share_worse_with_list
        except Exception as e:
            # Best-effort metric: never let a failure here break the caller.
            logging.warning(f"Could not compute list biases: {e}")
            probability_prefer_list = np.nan
            percentage_list = np.nan

        return {
            "probability_prefer_list": probability_prefer_list,
            "percentage_list": percentage_list,
        }