def _get_ttest_df()

in src/alpaca_eval/plotting.py [0:0]


def _get_ttest_df(df, n_samples=None, random_state=123, sorted_idx=None):
    """Return the pairwise relative t-test dataframe, optionally on a subsample.

    The long-format ``df`` is pivoted so that each row is an ``instruction``,
    each column a ``generator_2`` value, and each cell the ``preference``.

    Args:
        df: Long-format dataframe with at least the columns ``instruction``,
            ``generator_2`` and ``preference``.
        n_samples: If not None, number of pivoted rows (instructions) to
            sample before running the tests.
        random_state: Seed passed to the row sampling; only used when
            ``n_samples`` is given.
        sorted_idx: Generator names giving the column order fed to the test.
            If None, generators are ordered by decreasing win rate, computed
            on the full ``df`` (not on the subsample).

    Returns:
        The result of ``_pairwise_ttest`` on the reordered preferences, cast
        to float.
    """
    per_instruction = df.pivot(index="instruction", values="preference", columns=["generator_2"])

    if n_samples is not None:
        per_instruction = per_instruction.sample(n=n_samples, random_state=random_state)

    if sorted_idx is None:
        # Rank generators from highest to lowest win rate using every row of
        # `df`, so the ordering does not depend on which rows were sampled.
        win_rates = df.groupby("generator_2")["preference"].apply(
            lambda prefs: metrics.pairwise_to_winrate(prefs)["win_rate"]
        )
        sorted_idx = list(win_rates.sort_values(ascending=False).index)

    # Swap the codes 0 and 1 before testing. Per the original note a draw is
    # encoded as 0; the swap presumably places draws between the two win
    # outcomes so the values are ordered -- TODO confirm against the
    # preference encoding used upstream.
    reordered = per_instruction[sorted_idx].replace({0: 1, 1: 0})
    return _pairwise_ttest(reordered).astype(float)