def get_mt_bench_agreement()

in fastchat/llm_judge/compute_agreement.py [0:0]


def get_mt_bench_agreement(data, judge1, judge2, ban):
    if judge1.startswith("gpt4") and judge2 == "human":
        stats = [0, 0]
        for votes in data.values():
            if judge1 not in votes or judge2 not in votes:
                continue
            assert len(votes[judge1]) == 1
            if convertvote(votes[judge1][0]) in ban:
                continue
            for v in votes[judge2]:
                if convertvote(v) in ban:
                    continue
                stats[1] += 1
                stats[0] += equalvote(votes[judge1][0], v)
        return stats[0], stats[1]
    elif judge1 == "human" and judge2 == "human":
        stats = [0, 0]
        for votes in data.values():
            if "human" not in votes:
                continue
            for i in range(len(votes["human"]) - 1):
                for j in range(i + 1, len(votes["human"])):
                    if (
                        convertvote(votes["human"][i]) in ban
                        or convertvote(votes["human"][j]) in ban
                    ):
                        continue
                    stats[1] += 1
                    stats[0] += equalvote(votes["human"][i], votes["human"][j])
        return stats[0], stats[1]
    else:
        raise Exception("Unsupported judges.")