def mcnemar_test()

in leaderboard/stats.py


import numpy as np
from mlxtend import evaluate


def mcnemar_test(model_a_array: np.ndarray, model_b_array: np.ndarray):
    """
    McNemar's test operates on contingency tables, which we need to build first.

    Each input array holds per-example correctness scores: 1.0 for a correct
    prediction and 0.0 for a wrong one.
    """
    # Counts for the four cells of the 2x2 contingency table.
    both_correct = 0
    both_wrong = 0
    a_correct_b_wrong = 0
    a_wrong_b_correct = 0
    for a, b in zip(model_a_array, model_b_array):
        if a == 1.0 and b == 1.0:
            both_correct += 1
        elif a == 0.0 and b == 0.0:
            both_wrong += 1
        elif a == 1.0 and b == 0.0:
            a_correct_b_wrong += 1
        elif a == 0.0 and b == 1.0:
            a_wrong_b_correct += 1
        else:
            raise ValueError(f"Invalid predictions: {a}, {b}")
    # mlxtend expects the discordant counts on the off-diagonal:
    # ary[0, 1] = A correct / B wrong, ary[1, 0] = A wrong / B correct.
    contingency_table = np.array(
        [[both_correct, a_correct_b_wrong], [a_wrong_b_correct, both_wrong]]
    )
    # corrected=True applies the continuity correction; returns (chi2, p-value).
    return evaluate.mcnemar(ary=contingency_table, corrected=True)
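
A minimal usage sketch, assuming mlxtend is installed; the score arrays below are hypothetical, made up for illustration. The test statistic depends only on the two discordant counts b (A correct, B wrong) and c (A wrong, B correct): with corrected=True, mlxtend computes the continuity-corrected statistic chi2 = (|b - c| - 1)^2 / (b + c).

import numpy as np

# Hypothetical per-example correctness scores (1.0 = correct, 0.0 = wrong).
model_a = np.array([1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0])
model_b = np.array([1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0])

chi2, p_value = mcnemar_test(model_a, model_b)
print(f"chi2 = {chi2:.3f}, p = {p_value:.3f}")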