def simulate()

in leaderboard/power.py [0:0]


import numpy as np

# Assumed imports: `np` and `evaluate` are used below but are not shown in this
# excerpt. `evaluate` must expose `mcnemar(ary)` returning (statistic, p-value);
# mlxtend.evaluate matches that interface, though the repo may use a local module.
from mlxtend import evaluate


def simulate(p_correct: float, n_trials: int, n_points: int, delta: float, alpha: float = 0.05):
    """Estimate the power of McNemar's test for detecting an accuracy gap of `delta`.

    Simulates `n_trials` comparisons between a baseline model with accuracy
    `p_correct` and an alternative model with accuracy `p_correct + delta`, each
    evaluated on `n_points` independent points, and reports how often the test
    detects the positive effect at significance level `alpha`.
    """
    all_agreements = []  # per-trial fraction of points on which the two models agree
    power = []  # per-trial indicator: positive observed effect and p <= alpha
    antipower = []  # per-trial indicator: non-positive observed effect and p >= alpha
    for _ in range(n_trials):
        baseline_p_correct = p_correct
        other_p_correct = baseline_p_correct + delta
        # 2x2 table of per-point outcomes: rows index the baseline model,
        # columns the other model (0 = correct, 1 = incorrect).
        contingency_table = np.zeros((2, 2))
        n_baseline_correct = 0
        n_other_correct = 0
        # Sample whether each model gets each point correct (independent Bernoulli draws)
        for _ in range(n_points):
            if np.random.random() < baseline_p_correct:
                baseline_table = 0  # row index: baseline correct
                n_baseline_correct += 1
            else:
                baseline_table = 1  # row index: baseline incorrect

            if np.random.random() < other_p_correct:
                other_table = 0  # column index: other model correct
                n_other_correct += 1
            else:
                other_table = 1  # column index: other model incorrect

            contingency_table[baseline_table][other_table] += 1

        # Agreement: points where both models are correct or both are incorrect.
        agreement = contingency_table[0][0] + contingency_table[1][1]
        all_agreements.append(agreement / n_points)
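        # McNemar's test (as typically implemented, e.g. in mlxtend.evaluate) uses
        # only the discordant cells b = table[0][1] and c = table[1][0]; with the
        # continuity correction the statistic is (|b - c| - 1)**2 / (b + c),
        # compared against a chi-square distribution with 1 degree of freedom.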
        _, p = evaluate.mcnemar(ary=contingency_table)
        baseline_accuracy = n_baseline_correct / n_points
        other_accuracy = n_other_correct / n_points
        observed_effect = other_accuracy - baseline_accuracy
        # Power: the trial observes a positive effect and the test rejects at level alpha.
        if observed_effect > 0 and p <= alpha:
            power.append(1)
        else:
            power.append(0)

        # "Antipower": the trial observes a non-positive effect and the test does not reject.
        if observed_effect <= 0 and p >= alpha:
            antipower.append(1)
        else:
            antipower.append(0)

    avg_power = np.mean(power)
    avg_antipower = np.mean(antipower)
    return {
        "delta": delta,
        "power": avg_power,
        "antipower": avg_antipower,
        "agreement": np.mean(all_agreements),
        "p_correct": p_correct,
    }
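

A minimal usage sketch, assuming the imports above; the `__main__` guard, the
parameter values, and the sweep over effect sizes are illustrative, not taken
from the repo:

if __name__ == "__main__":
    # Illustrative sweep: how detection power grows with the accuracy gap,
    # for a baseline accuracy of 0.7, 500 evaluation points, and 1000 trials.
    for delta in (0.01, 0.02, 0.05, 0.10):
        result = simulate(p_correct=0.7, n_trials=1000, n_points=500, delta=delta)
        print(
            f"delta={result['delta']:.2f}  "
            f"power={result['power']:.3f}  "
            f"antipower={result['antipower']:.3f}  "
            f"agreement={result['agreement']:.3f}"
        )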