in src/lighteval/metrics/metrics_sample.py [0:0]
def g_pass_at_k(self, all_scores: list[int]) -> dict[str, float]:
    """Compute the G-Pass@k and mG-Pass@k metrics for one sample.

    Follows the G-Pass@k definition from http://arxiv.org/abs/2412.13147.
    For every ``k`` in ``self.k`` and every threshold ``t`` in
    ``self.thresholds``, G-Pass@k_t is the hypergeometric probability that
    at least ``max(ceil(k * t), 1)`` of ``k`` generations drawn without
    replacement from ``self.n`` generations are correct. mG-Pass@k is
    ``2 / k`` times the sum of those probabilities over the required counts
    ``ceil(k / 2) + 1 .. k``.

    Args:
        all_scores: Per-generation scores; their sum is used as the number
            of correct generations (assumes 0/1 scores and
            ``len(all_scores) == self.n`` -- TODO confirm against caller).

    Returns:
        A dict with one ``"G-Pass@{k}_{t}"`` entry per (k, threshold) pair
        and one ``"mG-Pass@{k}"`` entry per k, in that order for each k.
        Values are plain Python floats.
    """
    n_correct: int = sum(all_scores)
    n_samples: int = self.n
    # NOTE(review): iterated below, so self.k must be an iterable of ints
    # (the previous ``int`` annotation was wrong).
    ks: list[int] = self.k
    thresholds: list[float] = self.thresholds

    def prob_at_least(k: int, m: int) -> float:
        """Return P(at least m correct among k draws), 0.0 if infeasible."""
        if (
            m > min(n_correct, k)
            or k > n_samples
            or n_correct < 0
            or n_samples <= 0
            or m < 0
        ):
            return 0.0
        # sf(m - 1) is P(X > m - 1) == P(X >= m); cast so every returned
        # value is a plain float rather than a mix of float / numpy.float64.
        return float(hypergeom.sf(m - 1, n_samples, n_correct, k))

    def g_pass(k: int, t: float) -> float:
        """Return G-Pass@k for threshold t (at least one success required)."""
        required = max(int(np.ceil(k * t)), 1)
        return prob_at_least(k, required)

    def mg_pass(k: int) -> float:
        """Return mG-Pass@k: 2/k times the sum over counts above ceil(k/2)."""
        if k <= 0:
            # Degenerate k: the G-Pass terms are all 0.0, and dividing by k
            # would raise ZeroDivisionError for k == 0.
            return 0.0
        low = int(np.ceil(k * 0.5))
        total = sum(prob_at_least(k, i) for i in range(low + 1, k + 1))
        return float(2 * total / k)

    metrics: dict[str, float] = {}
    for k in ks:
        for t in thresholds:
            metrics[f"G-Pass@{k}_{t}"] = g_pass(k, t)
        metrics[f"mG-Pass@{k}"] = mg_pass(k)
    return metrics