in src/python/phyre/metrics.py [0:0]
def compute_metrics(raw_simulation_log: SimulationLog) -> Metrics:
    """Compute aggregate solution metrics from a log of simulation attempts.

    Args:
        raw_simulation_log: A tuple or list of `(task, status)` pairs in the
            order the attempts were made. Each status is passed through
            `_normalize_sumulation_status`; attempts whose normalized status
            reports `is_invalid()` are dropped and do not count as attempts.

    Returns:
        A dict with the following keys:
            independent_solved_by: list of length `MAX_TEST_ATTEMPTS + 1`;
                entry `i` is the number of tasks whose first solution came
                within their first `i` valid attempts on that task.
            independent_solved_by_aucs: list of the same length; entry `i`
                is the weighted average of `independent_solved_by[1..i]`
                with weights `log(k + 1) - log(k)` (entry 0 is `0.`).
            global_solved_by: dict mapping each of 100, 1000 and 100000 to
                the number of tasks whose first solution occurred within
                that many valid attempts counted across the whole log.
            total_attempts: number of valid attempts in the log.
            total_solved: number of tasks solved at least once.

    Raises:
        AssertionError: if the log is not a tuple or list, or if its first
            entry does not have exactly two elements.
    """
    assert isinstance(raw_simulation_log,
                      (tuple, list)), type(raw_simulation_log)
    if not raw_simulation_log:
        logger.warning('Computing metrics for empty evaluation log!')
    else:
        assert len(raw_simulation_log[0]) == 2, raw_simulation_log[0]

    # Normalize every status exactly once, then drop invalid attempts so they
    # are counted neither as attempts nor as solutions.
    normalized_log = ((task, _normalize_sumulation_status(status))
                      for task, status in raw_simulation_log)
    simulation_log = [(task, status)
                      for task, status in normalized_log
                      if not status.is_invalid()]

    attempts = collections.defaultdict(int)  # task -> valid attempts so far
    solved_at = {}  # task -> per-task attempt number of its first solution
    first_solution_points = []  # global attempt index of each first solution
    for attempt_index, (task, status) in enumerate(simulation_log, start=1):
        attempts[task] += 1
        if task not in solved_at and status.is_solved():
            first_solution_points.append(attempt_index)
            solved_at[task] = attempts[task]

    # Check the attempts actually used, not just the attempt at which a task
    # was first solved: a task that is never solved (or keeps being tried
    # after its first solution) can exceed the budget as well.
    if attempts and max(attempts.values()) > MAX_TEST_ATTEMPTS:
        logger.warning(
            'Used more than %d attempts at least of one of the'
            ' tasks. It most likely means a bug in evaluation loop.',
            MAX_TEST_ATTEMPTS)

    # independent_solved_by[i] := how many tasks were solved with at most i
    # attempts on the task. Solutions found after MAX_TEST_ATTEMPTS attempts
    # are ignored.
    solved_at_counts = collections.Counter(solved_at.values())
    independent_solved_by = [0]
    for num_attempts in range(1, MAX_TEST_ATTEMPTS + 1):
        independent_solved_by.append(independent_solved_by[-1] +
                                     solved_at_counts[num_attempts])

    # Running log-weighted average of independent_solved_by: attempt k gets
    # weight log(k + 1) - log(k), so early attempts contribute the most.
    independent_solved_by_aucs = [0.]
    num, denom = 0., 0.
    for up_to in range(1, MAX_TEST_ATTEMPTS + 1):
        weight = math.log(up_to + 1) - math.log(up_to)
        num += weight * independent_solved_by[up_to]
        denom += weight
        independent_solved_by_aucs.append(num / denom)

    # Number of tasks first solved within the first t valid attempts overall.
    global_solved_by = {
        t: sum(num_attempts <= t for num_attempts in first_solution_points)
        for t in [100, 1000, 100000]
    }
    return dict(
        independent_solved_by=independent_solved_by,
        independent_solved_by_aucs=independent_solved_by_aucs,
        global_solved_by=global_solved_by,
        total_attempts=sum(attempts.values()),
        total_solved=len(first_solution_points),
    )