def compute_metrics()

in src/python/phyre/metrics.py [0:0]


def compute_metrics(raw_simulation_log: SimulationLog) -> Metrics:
    """Computes evaluation metrics from a log of simulation attempts.

    Args:
        raw_simulation_log: A tuple or list of (task, status) pairs in the
            order the attempts were made. Each status is passed through
            _normalize_sumulation_status(); entries whose normalized status
            reports is_invalid() are dropped and are not counted as attempts.

    Returns:
        A dict with the following keys:
            independent_solved_by: List of length MAX_TEST_ATTEMPTS + 1.
                Element i is the number of tasks whose first solution came
                within the first i counted attempts on that task.
            independent_solved_by_aucs: List of length MAX_TEST_ATTEMPTS + 1.
                Element 0 is 0.0; element k is the average of
                independent_solved_by[1..k] weighted by log(i + 1) - log(i).
            global_solved_by: Dict mapping each of 100, 1000 and 100000 to
                the number of tasks whose first solution happened within
                that many counted attempts overall (across all tasks).
            total_attempts: Number of counted attempts.
            total_solved: Number of tasks with at least one solved attempt.

    Raises:
        AssertionError: If the log is not a tuple/list or its first entry is
            not a pair.
    """
    assert isinstance(raw_simulation_log,
                      (tuple, list)), type(raw_simulation_log)
    if not raw_simulation_log:
        logger.warning('Computing metrics for empty evaluation log!')
    else:
        assert len(raw_simulation_log[0]) == 2, raw_simulation_log[0]

    # Normalize every status exactly once and drop invalid attempts in the
    # same pass.
    simulation_log = []
    for task, raw_status in raw_simulation_log:
        status = _normalize_sumulation_status(raw_status)
        if not status.is_invalid():
            simulation_log.append((task, status))

    # attempts[task]: counted attempts made on the task.
    # solved_at[task]: per-task attempt number of the first solution.
    # first_solution_points: global attempt indices (1-based) at which some
    # task got solved for the first time.
    attempts = collections.defaultdict(int)
    solved_at = {}
    first_solution_points = []
    for attempt_index, (task, status) in enumerate(simulation_log, start=1):
        attempts[task] += 1
        if task not in solved_at and status.is_solved():
            first_solution_points.append(attempt_index)
            solved_at[task] = attempts[task]

    # Check the attempts actually used, not only the attempt at which a task
    # got solved: a task that was never solved (or was retried after being
    # solved) can exceed the budget as well.
    if attempts and max(attempts.values()) > MAX_TEST_ATTEMPTS:
        logger.warning(
            'Used more than %d attempts at least of one of the'
            ' tasks. It most likely means a bug in evaluation loop.',
            MAX_TEST_ATTEMPTS)

    # independent_solved_by[i] := how many tasks were solved with at most i
    # attempts on the task. Solutions found after MAX_TEST_ATTEMPTS attempts
    # are ignored.
    solved_at_counts = collections.Counter(solved_at.values())
    independent_solved_by = [0]
    for num_attempts in range(1, MAX_TEST_ATTEMPTS + 1):
        independent_solved_by.append(independent_solved_by[-1] +
                                     solved_at_counts[num_attempts])

    # Running weighted average of independent_solved_by[1..up_to]; the weight
    # log(i + 1) - log(i) shrinks as the number of attempts grows.
    independent_solved_by_aucs = [0.]
    num, denom = 0., 0.
    for up_to in range(1, MAX_TEST_ATTEMPTS + 1):
        weight = math.log(up_to + 1) - math.log(up_to)
        num += weight * independent_solved_by[up_to]
        denom += weight
        independent_solved_by_aucs.append(num / denom)

    global_solved_by = {
        t: sum(solved_index <= t for solved_index in first_solution_points)
        for t in [100, 1000, 100000]
    }

    return dict(
        independent_solved_by=independent_solved_by,
        independent_solved_by_aucs=independent_solved_by_aucs,
        global_solved_by=global_solved_by,
        total_attempts=sum(attempts.values()),
        total_solved=len(first_solution_points),
    )