def benchmark()

in benchmark/benchmark.py [0:0]


def benchmark(agent, games, args):
    """Evaluate ``agent`` on each game in ``games`` and log summary statistics.

    Games whose base file name is in the built-in exclusion set are skipped.
    Games for which ``evaluate`` raises ``ValueError`` are skipped as well
    (treated as unsupported), as are games reporting a maximum score of zero,
    since their score cannot be normalized.

    For every evaluated game, a line with the elapsed seconds, the number of
    "losts", and the raw and normalized score is logged and written through
    the progress bar. Once all games are processed, the mean normalized
    score, the total time, and the average steps per second are logged at
    critical level.

    Args:
        agent: Passed through unchanged to ``evaluate``.
        games: Iterable of game file paths (sorted before evaluation).
        args: Passed through unchanged to ``evaluate``.

    Returns:
        None. All results are reported through ``log`` and the progress bar.
    """
    # Matched against os.path.basename(game); a set gives O(1) membership.
    excluded_games = frozenset(
        ["enter.z5", "sherlock.z5", "sherbet.z5", "theatre.z5", "balances.z5"]
    )

    score_sum = 0.0  # Sum of normalized scores; divided by nb_games at the end.
    total_time = 0.
    total_steps = 0

    nb_games = 0
    games = sorted(games)
    # `default=0` keeps an empty list of games from raising ValueError.
    max_game_name = max((len(os.path.basename(game)) for game in games), default=0)
    with tqdm(total=len(games), leave=False) as pbar:
        for game in games:
            game_name = os.path.basename(game)
            pbar.set_postfix_str(game_name)
            if game_name in excluded_games:
                pbar.write("{} (skip)".format(game_name))
                log.info("Excluded game: {}".format(game_name))
                pbar.update(1)
                continue  # Skip excluded games.
            try:
                nb_steps, nb_losts, final_score, max_score, seconds = evaluate(agent, game, args)
            except ValueError as e:
                pbar.write("{} (skip)".format(game_name))
                log.error(str(e))
                pbar.update(1)
                continue  # Skip not supported games.

            if max_score == 0:
                # The score cannot be normalized; skipping is preferable to
                # aborting the whole benchmark with a ZeroDivisionError.
                pbar.write("{} (skip)".format(game_name))
                log.error("Game {} reports a max score of 0; cannot normalize.".format(game_name))
                pbar.update(1)
                continue

            nb_games += 1

            norm_score = 100.0 * final_score / max_score
            assert norm_score <= 100.0
            total_time += seconds
            total_steps += nb_steps

            msg = "{}\t{:5.0f} seconds\t{:4d} losts\tScore: {:3d}/{:3d} ({:6.2f}%)"
            msg = msg.format(game_name.ljust(max_game_name), seconds, nb_losts, final_score, max_score, norm_score)
            log.info(msg)
            pbar.write(msg)
            pbar.update(1)

            score_sum += norm_score

    if nb_games == 0:
        # Every game was skipped (or none were given): there is nothing to
        # average, and dividing by nb_games would raise ZeroDivisionError.
        log.critical("No games were evaluated; no statistics to report.")
        return

    # A zero total time would make the speed undefined; report NaN instead of
    # raising ZeroDivisionError.
    steps_per_second = total_steps / total_time if total_time > 0 else float("nan")

    log.critical("Mean score (over {} games) = {:8.4f}% of total possible".format(nb_games, score_sum / nb_games))
    log.critical("Total time {:9.2f} seconds".format(total_time))
    log.critical("Avg. speed: {:8.2f} steps per second".format(steps_per_second))