in benchmark/benchmark.py [0:0]
def benchmark(agent, games, args):
    """Evaluate ``agent`` on each game in ``games`` and log summary statistics.

    Games are visited in sorted order. A game is skipped (and reported as
    such on the progress bar) when its file name is in the built-in
    exclusion list, or when ``evaluate`` raises ``ValueError`` for it.
    For every evaluated game the normalized score (percentage of the
    maximum score) is logged; once all games are done the mean normalized
    score, the total time and the average number of steps per second are
    logged at critical level.

    Args:
        agent: Passed through unchanged to ``evaluate``.
        games: Iterable of game file paths; only the basename is used for
            display and for the exclusion check.
        args: Passed through unchanged to ``evaluate``.

    Returns:
        None. Results are reported through ``log`` and the progress bar.
    """
    # Games that are never evaluated, matched on their file name.
    excluded_games = {"enter.z5", "sherlock.z5", "sherbet.z5", "theatre.z5", "balances.z5"}

    score_sum = 0.0  # Sum of normalized scores; divided by nb_games at the end.
    total_time = 0.
    total_steps = 0
    nb_games = 0  # Number of games actually evaluated (skipped ones excluded).

    games = sorted(games)
    # Width used to align game names in the per-game report lines.
    # `default=0` keeps an empty game list from raising ValueError.
    max_game_name = max((len(os.path.basename(game)) for game in games), default=0)

    with tqdm(total=len(games), leave=False) as pbar:
        for game in games:
            game_name = os.path.basename(game)
            pbar.set_postfix_str(game_name)

            if game_name in excluded_games:
                pbar.write("{} (skip)".format(game_name))
                log.info("Excluded game: {}".format(game_name))
                pbar.update(1)
                continue  # Skip excluded games.

            try:
                nb_steps, nb_losts, final_score, max_score, seconds = evaluate(agent, game, args)
            except ValueError as e:
                pbar.write("{} (skip)".format(game_name))
                log.error(str(e))
                pbar.update(1)
                continue  # Skip not supported games.

            nb_games += 1
            if max_score:
                norm_score = 100.0 * final_score / max_score
            else:
                # NOTE(review): a zero maximum score used to abort the whole
                # benchmark with ZeroDivisionError. Counting the game as 0%
                # keeps the run going; confirm this is the desired policy.
                log.error("{}: maximum score is 0, counting as 0%".format(game_name))
                norm_score = 0.0

            # Sanity check on the values reported by `evaluate`.
            assert norm_score <= 100.0, "{}: score above 100%".format(game_name)

            total_time += seconds
            total_steps += nb_steps

            msg = "{}\t{:5.0f} seconds\t{:4d} losts\tScore: {:3d}/{:3d} ({:6.2f}%)"
            msg = msg.format(game_name.ljust(max_game_name), seconds, nb_losts, final_score, max_score, norm_score)
            log.info(msg)
            pbar.write(msg)
            pbar.update(1)
            score_sum += norm_score

    if nb_games == 0:
        # Every game was skipped (or none were given): there is nothing to
        # average, and dividing by nb_games would raise ZeroDivisionError.
        log.critical("No game was evaluated; no statistics to report.")
        return

    log.critical("Mean score (over {} games) = {:8.4f}% of total possible".format(nb_games, score_sum / nb_games))
    log.critical("Total time {:9.2f} seconds".format(total_time))
    if total_time > 0:
        log.critical("Avg. speed: {:8.2f} steps per second".format(total_steps / total_time))
    else:
        # No measurable time elapsed, so a rate cannot be computed.
        log.critical("Avg. speed: n/a (no elapsed time recorded)")