in pyhanabi/eval.py [0:0]
def evaluate(
    agents,
    num_game,
    seed,
    bomb,
    eps,
    sad,
    *,
    hand_size=5,
    runners=None,
    device="cuda:0",
):
    """Run a batch of games with the given agents and summarize the scores.

    Exactly one of ``agents`` and ``runners`` must be supplied. When ``agents``
    is given, each agent is wrapped in a ``rela.BatchRunner`` on ``device``
    that exposes its "act" method. When ``runners`` is given they are used
    directly and ``device`` is not used. In both cases this function starts
    the runners before the games run and stops them afterwards, and the
    number of players equals the number of runners.

    Args:
        agents: sequence of agents that each provide an "act" function,
            or None when ``runners`` is passed instead.
        num_game: forwarded to ``create_envs``; one environment thread is
            created per game it returns.
        seed: forwarded to ``create_envs``.
        bomb: forwarded to ``create_envs``.
        eps: forwarded to ``create_envs`` wrapped in a one-element list.
        sad: forwarded to ``create_envs``.
        hand_size: forwarded to ``create_envs``.
        runners: pre-built runners (one per player), or None when ``agents``
            is passed instead.
        device: device handed to ``rela.BatchRunner`` when runners are built
            from ``agents``.

    Returns:
        A 4-tuple ``(mean_score, perfect_ratio, scores, num_perfect)`` where
        ``scores`` is the list of ``last_score()`` values of all games,
        ``num_perfect`` is how many of them equal 25 and ``perfect_ratio`` is
        ``num_perfect / len(scores)``.

    Raises:
        AssertionError: if both ``agents`` and ``runners`` are supplied.
        TypeError: if neither ``agents`` nor ``runners`` is supplied.
    """
    assert agents is None or runners is None, (
        "pass either `agents` or `runners`, not both"
    )
    if agents is not None:
        # NOTE(review): 1000 is passed positionally to BatchRunner; its
        # meaning is not visible here -- confirm against rela.
        runners = [rela.BatchRunner(agent, device, 1000, ["act"]) for agent in agents]
    if runners is None:
        # Previously this case surfaced as an opaque "NoneType has no len()"
        # TypeError; keep the exception type but say what is actually wrong.
        raise TypeError("evaluate() requires either `agents` or `runners`")
    num_player = len(runners)

    context = rela.Context()
    # NOTE(review): the trailing -1, False, False are positional arguments
    # whose meaning is defined by create_envs -- see its signature.
    games = create_envs(
        num_game,
        seed,
        num_player,
        hand_size,
        bomb,
        [eps],
        -1,
        sad,
        False,
        False,
    )

    # One single-game vectorized env and one thread per game; every thread
    # gets its own actor for each runner.
    for g in games:
        env = hanalearn.HanabiVecEnv()
        env.append(g)
        actors = [rela.R2D2Actor(runner, 1) for runner in runners]
        thread = hanalearn.HanabiThreadLoop(actors, env, True)
        context.push_env_thread(thread)

    for runner in runners:
        runner.start()

    context.start()
    # Poll until the context reports that it has terminated.
    while not context.terminated():
        time.sleep(0.5)
    # Explicitly request termination and wait again before stopping runners.
    context.terminate()
    while not context.terminated():
        time.sleep(0.5)

    for runner in runners:
        runner.stop()

    scores = [g.last_score() for g in games]
    # Sum booleans with an explicit integer dtype so the count is an integer
    # even when no game is perfect (np.sum of an empty list would be 0.0).
    num_perfect = np.sum([s == 25 for s in scores], dtype=np.int64)
    return np.mean(scores), num_perfect / len(scores), scores, num_perfect