in test.py
import numpy as np
import torch

# `make_lr_venv` is the project's level-replay vectorized-env constructor;
# the import path below is assumed and may need adjusting to your repo layout.
from level_replay.envs import make_lr_venv


def evaluate(
    args,
    actor_critic,
    num_episodes,
    device,
    num_processes=1,
    deterministic=False,
    start_level=0,
    num_levels=0,
    seeds=None,
    level_sampler=None,
    progressbar=None):
    actor_critic.eval()

    # When a level sampler is provided, evaluate on the single level it currently points to.
    if level_sampler:
        start_level = level_sampler.seed_range()[0]
        num_levels = 1

    eval_envs, level_sampler = make_lr_venv(
        num_envs=num_processes, env_name=args.env_name,
        seeds=seeds, device=device,
        num_levels=num_levels, start_level=start_level,
        no_ret_normalization=args.no_ret_normalization,
        distribution_mode=args.distribution_mode,
        paint_vel_info=args.paint_vel_info,
        level_sampler=level_sampler)

    eval_episode_rewards = []
    if level_sampler:
        obs, _ = eval_envs.reset()
    else:
        obs = eval_envs.reset()

    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.ones(num_processes, 1, device=device)

    # Roll out the policy until the requested number of completed episodes has been collected.
    while len(eval_episode_rewards) < num_episodes:
        with torch.no_grad():
            _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                obs,
                eval_recurrent_hidden_states,
                eval_masks,
                deterministic=deterministic)

        obs, _, done, infos = eval_envs.step(action)

        # Zero the recurrent mask for environments whose episode just ended.
        eval_masks = torch.tensor(
            [[0.0] if done_ else [1.0] for done_ in done],
            dtype=torch.float32,
            device=device)

        for info in infos:
            if 'episode' in info.keys():
                eval_episode_rewards.append(info['episode']['r'])
                if progressbar:
                    progressbar.update(1)

    eval_envs.close()
    if progressbar:
        progressbar.close()

    if args.verbose:
        print("Last {} test episodes: mean/median reward {:.1f}/{:.1f}\n"
              .format(len(eval_episode_rewards),
                      np.mean(eval_episode_rewards), np.median(eval_episode_rewards)))

    return eval_episode_rewards
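

# Example usage (a minimal sketch, not part of the original file): `args`,
# `actor_critic`, and `device` follow the signature above and are assumed to
# come from the training script; using `tqdm` for the progress bar is an
# assumption, not something the source specifies.
#
#     from tqdm import tqdm
#
#     test_rewards = evaluate(
#         args, actor_critic, num_episodes=10, device=device,
#         num_processes=args.num_processes, deterministic=True,
#         progressbar=tqdm(total=10))
#     print(np.mean(test_rewards), np.median(test_rewards))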