def evaluate()

in test.py


import numpy as np
import torch

from level_replay.envs import make_lr_venv  # assumed import path for the project's make_lr_venv helper


def evaluate(
    args,
    actor_critic,
    num_episodes,
    device,
    num_processes=1,
    deterministic=False,
    start_level=0,
    num_levels=0,
    seeds=None,
    level_sampler=None,
    progressbar=None):
    """Roll out actor_critic in fresh evaluation envs and return per-episode rewards."""
    actor_critic.eval()

    # Defer level selection to the sampler: anchor start_level to its seed range.
    if level_sampler:
        start_level = level_sampler.seed_range()[0]
        num_levels = 1

    eval_envs, level_sampler = make_lr_venv(
        num_envs=num_processes, env_name=args.env_name,
        seeds=seeds, device=device,
        num_levels=num_levels, start_level=start_level,
        no_ret_normalization=args.no_ret_normalization,
        distribution_mode=args.distribution_mode,
        paint_vel_info=args.paint_vel_info,
        level_sampler=level_sampler)

    eval_episode_rewards = []

    if level_sampler:
        # Sampler-backed venvs also return the sampled level seeds on reset.
        obs, _ = eval_envs.reset()
    else:
        obs = eval_envs.reset()

    # Fresh recurrent state; masks start at 1 (no episode boundary yet).
    eval_recurrent_hidden_states = torch.zeros(
        num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.ones(num_processes, 1, device=device)

    while len(eval_episode_rewards) < num_episodes:
        # Act without tracking gradients during evaluation.
        with torch.no_grad():
            _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                obs,
                eval_recurrent_hidden_states,
                eval_masks,
                deterministic=deterministic)

        obs, _, done, infos = eval_envs.step(action)

        # Zero the mask where an episode ended so recurrent state does not
        # carry across resets.
        eval_masks = torch.tensor(
            [[0.0] if done_ else [1.0] for done_ in done],
            dtype=torch.float32,
            device=device)

        # Completed episodes report their return under the 'episode' info key.
        for info in infos:
            if 'episode' in info:
                eval_episode_rewards.append(info['episode']['r'])
                if progressbar:
                    progressbar.update(1)

    eval_envs.close()
    if progressbar:
        progressbar.close()

    if args.verbose:
        print("Last {} test episodes: mean/median reward {:.1f}/{:.1f}\n".format(
            len(eval_episode_rewards),
            np.mean(eval_episode_rewards),
            np.median(eval_episode_rewards)))

    return eval_episode_rewards
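
A minimal usage sketch, assuming a trained actor_critic exposing the recurrent act() interface used above and an argparse-style args namespace carrying the fields evaluate() reads (env_name, no_ret_normalization, distribution_mode, paint_vel_info, verbose). make_args() and load_policy() below are hypothetical stand-ins for however the surrounding project builds its config and policy:

# Hypothetical driver; only evaluate() itself comes from test.py.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args = make_args()                        # hypothetical config builder
actor_critic = load_policy(args, device)  # hypothetical checkpoint loader
rewards = evaluate(
    args, actor_critic,
    num_episodes=10,
    device=device,
    num_processes=4,
    deterministic=True)
print('Mean eval reward: {:.1f}'.format(np.mean(rewards)))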