def test()

in qlearn/toys/test.py [0:0]


def test(args, env, dqn):
    """Run evaluation episodes with ``dqn`` on ``env`` and return the mean return.

    Actions come from ``dqn.act`` called with the evaluation-time keyword
    arguments that match ``args.agent``:

    * ``'VariationalDQN'`` -> ``sample=False``
    * ``'NoisyDQN'``, ``'BayesBackpropDQN'``, ``'MNFDQN'`` -> ``eval=True``
    * ``'DQN'``, ``'BootstrappedDQN'`` -> no extra arguments

    Args:
        args: Configuration object. ``args.agent`` (agent name) and
            ``args.evaluation_episodes`` (number of episodes) are read.
        env: Environment exposing ``reset()``, ``step(action)`` returning a
            ``(state, reward, done, info)`` 4-tuple, and ``close()``. It is
            closed once the evaluation loop finishes, even if the loop raises.
        dqn: Agent exposing ``act(...)`` and ``online_net.eval()``. The
            network is not switched back out of that mode here.

    Returns:
        The arithmetic mean of the per-episode reward sums.

    Raises:
        ValueError: If ``args.agent`` is not one of the supported names.
            Raised before the network or the environment is touched.
        ZeroDivisionError: If no episode was run, i.e.
            ``args.evaluation_episodes`` is zero or negative.
    """
    # Resolve the per-agent keyword arguments once; they do not change
    # between steps. Failing here gives a clear error instead of an
    # unbound ``action`` later inside the loop.
    agent = args.agent
    if agent == 'VariationalDQN':
        act_kwargs = {'sample': False}
    elif agent in ('NoisyDQN', 'BayesBackpropDQN', 'MNFDQN'):
        act_kwargs = {'eval': True}
    elif agent in ('DQN', 'BootstrappedDQN'):
        act_kwargs = {}
    else:
        raise ValueError('Unsupported agent for evaluation: {!r}'.format(agent))

    rewards = []
    dqn.online_net.eval()
    # dqn.online_net.freeze_noise()

    try:
        # Test performance over several episodes
        for _ in range(args.evaluation_episodes):
            state, reward_sum, done = env.reset(), 0, False
            while not done:
                # ``state[None]`` presumably adds a leading batch axis to an
                # array-like state -- TODO confirm against ``dqn.act``.
                action = dqn.act(state[None], **act_kwargs)
                state, reward, done, _ = env.step(int(action))  # Step
                reward_sum += reward
            rewards.append(reward_sum)
    finally:
        # Release the environment even when acting or stepping fails.
        env.close()

    # return average reward
    return sum(rewards) / len(rewards)


# The name matches pytest's default ``test*`` function pattern; opt out so the
# evaluation routine is never collected as a test when imported into one.
test.__test__ = False