in train.py
import numpy as np  # needed for np.mean below; imported at the top of train.py

import utils  # provides the eval_mode context manager


def evaluate(self):
    """Run `num_eval_episodes` episodes without exploration and return the mean reward."""
    episode_rewards = []
    for episode in range(self.cfg.num_eval_episodes):
        if self.cfg.fixed_eval:
            # Fixed-seed evaluation: seed each episode by its index so results
            # are comparable across evaluation runs.
            self.env.set_seed(episode)
        obs = self.env.reset()
        self.agent.reset()
        # Only record video for the first evaluation episode.
        self.video_recorder.init(enabled=(episode == 0))
        done = False
        episode_reward = 0
        while not done:
            with utils.eval_mode(self.agent):
                if self.cfg.normalize_obs:
                    # Normalize the observation with statistics tracked by the replay buffer.
                    mu, sigma = self.replay_buffer.get_obs_stats()
                    obs_norm = (obs - mu) / sigma
                    action = self.agent.act(obs_norm, sample=False)
                else:
                    action = self.agent.act(obs, sample=False)
            obs, reward, done, _ = self.env.step(action)
            self.video_recorder.record(self.env)
            episode_reward += reward
        episode_rewards.append(episode_reward)
        self.video_recorder.save(f'{self.step}.mp4')
        self.logger.log('eval/episode_reward', episode_reward, self.step)
    if self.cfg.fixed_eval:
        # Restore unseeded behavior for subsequent training interaction.
        self.env.set_seed(None)
    self.logger.dump(self.step)
    return np.mean(episode_rewards)
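

# ---------------------------------------------------------------------------
# `utils.eval_mode` is used above but not shown. Below is a minimal sketch of
# such a context manager, assuming the agent's networks follow the
# torch.nn.Module train()/eval() convention; the repo's actual utils.py may
# differ in detail.
class eval_mode:
    """Temporarily switch the given models to eval mode, restoring their
    previous training flags on exit."""

    def __init__(self, *models):
        self.models = models

    def __enter__(self):
        # Remember each model's training flag, then disable training mode.
        self.prev = [model.training for model in self.models]
        for model in self.models:
            model.train(False)

    def __exit__(self, *exc):
        # Restore the original training flags.
        for model, was_training in zip(self.models, self.prev):
            model.train(was_training)
        return False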
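

# `replay_buffer.get_obs_stats` is also assumed rather than shown: it should
# return the per-dimension mean and standard deviation of stored observations.
# A hypothetical standalone sketch follows; the signature, the `eps` term, and
# the raw `obses` array are illustrative, not the repo's actual internals.
def get_obs_stats(obses, eps=1e-6):
    # Per-dimension statistics over the stored observations; `eps` keeps the
    # division in evaluate() safe when a dimension has zero variance.
    mu = obses.mean(axis=0)
    sigma = obses.std(axis=0) + eps
    return mu, sigma
# Usage (illustrative): mu, sigma = get_obs_stats(buffer_obses)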