def evaluate()

in train.py [0:0]


    def evaluate(self):
        """Run `cfg.num_eval_episodes` deterministic episodes and return the mean reward.

        Acts greedily (`sample=False`) under `utils.eval_mode`. When
        `cfg.normalize_obs` is set, observations are standardized with the
        replay buffer's running statistics before being passed to the agent.
        Only the first episode is recorded to video; per-episode rewards are
        logged under 'eval/episode_reward'.

        Returns:
            float: mean episode reward across all evaluation episodes.
        """
        rewards = []
        for ep in range(self.cfg.num_eval_episodes):
            # With fixed_eval, seed each episode by its index so every
            # evaluation pass sees the same sequence of initial states.
            if self.cfg.fixed_eval:
                self.env.set_seed(ep)
            obs = self.env.reset()
            self.agent.reset()
            # Record video for the first episode only.
            self.video_recorder.init(enabled=(ep == 0))
            done = False
            total = 0
            while not done:
                with utils.eval_mode(self.agent):
                    agent_obs = obs
                    if self.cfg.normalize_obs:
                        # Standardize with running buffer stats before acting.
                        mu, sigma = self.replay_buffer.get_obs_stats()
                        agent_obs = (obs - mu) / sigma
                    action = self.agent.act(agent_obs, sample=False)
                obs, reward, done, _ = self.env.step(action)
                self.video_recorder.record(self.env)
                total += reward
            rewards.append(total)

            self.video_recorder.save(f'{self.step}.mp4')
            self.logger.log('eval/episode_reward', total, self.step)
        if self.cfg.fixed_eval:
            # Restore unseeded (nondeterministic) behavior after evaluation.
            self.env.set_seed(None)
        self.logger.dump(self.step)
        return np.mean(rewards)