in rl/ppo/ppo_trainer.py
def eval(self) -> None:
r"""Main method of trainer evaluation. Calls _eval_checkpoint() that
is specified in Trainer class that inherits from BaseRLTrainer
Returns:
None
"""
os.makedirs(os.path.join(self.config.CHECKPOINT_FOLDER, 'eval/'), exist_ok=True)
# add test episode information to config
test_episodes = json.load(open(self.config.EVAL.DATASET))
self.config.defrost()
self.config.ENV.TEST_EPISODES = test_episodes
self.config.ENV.TEST_EPISODE_COUNT = len(test_episodes)
self.config.freeze()
# Map location CPU is almost always better than mapping to a CUDA device.
checkpoint_path = self.config.LOAD
ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")
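# load_checkpoint presumably returns None when nothing is found at
# checkpoint_path (see the guard below). Mapping to CPU avoids a GPU memory
# spike; the weights are moved to the right device when load_state_dict
# copies them into the agent built by _setup_actor_critic_agent.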
ppo_cfg = self.config.RL.PPO
logger.info(f"env config: {self.config}")
self.envs = construct_envs(self.config, get_env_class(self.config.ENV.ENV_NAME))
self._setup_actor_critic_agent(ppo_cfg)
# [!!] Log extra stuff
logger.info(f"checkpoint: {checkpoint_path}")
logger.info(f"num_steps: {self.config.ENV.NUM_STEPS}")
# [!!] Only load if present
if ckpt_dict is not None:
self.agent.load_state_dict(ckpt_dict["state_dict"])
else:
logger.info('NO CHECKPOINT LOADED!')
self.actor_critic = self.agent.actor_critic
observations = self.envs.reset()
batch = self.batch_obs(observations, self.device)
current_episode_reward = torch.zeros(
self.envs.num_envs, 1, device=self.device
)
test_recurrent_hidden_states = torch.zeros(
self.actor_critic.net.num_recurrent_layers,
self.config.NUM_PROCESSES,
ppo_cfg.hidden_size,
device=self.device,
)
prev_actions = torch.zeros(
self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long
)
not_done_masks = torch.zeros(
self.config.NUM_PROCESSES, 1, device=self.device
)
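# Per-env rollout state: hidden states are [num_recurrent_layers,
# NUM_PROCESSES, hidden_size]; prev_actions and not_done_masks are
# [NUM_PROCESSES, 1]. A mask of 0.0 at an episode boundary tells the policy
# to reset its recurrent state for that env.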
stats_episodes = dict() # dict of dicts that stores stats per episode
rgb_frames = [
[] for _ in range(self.config.NUM_PROCESSES)
] # type: List[List[np.ndarray]]
# [!!] Store extra information about the trajectory that the env does not return
episode_infos = [[] for _ in range(self.config.NUM_PROCESSES)]
pbar = tqdm.tqdm()
self.actor_critic.eval()
while (
len(stats_episodes) < self.config.ENV.TEST_EPISODE_COUNT
and self.envs.num_envs > 0
):
# [!!] Show more fine-grained progress. THOR is slow!
pbar.update()
current_episodes = self.envs.current_episodes()
with torch.no_grad():
(
_,
actions,
_,
test_recurrent_hidden_states,
) = self.actor_critic.act(
batch,
test_recurrent_hidden_states,
prev_actions,
not_done_masks,
deterministic=False,
)
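# deterministic=False samples actions from the policy distribution during
# eval; switch to True for a greedy rollout if deterministic behavior is
# preferred.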
prev_actions.copy_(actions)
outputs = self.envs.step([a[0].item() for a in actions])
observations, rewards, dones, infos = [
list(x) for x in zip(*outputs)
]
batch = self.batch_obs(observations, self.device)
not_done_masks = torch.tensor(
[[0.0] if done else [1.0] for done in dones],
dtype=torch.float,
device=self.device,
)
rewards = torch.tensor(
rewards, dtype=torch.float, device=self.device
).unsqueeze(1)
current_episode_reward += rewards
# [!!] store episode history
for i in range(self.envs.num_envs):
episode_infos[i].append(infos[i])
next_episodes = self.envs.current_episodes()
envs_to_pause = []
n_envs = self.envs.num_envs
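# Pause any env whose *next* episode has already been evaluated: once an
# env's episode iterator wraps around near the end of eval, stepping it
# further would double-count episodes.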
for i in range(n_envs):
if (
next_episodes[i]['scene_id'],
next_episodes[i]['episode_id'],
) in stats_episodes:
envs_to_pause.append(i)
# episode ended
if not_done_masks[i].item() == 0:
episode_stats = dict()
episode_stats["reward"] = current_episode_reward[i].item()
episode_stats.update(
self._extract_scalars_from_info(infos[i])
)
current_episode_reward[i] = 0
# [!!] Add per-step episode information
episode_info = []
for info in episode_infos[i]:
act_data = {
'reward': info['reward'],
'action': info['action'],
'target': None,
'success': info['success'],
}
if 'target' in info:
act_data['target'] = info['target']['objectId']
episode_info.append(act_data)
episode_stats['step_info'] = episode_info
episode_infos[i] = []
# use scene_id + episode_id as unique id for storing stats
stats_episodes[
(
current_episodes[i]['scene_id'],
current_episodes[i]['episode_id'],
)
] = episode_stats
# [!!] Save episode data in the eval folder for processing
scene, episode = current_episodes[i]['scene_id'], current_episodes[i]['episode_id']
torch.save(
{'scene_id': scene, 'episode_id': episode, 'stats': episode_stats},
f'{self.config.CHECKPOINT_FOLDER}/eval/{scene}_{episode}.pth',
)
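# Each finished episode gets its own .pth file so results can be re-loaded
# offline, e.g. torch.load('<CHECKPOINT_FOLDER>/eval/<scene>_<episode>.pth')
# yields {'scene_id': ..., 'episode_id': ..., 'stats': ...}.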
(
self.envs,
test_recurrent_hidden_states,
not_done_masks,
current_episode_reward,
prev_actions,
batch,
rgb_frames,
) = self._pause_envs(
envs_to_pause,
self.envs,
test_recurrent_hidden_states,
not_done_masks,
current_episode_reward,
prev_actions,
batch,
rgb_frames,
)
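# _pause_envs (inherited from the base trainer) is assumed to drop the
# paused indices from the env vector and from every per-env tensor passed
# in, so batch dimensions stay consistent. episode_infos is not passed, so
# its indexing only stays aligned as long as no env is paused mid-episode.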
num_episodes = len(stats_episodes)
aggregated_stats = dict()
# for stat_key in next(iter(stats_episodes.values())).keys(): # [!!] Only output reward
for stat_key in ['reward']:
aggregated_stats[stat_key] = (
sum([v[stat_key] for v in stats_episodes.values()])
/ num_episodes
)
for k, v in aggregated_stats.items():
logger.info(f"Average episode {k}: {v:.4f}")
self.envs.close()
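# Minimal (hypothetical) driver sketch, assuming a PPOTrainer class in this
# file and a get_config helper; the config must set LOAD, EVAL.DATASET and
# CHECKPOINT_FOLDER:
#
#   config = get_config('configs/eval.yaml')  # hypothetical helper
#   trainer = PPOTrainer(config)
#   trainer.eval()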