in rl/ppo/ppo_trainer.py
def eval(self) -> None:
r"""Main method of trainer evaluation. Calls _eval_checkpoint() that
is specified in Trainer class that inherits from BaseRLTrainer
Returns:
None
"""
os.makedirs(os.path.join(self.config.CHECKPOINT_FOLDER, 'eval/'), exist_ok=True)
# add test episode information to config
test_episodes = json.load(open(self.config.EVAL.DATASET))
self.config.defrost()
self.config.ENV.TEST_EPISODES = test_episodes
self.config.ENV.TEST_EPISODE_COUNT = len(test_episodes)
self.config.freeze()
# Map location CPU is almost always better than mapping to a CUDA device.
checkpoint_path = self.config.LOAD
ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")
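# load_checkpoint presumably returns None when nothing is found at
# checkpoint_path (see the guard below). Mapping to CPU avoids a GPU memory
# spike; the weights are moved to the right device when load_state_dict
# copies them into the agent built by _setup_actor_critic_agent.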
ppo_cfg = self.config.RL.PPO
logger.info(f"env config: {self.config}")
self.envs = construct_envs(self.config, get_env_class(self.config.ENV.ENV_NAME))
self._setup_actor_critic_agent(ppo_cfg)
# [!!] Log extra stuff
logger.info(f"checkpoint: {checkpoint_path}")
logger.info(f"num_steps: {self.config.ENV.NUM_STEPS}")
# [!!] Only load if present
if ckpt_dict is not None:
self.agent.load_state_dict(ckpt_dict["state_dict"])
else:
logger.info('NO CHECKPOINT LOADED!')
self.actor_critic = self.agent.actor_critic
observations = self.envs.reset()
batch = self.batch_obs(observations, self.device)
current_episode_reward = torch.zeros(
self.envs.num_envs, 1, device=self.device
)
test_recurrent_hidden_states = torch.zeros(
self.actor_critic.net.num_recurrent_layers,
self.config.NUM_PROCESSES,
ppo_cfg.hidden_size,
device=self.device,
)
prev_actions = torch.zeros(
self.config.NUM_PROCESSES, 1, device=self.device, dtype=torch.long
)
not_done_masks = torch.zeros(
self.config.NUM_PROCESSES, 1, device=self.device
)
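# Per-env rollout state: hidden states are [num_recurrent_layers,
# NUM_PROCESSES, hidden_size]; prev_actions and not_done_masks are
# [NUM_PROCESSES, 1]. A mask of 0.0 at an episode boundary tells the policy
# to reset its recurrent state for that env.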
stats_episodes = dict() # dict of dicts that stores stats per episode
rgb_frames = [
[] for _ in range(self.config.NUM_PROCESSES)
] # type: List[List[np.ndarray]]
# [!!] Store extra information about the trajectory that the env does not return
episode_infos = [[] for _ in range(self.config.NUM_PROCESSES)]
pbar = tqdm.tqdm()
self.actor_critic.eval()
while (
len(stats_episodes) < self.config.ENV.TEST_EPISODE_COUNT
and self.envs.num_envs > 0
):
# [!!] Show more fine-grained progress. THOR is slow!
pbar.update()
current_episodes = self.envs.current_episodes()
with torch.no_grad():
(
_,
actions,
_,
test_recurrent_hidden_states,
) = self.actor_critic.act(
batch,
test_recurrent_hidden_states,
prev_actions,
not_done_masks,
deterministic=False,
)
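# deterministic=False samples actions from the policy distribution during
# eval; switch to True for a greedy rollout if deterministic behavior is
# preferred.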
prev_actions.copy_(actions)
outputs = self.envs.step([a[0].item() for a in actions])
observations, rewards, dones, infos = [
list(x) for x in zip(*outputs)
]
batch = self.batch_obs(observations, self.device)
not_done_masks = torch.tensor(
[[0.0] if done else [1.0] for done in dones],
dtype=torch.float,
device=self.device,
)
rewards = torch.tensor(
rewards, dtype=torch.float, device=self.device
).unsqueeze(1)
current_episode_reward += rewards
# [!!] store episode history
for i in range(self.envs.num_envs):
episode_infos[i].append(infos[i])
next_episodes = self.envs.current_episodes()
envs_to_pause = []
n_envs = self.envs.num_envs
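# Pause any env whose *next* episode has already been evaluated: once an
# env's episode iterator wraps around near the end of eval, stepping it
# further would double-count episodes.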
for i in range(n_envs):
if (
next_episodes[i]['scene_id'],
next_episodes[i]['episode_id'],
) in stats_episodes:
envs_to_pause.append(i)
# episode ended
if not_done_masks[i].item() == 0:
episode_stats = dict()
episode_stats["reward"] = current_episode_reward[i].item()
episode_stats.update(
self._extract_scalars_from_info(infos[i])
)
current_episode_reward[i] = 0
# [!!] Add per-step episode information
episode_info = []
for info in episode_infos[i]:
act_data = {
'reward': info['reward'],
'action': info['action'],
'target': None,
'success': info['success'],
}
if 'target' in info:
act_data['target'] = info['target']['objectId']
episode_info.append(act_data)
episode_stats['step_info'] = episode_info
episode_infos[i] = []
# use scene_id + episode_id as unique id for storing stats
stats_episodes[
(
current_episodes[i]['scene_id'],
current_episodes[i]['episode_id'],
)
] = episode_stats
# [!!] Save episode data in the eval folder for processing
scene, episode = current_episodes[i]['scene_id'], current_episodes[i]['episode_id']
torch.save(
{'scene_id': scene, 'episode_id': episode, 'stats': episode_stats},
f'{self.config.CHECKPOINT_FOLDER}/eval/{scene}_{episode}.pth',
)
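# Each finished episode gets its own .pth file so results can be re-loaded
# offline, e.g. torch.load('<CHECKPOINT_FOLDER>/eval/<scene>_<episode>.pth')
# yields {'scene_id': ..., 'episode_id': ..., 'stats': ...}.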
(
self.envs,
test_recurrent_hidden_states,
not_done_masks,
current_episode_reward,
prev_actions,
batch,
rgb_frames,
) = self._pause_envs(
envs_to_pause,
self.envs,
test_recurrent_hidden_states,
not_done_masks,
current_episode_reward,
prev_actions,
batch,
rgb_frames,
)
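# _pause_envs (inherited from the base trainer) is assumed to drop the
# paused indices from the env vector and from every per-env tensor passed
# in, so batch dimensions stay consistent. episode_infos is not passed, so
# its indexing only stays aligned as long as no env is paused mid-episode.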
num_episodes = len(stats_episodes)
aggregated_stats = dict()
# for stat_key in next(iter(stats_episodes.values())).keys(): # [!!] Only output reward
for stat_key in ['reward']:
aggregated_stats[stat_key] = (
sum([v[stat_key] for v in stats_episodes.values()])
/ num_episodes
)
for k, v in aggregated_stats.items():
logger.info(f"Average episode {k}: {v:.4f}")
self.envs.close()
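# Minimal (hypothetical) driver sketch, assuming a PPOTrainer class in this
# file and a get_config helper; the config must set LOAD, EVAL.DATASET and
# CHECKPOINT_FOLDER:
#
#   config = get_config('configs/eval.yaml')  # hypothetical helper
#   trainer = PPOTrainer(config)
#   trainer.eval()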