in mtrl/experiment/metaworld.py
def evaluate_vec_env_of_tasks(self, vec_env: VecEnv, step: int, episode: int):
"""Evaluate the agent's performance on the different environments,
vectorized as a single instance of vectorized environment.
Since we are evaluating on multiple tasks, we track additional metadata
to track which metric corresponds to which task.
Args:
vec_env (VecEnv): vectorized environment.
step (int): step for tracking the training of the agent.
episode (int): episode for tracking the training of the agent.
"""
episode_step = 0
for mode in self.eval_modes_to_env_ids:
self.logger.log(f"{mode}/episode", episode, step)
episode_reward, mask, done, success = [
np.full(shape=vec_env.num_envs, fill_value=fill_value)
for fill_value in [0.0, 1.0, False, 0.0]
]  # each array has shape (num_envs,)
multitask_obs = vec_env.reset() # (num_envs, 9, 84, 84)
agent = self.agent
offset = self.config.experiment.num_eval_episodes
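# `offset` is the number of evaluation episodes per task. The slicing below
# assumes the vectorized env holds `offset` copies of each task, laid out
# contiguously and grouped by mode.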
while episode_step < self.max_episode_steps:
with agent_utils.eval_mode(agent):
action = agent.select_action(
multitask_obs=multitask_obs, modes=["eval"]
)
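# Step all task copies at once; each env's info dict is expected to carry a
# per-step "success" flag (as in Metaworld).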
multitask_obs, reward, done, info = vec_env.step(action)
success += np.asarray([x["success"] for x in info])
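# Zero the mask once an env reports done so that no further reward is
# accumulated; since the mask is updated before the reward is added, the
# reward of the terminating step itself is excluded as well.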
mask = mask * (1 - done.astype(int))
episode_reward += reward * mask
episode_step += 1
start_index = 0
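# An env copy counts as successful if it reported success at any step.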
success = (success > 0).astype("float")
for mode in self.eval_modes_to_env_ids:
num_envs = len(self.eval_modes_to_env_ids[mode])
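# All env copies for this mode occupy one contiguous block of
# offset * num_envs entries in the metric arrays.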
self.logger.log(
f"{mode}/episode_reward",
episode_reward[start_index : start_index + offset * num_envs].mean(),
step,
)
self.logger.log(
f"{mode}/success",
success[start_index : start_index + offset * num_envs].mean(),
step,
)
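# Per-task breakdown: each env id's `offset` episodes form a contiguous
# sub-slice within the mode's block.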
for _current_env_index, _current_env_id in enumerate(
self.eval_modes_to_env_ids[mode]
):
self.logger.log(
f"{mode}/episode_reward_env_index_{_current_env_index}",
episode_reward[
start_index
+ _current_env_index * offset : start_index
+ (_current_env_index + 1) * offset
].mean(),
step,
)
self.logger.log(
f"{mode}/success_env_index_{_current_env_index}",
success[
start_index
+ _current_env_index * offset : start_index
+ (_current_env_index + 1) * offset
].mean(),
step,
)
self.logger.log(
f"{mode}/env_index_{_current_env_index}", _current_env_id, step
)
start_index += offset * num_envs
self.logger.dump(step)
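# Illustrative (hypothetical) layout: with
#   eval_modes_to_env_ids = {"eval": [0, 1], "eval_unseen": [2]}
#   num_eval_episodes (offset) = 2
# the vectorized env would hold 6 copies ordered as
#   [task0, task0, task1, task1, task2, task2],
# so "eval" metrics average indices 0:4 and "eval_unseen" metrics indices 4:6.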