in MTRF/algorithms/softlearning/algorithms/phased_sac.py [0:0]
def _evaluation_paths(self):
    """Roll out every goal-conditioned policy in the evaluation environment.

    When evaluation is disabled (`_eval_n_episodes < 1`), a single round is
    still run on the first epoch (or while no cached result exists) so the
    diagnostic logs have entries; afterwards the cached result short-circuits
    and an empty tuple is returned. Optionally saves per-episode videos.

    Returns:
        A list with one entry per goal, each the list of rollout paths for
        that goal's policy (or `()` when evaluation is being skipped).
    """
    if self._eval_n_episodes < 1:
        # Already produced the one "fake" evaluation round on an earlier
        # epoch -- skip evaluation entirely from here on.
        if self._epoch > 0 and self.fake_eval_paths is not None:
            return ()
        # First epoch (or nothing cached yet): do one episode per goal
        # purely to fill in otherwise-empty training-phase logs.
        episodes_per_goal = 1
    else:
        episodes_per_goal = self._eval_n_episodes

    should_save_video = (
        self._video_save_frequency > 0
        and self._epoch % self._video_save_frequency == 0)

    environment = self._evaluation_environment
    paths = []
    for goal in range(self._num_goals):
        policy = self._policies[goal]
        with policy.set_deterministic(self._eval_deterministic):
            environment.set_goal(goal)
            environment.reset()
            rendered = rollouts(
                episodes_per_goal,
                environment,
                policy,
                self._samplers[goal]._max_path_length,
                render_kwargs=(self._eval_render_kwargs
                               if should_save_video else {}),
            )
        paths.append(rendered)

    if should_save_video:
        # TODO: interleave videos from different policies
        for episode in range(len(paths[0])):
            # Stitch each goal's frames for this episode into one clip,
            # consuming (pop) the stored images as we go.
            frames = np.concatenate([
                paths[goal][episode].pop('images')
                for goal in range(self._num_goals)
            ])
            video_path = os.path.join(
                os.getcwd(),
                'videos',
                f'evaluation_path_{self._epoch}_{episode}.mp4')
            save_video(frames, video_path, fps=60)

    if self._eval_n_episodes < 1:
        # Cache so subsequent epochs can skip evaluation (see guard above).
        self.fake_eval_paths = paths
    return paths