in train.py [0:0]
def setup(self):
    """Build the logger, environments, replay buffer and video recorders.

    Reads its settings from ``self.cfg`` and places on-disk artifacts under
    ``self.work_dir``. Everything constructed here is stored on ``self``;
    nothing is returned.
    """
    cfg = self.cfg
    work_dir = self.work_dir

    # create logger
    self.logger = Logger(work_dir, use_tb=cfg.use_tb)

    # create envs -- train and eval are built from the same arguments
    self.train_env = dmc.make(
        cfg.task_name, cfg.frame_stack, cfg.action_repeat, cfg.seed)
    self.eval_env = dmc.make(
        cfg.task_name, cfg.frame_stack, cfg.action_repeat, cfg.seed)

    # create replay buffer
    # spec order is (observation, action, reward, discount)
    observation_spec = self.train_env.observation_spec()
    action_spec = self.train_env.action_spec()
    reward_spec = specs.Array((1,), np.float32, 'reward')
    discount_spec = specs.Array((1,), np.float32, 'discount')
    data_specs = (observation_spec, action_spec, reward_spec, discount_spec)

    # storage and loader are both pointed at the same directory
    buffer_dir = work_dir / 'buffer'
    self.replay_storage = ReplayBufferStorage(data_specs, buffer_dir)
    self.replay_loader = make_replay_loader(
        buffer_dir,
        cfg.replay_buffer_size,
        cfg.batch_size,
        cfg.replay_buffer_num_workers,
        cfg.save_snapshot,
        cfg.nstep,
        cfg.discount,
    )
    # no iterator yet -- presumably created on first use elsewhere (confirm)
    self._replay_iter = None

    # create video recorders; each gets None instead of a directory when
    # its config flag is falsy
    eval_video_root = work_dir if cfg.save_video else None
    self.video_recorder = VideoRecorder(eval_video_root)
    train_video_root = work_dir if cfg.save_train_video else None
    self.train_video_recorder = TrainVideoRecorder(train_video_root)