in train.py [0:0]
def __init__(self, cfg):
    """Build the training workspace: logger, env, agent, replay buffer.

    Args:
        cfg: Hydra/OmegaConf config node. Mutated in place: ``obs_dim``,
            ``action_dim``, ``action_range`` are derived from the env, and
            ``replay_buffer_capacity`` is coerced from str to int if needed,
            all before ``cfg.agent`` is instantiated.
    """
    # Hydra chdir's into a per-run directory, so cwd is the run workspace.
    self.work_dir = os.getcwd()
    print(f'workspace: {self.work_dir}')

    self.cfg = cfg
    self.logger = Logger(self.work_dir,
                         save_tb=cfg.log_save_tb,
                         log_frequency=cfg.log_freq,
                         agent='sac_svg')

    utils.set_seed_everywhere(cfg.seed)
    self.device = torch.device(cfg.device)
    self.env = utils.make_norm_env(cfg)

    # Per-episode bookkeeping, reset at each episode boundary.
    self.episode = 0
    self.episode_step = 0
    self.episode_reward = 0
    self.done = False

    # Derive env-dependent fields so cfg.agent can be instantiated below.
    # int() guards against numpy integer types leaking into the config
    # (applied to both dims for consistency; the original missed action_dim).
    cfg.obs_dim = int(self.env.observation_space.shape[0])
    cfg.action_dim = int(self.env.action_space.shape[0])
    cfg.action_range = [
        float(self.env.action_space.low.min()),
        float(self.env.action_space.high.max()),
    ]
    self.agent = hydra.utils.instantiate(cfg.agent)

    # Capacity is often written as a string like '1e6' in the config.
    # Parse it numerically instead of eval() — eval on config input is
    # arbitrary code execution. NOTE(review): arithmetic expressions such
    # as '2**20' are intentionally no longer accepted; use plain numbers.
    if isinstance(cfg.replay_buffer_capacity, str):
        cfg.replay_buffer_capacity = int(float(cfg.replay_buffer_capacity))
    self.replay_buffer = ReplayBuffer(
        self.env.observation_space.shape,
        self.env.action_space.shape,
        int(cfg.replay_buffer_capacity),
        self.device,
        normalize_obs=cfg.normalize_obs,
    )
    self.replay_dir = os.path.join(self.work_dir, 'replay')

    # Recorder is a no-op when save_video is disabled (dir=None).
    self.video_recorder = VideoRecorder(
        self.work_dir if cfg.save_video else None)

    # Global training counters across all episodes.
    self.step = 0
    self.steps_since_eval = 0
    self.steps_since_save = 0
    self.best_eval_rew = None