hucc/agents/sac.py [68:95]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Entropy temperature alpha (stored as log-alpha for positivity).
        # With no optimizer configured it stays a fixed constant; otherwise it
        # is a learnable scalar updated by a hydra-instantiated optimizer.
        if cfg.optim_alpha is None:
            self._log_alpha = th.tensor(log_alpha)
            self._optim_alpha = None
        else:
            self._log_alpha = th.tensor(log_alpha, requires_grad=True)
            self._optim_alpha = hydra.utils.instantiate(
                cfg.optim_alpha, [self._log_alpha]
            )

        # Replay buffer; interleave=env.num_envs suggests samples from the
        # vectorized envs are stored round-robin — confirm in ReplayBuffer.
        self._buffer = ReplayBuffer(
            size=self._rpbuf_size, interleave=env.num_envs
        )
        self._n_samples_since_update = 0
        self._cur_rewards: List[th.Tensor] = []

        # Target network: a frozen deep copy of the model, presumably kept in
        # sync via soft/polyak updates elsewhere (not visible in this chunk).
        self._target = deepcopy(model)
        # We'll never need gradients for the target network
        for param in self._target.parameters():
            param.requires_grad_(False)

        # Q-function handles; optionally wrapped for torch.jit tracing.
        # NOTE(review): when cfg.trace is true the two plain assignments above
        # are immediately overwritten — harmless, but slightly redundant.
        self._q = self._model.q
        self._q_tgt = self._target.q
        if cfg.trace:
            self._q = TracedModule(self._model.q)
            self._q_tgt = TracedModule(self._target.q)

        self._action_space = env.action_space
        # Scale factor for squashed (tanh) actions; taking high[0] assumes a
        # symmetric, uniform bound across all action dims — TODO confirm.
        self._action_factor = env.action_space.high[0]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



hucc/agents/sacse.py [73:100]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Entropy temperature alpha (stored as log-alpha for positivity).
        # With no optimizer configured it stays a fixed constant; otherwise it
        # is a learnable scalar updated by a hydra-instantiated optimizer.
        if cfg.optim_alpha is None:
            self._log_alpha = th.tensor(log_alpha)
            self._optim_alpha = None
        else:
            self._log_alpha = th.tensor(log_alpha, requires_grad=True)
            self._optim_alpha = hydra.utils.instantiate(
                cfg.optim_alpha, [self._log_alpha]
            )

        # Replay buffer; interleave=env.num_envs suggests samples from the
        # vectorized envs are stored round-robin — confirm in ReplayBuffer.
        self._buffer = ReplayBuffer(
            size=self._rpbuf_size, interleave=env.num_envs
        )
        self._n_samples_since_update = 0
        self._cur_rewards: List[th.Tensor] = []

        # Target network: a frozen deep copy of the model, presumably kept in
        # sync via soft/polyak updates elsewhere (not visible in this chunk).
        self._target = deepcopy(model)
        # We'll never need gradients for the target network
        for param in self._target.parameters():
            param.requires_grad_(False)

        # Q-function handles; optionally wrapped for torch.jit tracing.
        # NOTE(review): when cfg.trace is true the two plain assignments above
        # are immediately overwritten — harmless, but slightly redundant.
        self._q = self._model.q
        self._q_tgt = self._target.q
        if cfg.trace:
            self._q = TracedModule(self._model.q)
            self._q_tgt = TracedModule(self._target.q)

        self._action_space = env.action_space
        # Scale factor for squashed (tanh) actions; taking high[0] assumes a
        # symmetric, uniform bound across all action dims — TODO confirm.
        self._action_factor = env.action_space.high[0]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



