def update()

in agent/baseline_agent.py


    def update(self, replay_buffer, L, step):
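        """Run one training step: sample a batch, update the critic every
        call, update the actor/alpha and Polyak-average the target networks
        on their own schedules, then run the auxiliary update selected by
        self.decoder_type ('pixel' reconstruction, 'contrastive', or
        'inverse')."""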
        if self.decoder_type == 'inverse':
            obs, action, reward, next_obs, not_done, k_obs = replay_buffer.sample(k=True)
        else:
            obs, action, _, reward, next_obs, not_done = replay_buffer.sample()

        L.log('train/batch_reward', reward.mean(), step)

        self.update_critic(obs, action, reward, next_obs, not_done, L, step)

        if step % self.actor_update_freq == 0:
            self.update_actor_and_alpha(obs, L, step)

        if step % self.critic_target_update_freq == 0:
            utils.soft_update_params(
                self.critic.Q1, self.critic_target.Q1, self.critic_tau
            )
            utils.soft_update_params(
                self.critic.Q2, self.critic_target.Q2, self.critic_tau
            )
            utils.soft_update_params(
                self.critic.encoder, self.critic_target.encoder,
                self.encoder_tau
            )

        if self.decoder is not None and step % self.decoder_update_freq == 0:  # decoder exists only when decoder_type == 'pixel'
            self.update_decoder(obs, action, next_obs, L, step)

        if self.decoder_type == 'contrastive':
            self.update_contrastive(obs, action, next_obs, L, step)
        elif self.decoder_type == 'inverse':
            self.update_inverse(obs, action, k_obs, L, step)
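
The target networks track the online critic by Polyak averaging. `utils.soft_update_params` is defined elsewhere in the repo; a minimal sketch of the standard SAC-style helper, consistent with the call sites above:

    def soft_update_params(net, target_net, tau):
        # Polyak averaging: target <- tau * online + (1 - tau) * target.
        # Expects two torch.nn.Module instances with matching parameter order.
        for param, target_param in zip(net.parameters(), target_net.parameters()):
            target_param.data.copy_(
                tau * param.data + (1 - tau) * target_param.data
            )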
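
For context, `update()` is typically driven once per environment step after a short seeding phase. A hypothetical training loop; the names `env`, `agent`, `L`, `replay_buffer`, `args.init_steps`, and `args.train_steps`, and the `sample_action`/`add` signatures, are assumptions for illustration, not taken from this file:

    obs = env.reset()
    for step in range(args.train_steps):
        # act with the stochastic policy and store the transition
        action = agent.sample_action(obs)
        next_obs, reward, done, _ = env.step(action)
        replay_buffer.add(obs, action, reward, next_obs, done)
        # one gradient step per environment step once the buffer is seeded
        if step >= args.init_steps:
            agent.update(replay_buffer, L, step)
        obs = env.reset() if done else next_obs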
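
The auxiliary objectives (`update_decoder`, `update_contrastive`, `update_inverse`) are implemented elsewhere in the agent. Purely to illustrate the shape of the inverse branch, a hypothetical inverse-dynamics update that regresses the executed action from the current and k-step-ahead observations; `self.inv_model` and `self.inv_optimizer` are invented names, not the repo's API:

    import torch
    import torch.nn.functional as F

    def update_inverse(self, obs, action, k_obs, L, step):
        # encode both observations with the critic's shared encoder
        h = self.critic.encoder(obs)
        h_k = self.critic.encoder(k_obs)
        # hypothetical head: predict the action from the pair of embeddings
        pred_action = self.inv_model(torch.cat([h, h_k], dim=1))
        inv_loss = F.mse_loss(pred_action, action)

        self.inv_optimizer.zero_grad()  # invented optimizer name
        inv_loss.backward()
        self.inv_optimizer.step()
        L.log('train/inv_loss', inv_loss, step)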