agent/baseline_agent.py
def update(self, replay_buffer, L, step):
    # Inverse-dynamics training also needs an observation k steps ahead
    # (k_obs); every other decoder type uses the standard batch.
    if self.decoder_type == 'inverse':
        obs, action, reward, next_obs, not_done, k_obs = replay_buffer.sample(k=True)
    else:
        obs, action, reward, next_obs, not_done = replay_buffer.sample()

    L.log('train/batch_reward', reward.mean(), step)

    # The critic is updated on every call.
    self.update_critic(obs, action, reward, next_obs, not_done, L, step)

    # Delayed actor and temperature updates.
    if step % self.actor_update_freq == 0:
        self.update_actor_and_alpha(obs, L, step)

    # Polyak-average the target critic: both Q heads and the shared encoder.
    if step % self.critic_target_update_freq == 0:
        utils.soft_update_params(
            self.critic.Q1, self.critic_target.Q1, self.critic_tau
        )
        utils.soft_update_params(
            self.critic.Q2, self.critic_target.Q2, self.critic_tau
        )
        utils.soft_update_params(
            self.critic.encoder, self.critic_target.encoder,
            self.encoder_tau
        )

    # Pixel-reconstruction auxiliary task: self.decoder is only created
    # when decoder_type is 'pixel'.
    if self.decoder is not None and step % self.decoder_update_freq == 0:
        self.update_decoder(obs, action, next_obs, L, step)

    # Remaining auxiliary objectives, selected by decoder_type.
    if self.decoder_type == 'contrastive':
        self.update_contrastive(obs, action, next_obs, L, step)
    elif self.decoder_type == 'inverse':
        self.update_inverse(obs, action, k_obs, L, step)
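
# For reference: utils.soft_update_params is not defined in this file. A
# minimal sketch of the Polyak soft update it is assumed to perform,
# i.e. target <- tau * online + (1 - tau) * target:
import torch

def soft_update_params(net, target_net, tau):
    # Move each target parameter a fraction tau toward its online counterpart.
    with torch.no_grad():
        for param, target_param in zip(net.parameters(), target_net.parameters()):
            target_param.data.copy_(
                tau * param.data + (1 - tau) * target_param.data
            )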
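
# A minimal sketch of what a contrastive auxiliary update could look like,
# in the CURL/InfoNCE style, treating next_obs as the positive for obs via a
# learned bilinear similarity. self.W, self.contrastive_optimizer, and the
# positive-pair choice are hypothetical, not taken from this file:
import torch
import torch.nn.functional as F

def update_contrastive(self, obs, action, next_obs, L, step):
    anchor = self.critic.encoder(obs)                    # (B, D) query features
    with torch.no_grad():
        positive = self.critic_target.encoder(next_obs)  # (B, D) key features
    logits = anchor @ self.W @ positive.T                # (B, B) similarities
    labels = torch.arange(logits.shape[0], device=logits.device)
    contrastive_loss = F.cross_entropy(logits, labels)   # InfoNCE objective

    self.contrastive_optimizer.zero_grad()
    contrastive_loss.backward()
    self.contrastive_optimizer.step()
    L.log('train/contrastive_loss', contrastive_loss, step)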
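
# Similarly, a minimal sketch of an inverse-dynamics auxiliary update,
# assuming a head self.inv that predicts the action connecting obs and k_obs
# from encoder features. self.inv and self.inv_optimizer are hypothetical:
import torch
import torch.nn.functional as F

def update_inverse(self, obs, action, k_obs, L, step):
    h = self.critic.encoder(obs)      # features of the current observation
    h_k = self.critic.encoder(k_obs)  # features k steps ahead
    pred_action = self.inv(torch.cat([h, h_k], dim=1))
    inv_loss = F.mse_loss(pred_action, action)

    self.inv_optimizer.zero_grad()
    inv_loss.backward()
    self.inv_optimizer.step()
    L.log('train/inv_loss', inv_loss, step)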
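
# Caller side, to show where update() sits in training. This is a sketch:
# env, agent, replay_buffer, L, and num_train_steps are assumed to be built
# elsewhere (e.g. in a train.py entry point), and sample_action is assumed
# to draw a stochastic action from the current policy:
obs = env.reset()
for step in range(num_train_steps):
    action = agent.sample_action(obs)
    next_obs, reward, done, _ = env.step(action)
    replay_buffer.add(obs, action, reward, next_obs, done)
    agent.update(replay_buffer, L, step)  # one gradient step per env step
    obs = env.reset() if done else next_obs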