in svg/replay_buffer.py [0:0]
def add(self, obs, action, reward, next_obs, done, done_no_max):
    """Record one transition in the save log and in the circular storage.

    The transition is appended to ``self.payload`` (array arguments are
    copied) and written into the preallocated arrays at the current write
    position ``self.idx``; the position then advances by one and wraps
    around at ``self.capacity``.

    Args:
        obs: Observation before the step; must provide ``.copy()``.
        action: Action taken; must provide ``.copy()``.
        reward: Reward for the step; logged as passed, without copying.
        next_obs: Observation after the step; must provide ``.copy()``.
        done: Truthy when the episode ended; stored negated.
        done_no_max: Second termination flag, also stored negated
            (presumably "done, ignoring the step limit" -- TODO confirm
            against the caller).
    """
    slot = self.idx
    not_done = not done
    not_done_no_max = not done_no_max

    # Kept for saving: an append-only log of every transition seen.
    record = (
        obs.copy(),
        next_obs.copy(),
        action.copy(),
        reward,
        not_done,
        not_done_no_max,
    )
    self.payload.append(record)

    if self.normalize_obs:
        self.welford.add_data(obs)

    # Keep the set of terminal slots in sync with what this slot will hold.
    # A non-terminal write only needs to clear a marker once the buffer has
    # wrapped, because only then can the slot hold an older transition.
    if not_done:
        if self.full:
            self.done_idxs.discard(slot)
    else:
        self.done_idxs.add(slot)

    # Write order matches the original statement order.
    writes = (
        (self.obses, obs),
        (self.actions, action),
        (self.rewards, reward),
        (self.next_obses, next_obs),
        (self.not_dones, not_done),
        (self.not_dones_no_max, not_done_no_max),
    )
    for storage, value in writes:
        np.copyto(storage[slot], value)

    self.idx = (slot + 1) % self.capacity
    self.global_idx += 1
    # The buffer counts as full from the first wrap-around onward.
    wrapped = self.idx == 0
    self.full = self.full or wrapped