in rlmeta/agents/dqn/apex_dqn_agent.py [0:0]
def make_replay(self) -> Optional[List[NestedTensor]]:
    """Turn the stored trajectory into a list of multi-step transitions.

    Entry ``i`` of ``self.trajectory`` is paired with entry
    ``i + self.multi_step``. The first supplies ``obs`` and ``action``,
    the second supplies ``next_obs`` and ``done``. The transition reward
    is the discounted sum
    ``sum(self.gamma**k * trajectory[i + k]["reward"])`` for
    ``k`` in ``range(self.multi_step)``.

    Returns:
        ``None`` when the trajectory holds ``self.multi_step`` entries or
        fewer (no complete window exists). Otherwise a list with one dict
        per window, with keys ``"obs"``, ``"action"``, ``"reward"``,
        ``"next_obs"`` and ``"done"``. ``"reward"`` and ``"done"`` are
        wrapped as one-element tensors.
    """
    n_step = self.multi_step
    num_transitions = len(self.trajectory) - n_step
    if num_transitions <= 0:
        return None

    # Discount factors do not depend on the window position, so compute
    # them once instead of once per transition.
    discounts = [self.gamma**k for k in range(n_step)]

    transitions = []
    for start in range(num_transitions):
        head = self.trajectory[start]
        tail = self.trajectory[start + n_step]
        first_obs = head["obs"]
        first_action = head["action"]
        last_obs = tail["obs"]
        terminal = tail["done"]

        # Accumulate term by term, left to right, starting from 0.0 so the
        # floating-point result matches a plain running sum.
        n_step_return = 0.0
        for offset, discount in enumerate(discounts):
            n_step_return += discount * self.trajectory[start + offset]["reward"]

        transitions.append({
            "obs": first_obs,
            "action": first_action,
            "reward": torch.tensor([n_step_return]),
            "next_obs": last_obs,
            "done": torch.tensor([terminal]),
        })
    return transitions