in rlmeta/agents/ppo/ppo_agent.py
def make_replay(self) -> List[NestedTensor]:
    # Walk the trajectory backwards to compute GAE advantages and
    # return targets in a single pass.
    v = 0.0    # V(s_{t+1}); 0.0 bootstraps the terminal step
    gae = 0.0  # A_{t+1}; 0.0 at the terminal step
    ret = []   # return targets, accumulated in reverse time order
    for cur in reversed(self.trajectory):
        reward = cur.pop("reward")
        v_ = v        # V(s_{t+1}) carried over from the later step
        v = cur["v"]  # V(s_t) predicted by the critic
        if self.reward_rescaling:
            # The critic predicts in rescaled space; recover the raw value.
            v = self.reward_rescaler.recover(v)
        # TD error: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
        delta = reward + self.gamma * v_ - v
        # GAE recursion: A_t = delta_t + gamma * lambda * A_{t+1}
        gae = delta + self.gamma * self.gae_lambda * gae
        cur["gae"] = gae
        ret.append(gae + v)  # return target R_t = A_t + V(s_t)
    if self.reward_rescaling:
        # Update the rescaler's running statistics on the raw returns,
        # then map them back into the rescaled space the critic trains in.
        ret = data_utils.stack_tensors(ret)
        self.reward_rescaler.update(ret)
        ret = self.reward_rescaler.rescale(ret)
        ret = ret.unbind()
    # ret was built in reverse time order, so reverse it when zipping.
    for cur, r in zip(self.trajectory, reversed(ret)):
        cur["return"] = r
    return self.trajectory
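
The backward loop above is Generalized Advantage Estimation: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), A_t = delta_t + gamma * lambda * A_{t+1}, with return target R_t = A_t + V(s_t). Below is a minimal standalone sketch of the same recursion on plain floats; the compute_gae helper and its rewards/values inputs are illustrative assumptions, not part of the rlmeta API.

from typing import List, Tuple

def compute_gae(rewards: List[float],
                values: List[float],
                gamma: float = 0.99,
                gae_lambda: float = 0.95) -> Tuple[List[float], List[float]]:
    # Backward recursion over a finished trajectory; the value after the
    # terminal step is taken to be 0, as in make_replay above.
    v_next = 0.0
    gae = 0.0
    advantages: List[float] = []
    returns: List[float] = []
    for reward, v in zip(reversed(rewards), reversed(values)):
        delta = reward + gamma * v_next - v      # TD error delta_t
        gae = delta + gamma * gae_lambda * gae   # A_t recursion
        advantages.append(gae)
        returns.append(gae + v)                  # R_t = A_t + V(s_t)
        v_next = v
    advantages.reverse()
    returns.reverse()
    return advantages, returns

# Example: a 3-step trajectory with constant reward.
adv, ret = compute_gae(rewards=[1.0, 1.0, 1.0], values=[0.5, 0.4, 0.3])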