in ppo_ewma/roller.py [0:0]
def multi_step(self, nstep, **act_kwargs) -> dict:
"""
step vectorized environment nstep times, return results
final flag specifies if the final reward, observation,
and first should be included in the segment (default: False)
"""
if self._venv.num == 0:
self._step_count += nstep
return {}
state_in = self.get_state()
    singles = [self.single_step(**act_kwargs) for _ in range(nstep)]
out = self.singles_to_multi(singles)
out["state_in"] = state_in
finalrew, out["finalob"], out["finalfirst"] = tree_map(
tu.np2th, self._venv.observe()
)
out["finalstate"] = self.get_state()
out["reward"] = th.cat([out["lastrew"][:, 1:], finalrew[:, None]], dim=1)
if self._keep_cost:
out["finalcost"] = tu.np2th(
np.array([i.get("cost", 0.0) for i in self._venv.get_info()])
)
out["cost"] = th.cat(
[out["lastcost"][:, 1:], out["finalcost"][:, None]], dim=1
)
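    # lastrew has been folded into the realigned reward, so drop it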
del out["lastrew"]
return out
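
# A minimal, self-contained sketch of the reward realignment performed above.
# The shapes and values here are illustrative only (not part of the Roller
# API): "lastrew" stands in for rewards aligned with observations, and
# "finalrew" for the reward observed after the rollout ends.
import torch as th

nenv, nstep = 2, 4
# lastrew[:, t] = reward delivered alongside observation t (for action t-1)
lastrew = th.arange(nenv * nstep, dtype=th.float32).reshape(nenv, nstep)
finalrew = th.tensor([100.0, 200.0])  # reward observed after the last step
# shift left by one step and append the post-rollout reward
reward = th.cat([lastrew[:, 1:], finalrew[:, None]], dim=1)
assert reward.shape == (nenv, nstep)
# reward[:, t] now holds the reward for the action taken at step t, e.g.
# env 0: [1., 2., 3., 100.] and env 1: [5., 6., 7., 200.]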