in ppo_ewma/roller.py [0:0]
def single_step(self, **act_kwargs) -> dict:
    """
    Advance the vectorized environment by one step and return the results.

    The method reads the current ``(lastrew, ob, first)`` triple from
    ``self._venv.observe()``, asks ``self._act_fn`` for an action, sends
    that action to the environment and stores the state returned by the
    acting function for the next call.

    Args:
        **act_kwargs: extra keyword arguments passed through unchanged to
            ``self._act_fn``.

    Returns:
        A dict with the keys ``lastrew``, ``ob``, ``first`` and ``ac``,
        plus ``lastcost`` when ``self._keep_cost`` is truthy, plus every
        entry of the third value returned by ``self._act_fn`` (those
        entries are merged last, so they win on a key collision).
    """
    # NOTE(review): tu.np2th / tu.th2np presumably convert numpy <-> torch;
    # confirm against the tu module.
    reward, obs, is_first = tree_map(tu.np2th, self._venv.observe())

    result = {}
    if self._keep_cost:
        # Info entries that carry no "cost" key contribute 0.0.
        costs = [info.get("cost", 0.0) for info in self._venv.get_info()]
        result["lastcost"] = tu.np2th(np.array(costs))

    action, next_state, extras = self._act_fn(
        ob=obs, first=is_first, state_in=self._state, **act_kwargs
    )
    # Remember the state so it is fed back as state_in on the next call.
    self._state = next_state

    result["lastrew"] = reward
    result["ob"] = obs
    result["first"] = is_first
    result["ac"] = action

    # The environment receives the action after conversion with tu.th2np.
    self._venv.act(tree_map(tu.th2np, action))

    # Merge the additional outputs of the acting function, pair by pair.
    result.update(extras.items())
    self._step_count += 1
    return result