def multi_step()

in ppo_ewma/roller.py [0:0]


    def multi_step(self, nstep, **act_kwargs) -> dict:
        """
        Step the vectorized environment nstep times and return the batched results.

        Args:
            nstep: number of single_step calls to perform.
            **act_kwargs: forwarded unchanged to each single_step call.

        Returns:
            dict of stacked per-step outputs from singles_to_multi, augmented with:
              - "state_in" / "finalstate": recurrent state captured before and
                after the segment.
              - "finalob" / "finalfirst": observation and first-flag seen after
                the last step (from venv.observe()).
              - "reward": rewards realigned so each column matches the step that
                produced it (see comment below).
              - "finalcost" / "cost": analogous cost terms, only when
                self._keep_cost is set.
            Returns an empty dict when the venv contains zero environments
            (the step counter is still advanced by nstep).
        """
        if self._venv.num == 0:
            # No environments to step; keep bookkeeping consistent anyway.
            self._step_count += nstep
            return {}
        state_in = self.get_state()
        singles = [self.single_step(**act_kwargs) for _ in range(nstep)]
        out = self.singles_to_multi(singles)
        out["state_in"] = state_in
        finalrew, out["finalob"], out["finalfirst"] = tree_map(
            tu.np2th, self._venv.observe()
        )
        out["finalstate"] = self.get_state()
        # "lastrew" carries the reward observed *before* each step; drop its
        # first column and append the post-segment reward so reward[:, t]
        # lines up with the action taken at step t.
        out["reward"] = th.cat([out["lastrew"][:, 1:], finalrew[:, None]], dim=1)
        if self._keep_cost:
            # Missing "cost" keys in env info default to 0.0 (best-effort).
            out["finalcost"] = tu.np2th(
                np.array([i.get("cost", 0.0) for i in self._venv.get_info()])
            )
            out["cost"] = th.cat(
                [out["lastcost"][:, 1:], out["finalcost"][:, None]], dim=1
            )
        # NOTE(review): "lastrew" is dropped after realignment, but "lastcost"
        # is kept in the output — presumably intentional; confirm with callers.
        del out["lastrew"]
        return out