def roll_out()

in ml3/optimizee.py [0:0]


    def roll_out(self, goal, time_horizon, dmodel, env, real_rollout=False):

        # Reset the environment and seed the trajectory buffer with the initial state.
        state = torch.Tensor(env.reset())
        states = [state.clone()]
        actions = []
        for t in range(time_horizon):
            # Policy forward pass on the normalized (state, goal) input, clamped to the action limits.
            u = self.forward(torch.cat((state.detach(), goal), dim=0) / self.norm_in)
            u = u.clamp(-1.0, 1.0)
            if not real_rollout:
                # Imagined rollout: step the learned dynamics model (kept in the autograd graph).
                pred_next_state = dmodel.step_model(state.squeeze(), u.squeeze()).clone()
            else:
                # Real rollout: step the environment dynamics and wrap the result back into a tensor.
                pred_next_state = torch.Tensor(
                    env.step_model(state.squeeze().detach().numpy(), u.squeeze().detach().numpy()).copy()
                )
            states.append(pred_next_state.clone())
            actions.append(u.clone())
            # Distance of the predicted state to the goal; only the final step's value survives the loop.
            state_cost = torch.norm(pred_next_state - goal).detach().unsqueeze(0)
            state = pred_next_state.clone()

        # Rewards passed to the meta-loss: the final-state cost is broadcast across the horizon.
        rewards = [state_cost] * time_horizon
        return torch.stack(states), torch.stack(actions), torch.stack(rewards).detach()
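
For context, a minimal sketch of the interfaces roll_out relies on. The ToyDynamicsModel and ToyEnv names, shapes, and dynamics below are illustrative assumptions, not part of ml3; only the step_model / reset calls and the returned tensor shapes follow from the code above.

    # Hypothetical stand-ins for the objects roll_out expects (assumed names and shapes).
    import numpy as np
    import torch

    class ToyDynamicsModel:
        # Learned-model branch: roll_out calls step_model(state, action) with torch
        # tensors and keeps the prediction in the autograd graph.
        def step_model(self, state, action):
            return state + 0.1 * action

    class ToyEnv:
        # "Real" branch: roll_out calls step_model(state, action) with numpy arrays
        # and wraps the returned array back into a torch.Tensor.
        def reset(self):
            return np.zeros(2)

        def step_model(self, state, action):
            return state + 0.1 * action

    # A policy exposing forward() and norm_in (as above) would then produce
    #     states, actions, rewards = policy.roll_out(goal, T, ToyDynamicsModel(), ToyEnv())
    # with states of shape (T + 1, state_dim), actions (T, action_dim), rewards (T, 1).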