in ml3/optimizee.py [0:0]
def roll_out(self, goal, time_horizon, dmodel, env, real_rollout=False):
    state = torch.Tensor(env.reset())
    states = []
    actions = []
    states.append(state.clone())
    for t in range(time_horizon):
        # policy action from the (detached) current state concatenated with the goal, scaled by norm_in
        u = self.forward(torch.cat((state.detach(), goal[:]), dim=0) / self.norm_in)
        u = u.clamp(-1.0, 1.0)
        if not real_rollout:
            # imagined rollout: step through the learned dynamics model, keeping the graph differentiable
            pred_next_state = dmodel.step_model(state.squeeze(), u.squeeze()).clone()
        else:
            # real rollout: step through the true environment dynamics (no gradient flow)
            pred_next_state = torch.Tensor(
                env.step_model(state.squeeze().detach().numpy(), u.squeeze().detach().numpy()).copy())
        states.append(pred_next_state.clone())
        actions.append(u.clone())
        # detached distance of the predicted next state to the goal
        state_cost = torch.norm(pred_next_state[:] - goal[:]).detach().unsqueeze(0)
        state = pred_next_state.clone()
    # rewards to pass to the meta loss: the final goal distance, repeated over the horizon
    rewards = [state_cost] * time_horizon
    return torch.stack(states), torch.stack(actions), torch.stack(rewards).detach()
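
A minimal, self-contained sketch of the same rollout pattern is shown below: a policy is unrolled through a differentiable dynamics model, and a detached goal-distance reward is returned alongside the trajectory. The function `rollout_learned_model` and the plain `nn.Linear` stand-ins for the policy and dynamics model are invented for illustration and are not the repo's classes; the real code uses `self.forward`, `dmodel.step_model`, and the environment as in roll_out above.

import torch
import torch.nn as nn

def rollout_learned_model(policy, dmodel, goal, horizon, norm_in=1.0):
    """Unroll a policy through a differentiable dynamics model (mirrors roll_out)."""
    state = torch.zeros_like(goal)              # stand-in for env.reset()
    states, actions = [state.clone()], []
    for _ in range(horizon):
        u = policy(torch.cat((state.detach(), goal), dim=0) / norm_in).clamp(-1.0, 1.0)
        state = dmodel(torch.cat((state, u), dim=0))   # differentiable one-step prediction
        states.append(state.clone())
        actions.append(u.clone())
    state_cost = torch.norm(state - goal).detach().unsqueeze(0)
    rewards = [state_cost] * horizon            # final goal distance repeated, as above
    return torch.stack(states), torch.stack(actions), torch.stack(rewards).detach()

state_dim, action_dim = 2, 2
policy = nn.Linear(state_dim * 2, action_dim)          # input: state concatenated with goal
dmodel = nn.Linear(state_dim + action_dim, state_dim)  # toy learned dynamics model
goal = torch.ones(state_dim)

states, actions, rewards = rollout_learned_model(policy, dmodel, goal, horizon=10)
# the trajectory stays differentiable, so a loss on it backpropagates into the policy
loss = torch.norm(states[-1] - goal)
loss.backward()
print(states.shape, actions.shape, rewards.shape)  # (11, 2) (10, 2) (10, 1)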