in Experiments/PolicyManagers.py
def rollout(self, random=False, test=False, visualize=False):
    # Reset the exploration noise process; this must happen at the start of every rollout.
    self.NoiseProcess.reset()
    # Reset some data for the rollout.
    counter = 0
    eps_reward = 0.
    terminal = False
    self.reset_lists()
    # Reset environment and add state to the list.
    state = self.environment.reset()
    self.state_trajectory.append(state)
    # If we are going to visualize, get an initial image.
    if visualize:
        image = self.environment.sim.render(600, 600, camera_name='frontview')
        self.image_trajectory.append(np.flipud(image))
    # Maintain a separate LSTM hidden state for each policy level, rather than a single one.
    policy_hidden = None
    latent_hidden = None
    latent_z = None
    delta_t = 0
    # For number of steps / while we don't terminate:
    while not(terminal) and counter < self.max_timesteps:
        # Get the action to execute, b, z, and hidden states.
        action, latent_z, latent_b, policy_hidden, latent_hidden, delta_t = self.get_OU_action_latents(policy_hidden=policy_hidden, latent_hidden=latent_hidden, random=random, counter=counter, previous_z=latent_z, test=test, delta_t=delta_t)
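        # Note: "OU" presumably refers to Ornstein-Uhlenbeck exploration noise (the
        # NoiseProcess reset above); the hidden states and previous latent_z are threaded
        # back in on every step so both policy levels keep their recurrent state.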
        if self.args.debug:
            print("Embed in Trajectory Rollout.")
            embed()
        # Take a step in the environment.
        next_state, onestep_reward, terminal, success = self.environment.step(action)
        # Append everything to lists.
        self.state_trajectory.append(next_state)
        self.action_trajectory.append(action)
        self.reward_trajectory.append(onestep_reward)
        self.terminal_trajectory.append(terminal)
        self.latent_z_trajectory.append(latent_z.detach().cpu().numpy())
        self.latent_b_trajectory.append(latent_b.detach().cpu().numpy())
        # Copy next state into state.
        state = copy.deepcopy(next_state)
        # Increment the step counter.
        counter += 1
        # Append image to image list if we are visualizing.
        if visualize:
            image = self.environment.sim.render(600, 600, camera_name='frontview')
            self.image_trajectory.append(np.flipud(image))
    # Now that the episode is done, compute cumulative rewards.
    self.cummulative_rewards = copy.deepcopy(np.cumsum(np.array(self.reward_trajectory)[::-1])[::-1])
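    # Reversing, taking the cumulative sum, and reversing again gives the (undiscounted)
    # reward-to-go from each timestep to the end of the episode, so the first entry is
    # the total episode return.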
    self.episode_reward_statistics = copy.deepcopy(self.cummulative_rewards[0])
    print("Rolled out an episode for {0} timesteps.".format(counter))
    print("Achieved reward: {0}".format(self.episode_reward_statistics))
    # Construct a HierarchicalEpisode from the collected trajectories.
    self.episode = RLUtils.HierarchicalEpisode(self.state_trajectory, self.action_trajectory, self.reward_trajectory, self.terminal_trajectory, self.latent_z_trajectory, self.latent_b_trajectory)
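For reference, a minimal standalone sketch of the reward-to-go computation used above, together with a discounted variant; the discount factor gamma and the helper name are illustrative assumptions and not part of PolicyManagers.py:

import numpy as np

def reward_to_go(rewards, gamma=1.0):
    # Illustrative helper (not part of PolicyManagers.py): returns, for each timestep t,
    # the discounted sum of rewards from t to the end of the trajectory.
    # With gamma=1.0 this matches the reverse-cumsum used in rollout() above.
    returns = np.zeros(len(rewards))
    running = 0.
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Example: rewards [1, 0, 2] give undiscounted reward-to-go [3, 2, 2].
assert np.allclose(reward_to_go([1., 0., 2.]),
                   np.cumsum(np.array([1., 0., 2.])[::-1])[::-1])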