def rollout()

in Experiments/PolicyManagers.py


	def rollout(self, random=False, test=False, visualize=False):
		
		# Reset the noise process! We forgot to do this! :( 
		self.NoiseProcess.reset()
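		# (The NoiseProcess is presumably an Ornstein-Uhlenbeck exploration-noise process, given get_OU_action_latents below;
		# resetting it here keeps noise from one episode from carrying over into the next.)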

		# Reset some data for the rollout. 
		counter = 0		
		eps_reward = 0.			
		terminal = False
		self.reset_lists()

		# Reset environment and add state to the list.
		state = self.environment.reset()
		self.state_trajectory.append(state)		

		# If we are going to visualize, get an initial image.
		if visualize:			
			image = self.environment.sim.render(600,600, camera_name='frontview')
			self.image_trajectory.append(np.flipud(image))

		# Instead of maintaining just one LSTM hidden state... now have one for each policy level.
		policy_hidden = None
		latent_hidden = None
		latent_z = None

		delta_t = 0		

		# For number of steps / while we don't terminate:
		while not terminal and counter < self.max_timesteps:

			# Get the action to execute, b, z, and hidden states. 
			action, latent_z, latent_b, policy_hidden, latent_hidden, delta_t = self.get_OU_action_latents(policy_hidden=policy_hidden, latent_hidden=latent_hidden, random=random, counter=counter, previous_z=latent_z, test=test, delta_t=delta_t)
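			# (latent_z appears to be the high-level skill latent and latent_b the skill-boundary/termination indicator,
			# matching the HierarchicalEpisode constructed at the end of this rollout.)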

			if self.args.debug:
				print("Embed in Trajectory Rollout.")
				embed()

			# Take a step in the environment. 	
			next_state, onestep_reward, terminal, success = self.environment.step(action)
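			# (Note: step() here appears to return a success flag as its fourth value, in place of the usual Gym info dict.)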
			
			# Append everything to lists. 
			self.state_trajectory.append(next_state)
			self.action_trajectory.append(action)
			self.reward_trajectory.append(onestep_reward)
			self.terminal_trajectory.append(terminal)
			self.latent_z_trajectory.append(latent_z.detach().cpu().numpy())
			self.latent_b_trajectory.append(latent_b.detach().cpu().numpy())

			# Copy next state into state. 
			state = copy.deepcopy(next_state)

			# Counter
			counter += 1 

			# Append image to image list if we are visualizing. 
			if visualize:
				image = self.environment.sim.render(600,600, camera_name='frontview')
				self.image_trajectory.append(np.flipud(image))
				
		# Now that the episode is done, compute cumulative rewards...
		self.cummulative_rewards = copy.deepcopy(np.cumsum(np.array(self.reward_trajectory)[::-1])[::-1])
		self.episode_reward_statistics = copy.deepcopy(self.cummulative_rewards[0])
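		# (The reversed cumulative sum gives the reward-to-go at each timestep, e.g.
		#  np.cumsum(np.array([1., 0., 2., 1.])[::-1])[::-1]  ->  [4., 3., 3., 1.],
		# so self.cummulative_rewards[0] is the total undiscounted episode return.)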
		
		print("Rolled out an episode for ",counter," timesteps.")
		print("Achieved reward: ", self.episode_reward_statistics)

		# Now construct an episode out of this.
		self.episode = RLUtils.HierarchicalEpisode(self.state_trajectory, self.action_trajectory, self.reward_trajectory, self.terminal_trajectory, self.latent_z_trajectory, self.latent_b_trajectory)
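
For context, the NoiseProcess reset at the top of rollout(), together with get_OU_action_latents, suggests Ornstein-Uhlenbeck (OU) exploration noise. Below is a minimal, generic OU-process sketch, not the repository's implementation; the class name and parameters are illustrative only:

	import numpy as np

	class OUNoise:
		"""Temporally correlated exploration noise via an Ornstein-Uhlenbeck process."""
		def __init__(self, dim, mu=0.0, theta=0.15, sigma=0.2):
			self.dim, self.mu, self.theta, self.sigma = dim, mu, theta, sigma
			self.reset()

		def reset(self):
			# Restart at the mean; analogous to self.NoiseProcess.reset() at the start of each rollout.
			self.state = np.ones(self.dim) * self.mu

		def sample(self):
			# Mean-reverting drift plus Gaussian diffusion (Euler step with dt = 1).
			dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(self.dim)
			self.state = self.state + dx
			return self.state

	# Example: sample correlated noise once per environment step.
	noise = OUNoise(dim=7)
	action_noise = noise.sample()

Resetting such a process at episode boundaries matters because OU noise is temporally correlated: without the reset, the tail of one episode's noise would bias the start of the next, which is presumably why rollout() calls self.NoiseProcess.reset() first.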