in Experiments/PolicyManagers.py [0:0]
def rollout_latent_policy(self, orig_assembled_inputs, orig_subpolicy_inputs):
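# Greedy rollout of the latent policy: at every timestep, query the latent policy for (b, z),
# write them into the assembled / subpolicy inputs, step the subpolicy to get the next
# state-action pair, and finally return the selected b sequence.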
assembled_inputs = orig_assembled_inputs.clone().detach()
subpolicy_inputs = orig_subpolicy_inputs.clone().detach()
# Set delta_t, the number of timesteps since b was last 1, to 0.
delta_t = 0
# For number of rollout timesteps:
for t in range(self.rollout_timesteps-1):
##########################################
#### CODE FOR NEW Z SELECTION ROLLOUT ####
##########################################
# Pick latent_z and latent_b.
selected_b, new_selected_z = self.latent_policy.get_actions(assembled_inputs[:(t+1)].view((t+1,-1)), greedy=True, delta_t=delta_t)
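# At the very first timestep, force b to 1 so that a latent z is always selected at the start of the rollout.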
if t==0:
selected_b = torch.ones_like(selected_b).to(device).float()
if selected_b[-1]==1:
# Copy over ALL z's. This is okay because we're selecting greedily, so the latent policy is deterministic.
selected_z = torch.tensor(new_selected_z).to(device).float()
# Since b was 1, reset delta_t (time since the last b==1) to 0.
delta_t = 0
else:
# Increment counter since last time b was 1.
delta_t += 1
# Set z's to 0.
assembled_inputs[t+1, self.input_size:self.input_size+self.number_policies] = 0.
# Set z and b in assembled input for the future latent policy passes.
if self.args.discrete_z:
assembled_inputs[t+1, self.input_size+selected_z[-1]] = 1.
else:
assembled_inputs[t+1, self.input_size:self.input_size+self.latent_z_dimensionality] = selected_z[-1]
# Write selected_b into the b slot of the assembled input, just past the z block.
assembled_inputs[t+1, self.input_size+self.latent_z_dimensionality] = selected_b[-1]
# Before copying over, set conditional_info from the environment at the current timestep.
if self.conditional_viz_env:
self.set_env_conditional_info()
if self.conditional_info_size>0:
assembled_inputs[t+1, -self.conditional_info_size:] = torch.tensor(self.conditional_information).to(device).float()
# Set z's to 0.
subpolicy_inputs[t, self.input_size:self.input_size+self.number_policies] = 0.
# Set z and b in subpolicy input for the future subpolicy passes.
if self.args.discrete_z:
subpolicy_inputs[t, self.input_size+selected_z[-1]] = 1.
else:
subpolicy_inputs[t, self.input_size:] = selected_z[-1]
# Now pass subpolicy net forward and get action and next state.
action_to_execute, new_state = self.take_rollout_step(subpolicy_inputs[:(t+1)].view((t+1,-1)), t, use_env=self.conditional_viz_env)
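# Concatenate the new state and the executed action into the state-action tuple for the next timestep.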
state_action_tuple = torch.cat([new_state, action_to_execute],dim=1)
# Now update assembled input.
assembled_inputs[t+1, :self.input_size] = state_action_tuple
subpolicy_inputs[t+1, :self.input_size] = state_action_tuple
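# After the rollout loop, store the state portion of the rolled-out trajectory.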
self.latent_trajectory_rollout = copy.deepcopy(subpolicy_inputs[:,:self.state_dim].detach().cpu().numpy())
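# Append a trailing 0 to the greedily selected b sequence before returning it.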
concatenated_selected_b = np.concatenate([selected_b.detach().cpu().numpy(),np.zeros((1))],axis=-1)
if self.args.debug:
print("Embedding in Latent Policy Rollout.")
embed()
# Clear these variables from memory.
del subpolicy_inputs, assembled_inputs
return concatenated_selected_b
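###################################################
#### ILLUSTRATIVE SKETCH (NOT IN ORIGINAL FILE) ####
###################################################
# A minimal, self-contained sketch of the delta_t bookkeeping used in the rollout
# loop above, written in plain Python so it can be tested without the policy
# networks. The helper name _toy_delta_t_trace and the example b sequence are
# hypothetical, not part of the repository.
def _toy_delta_t_trace(b_sequence):
    # Return the delta_t value that would be passed to the latent policy at
    # each timestep: reset to 0 whenever b==1, otherwise incremented.
    trace = []
    delta_t = 0
    for b in b_sequence:
        trace.append(delta_t)
        if b == 1:
            delta_t = 0
        else:
            delta_t += 1
    return trace

# Example: _toy_delta_t_trace([1, 0, 0, 1, 0]) returns [0, 0, 1, 2, 0].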