Experiments/PolicyNetworks.py [487:525]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		latent_b_probabilities = self.batch_softmax_layer(latent_b_preprobabilities).squeeze(1)	
		# Greedily select b. 
		selected_b = self.select_greedy_action(latent_b_probabilities)

		# Predict Gaussian means and variances. 		
		if self.args.mean_nonlinearity:
			mean_outputs = self.activation_layer(self.mean_output_layer(outputs))
		else:
			mean_outputs = self.mean_output_layer(outputs)
		# Scale the predicted variances by self.variance_factor and offset by action_epsilon.
		variance_outputs = self.variance_factor*(self.variance_activation_layer(self.variances_output_layer(outputs))+self.variance_activation_bias) + action_epsilon

		noise = torch.randn_like(variance_outputs)

		if greedy: 
			selected_z = mean_outputs
		else:
			# Instead of sampling z from the distribution directly, construct it via the reparameterization trick: z = mu + sigma * eps, with variance_outputs used as the scale.
			selected_z = mean_outputs + variance_outputs * noise

		# If there is a single input and previous_z is None, this is the first timestep: force b to 1 and leave z as constructed. 
		if input.shape[0]==1 and previous_z is None:
			selected_b[0] = 1
		# If previous_z is not None, this is not the first timestep. If the selected b is 0, reuse the previous z; otherwise keep the newly constructed one. 
		elif input.shape[0]==1 and previous_z is not None:
			if selected_b==0:
				selected_z = previous_z
		elif input.shape[0]>1:
			# Now modify z's as per New Z Selection. 
			# Set initial b to 1. 
			selected_b[0] = 1
			# Initial z is already trivially set. 
			for t in range(1,input.shape[0]):
				# If b_t==0, carry the previous z forward. 
				# If b_t==1, keep the newly constructed z at this timestep; selected_z[t] already holds it, so nothing needs to change. 
				if selected_b[t]==0:
					selected_z[t] = selected_z[t-1]

		return selected_z, selected_b, hidden
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
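
For reference, the construction above can be summarized in a small, self-contained sketch: z is built as mu + sigma * eps (with variance_outputs acting as the scale), the first b is forced to 1, and whenever b_t == 0 the previous z is carried forward. The helper name construct_latents and the example shapes below are illustrative assumptions; only the tensor logic mirrors the snippet.

import torch

def construct_latents(mean_outputs, variance_outputs, selected_b, greedy=False):
	# Sketch (not the repo's API) of the z/b selection for a full sequence.
	# mean_outputs, variance_outputs: (T, z_dim) tensors predicted by the network.
	# selected_b: (T,) tensor of 0/1 "new latent" indicators.
	noise = torch.randn_like(variance_outputs)
	# Reparameterized construction: z = mu + sigma * eps; greedy uses the mean directly.
	selected_z = mean_outputs if greedy else mean_outputs + variance_outputs * noise
	# Force a fresh latent at the first timestep.
	selected_b[0] = 1
	# Carry the previous z forward whenever b_t == 0.
	for t in range(1, selected_z.shape[0]):
		if selected_b[t] == 0:
			selected_z[t] = selected_z[t - 1]
	return selected_z, selected_b

# Illustrative call with a 5-step sequence of 4-dimensional latents.
mu = torch.zeros(5, 4)
sigma = torch.ones(5, 4)
b = torch.tensor([0, 0, 1, 0, 0])
z, b = construct_latents(mu, sigma, b)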



Experiments/PolicyNetworks.py [662:701]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		latent_b_probabilities = self.batch_softmax_layer(latent_b_preprobabilities).squeeze(1)	

		# Greedily select b. 
		selected_b = self.select_greedy_action(latent_b_probabilities)

		# Predict Gaussian means and variances. 		
		if self.args.mean_nonlinearity:
			mean_outputs = self.activation_layer(self.mean_output_layer(outputs))
		else:
			mean_outputs = self.mean_output_layer(outputs)
		# Scale the predicted variances by self.variance_factor and offset by action_epsilon.
		variance_outputs = self.variance_factor*(self.variance_activation_layer(self.variances_output_layer(outputs))+self.variance_activation_bias) + action_epsilon

		noise = torch.randn_like(variance_outputs)

		if greedy: 
			selected_z = mean_outputs
		else:
			# Instead of sampling z from the distribution directly, construct it via the reparameterization trick: z = mu + sigma * eps, with variance_outputs used as the scale.
			selected_z = mean_outputs + variance_outputs * noise

		# If there is a single input and previous_z is None, this is the first timestep: force b to 1 and leave z as constructed. 
		if input.shape[0]==1 and previous_z is None:
			selected_b[0] = 1
		# If previous_z is not None, this is not the first timestep. If the selected b is 0, reuse the previous z; otherwise keep the newly constructed one. 
		elif input.shape[0]==1 and previous_z is not None:
			if selected_b==0:
				selected_z = previous_z
		elif input.shape[0]>1:
			# Now modify z's as per New Z Selection. 
			# Set initial b to 1. 
			selected_b[0] = 1
			# Initial z is already trivially set. 
			for t in range(1,input.shape[0]):
				# If b_t==0, carry the previous z forward. 
				# If b_t==1, keep the newly constructed z at this timestep; selected_z[t] already holds it, so nothing needs to change. 
				if selected_b[t]==0:
					selected_z[t] = selected_z[t-1]

		return selected_z, selected_b, hidden
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
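
The single-timestep branch (input.shape[0] == 1) in both copies follows the same convention during step-by-step rollouts: previous_z is None on the first call, so b is forced to 1; on later calls, a selected b of 0 means the caller's previous_z is reused. Below is a minimal sketch of that branch; the helper name single_step_select and the example tensors are assumed purely for illustration.

import torch

def single_step_select(selected_z, selected_b, previous_z):
	# Sketch (not the repo's API) of the single-timestep branch above.
	# selected_z: (1, z_dim) latent constructed at this step.
	# selected_b: (1,) greedy b decision for this step.
	# previous_z: None on the first timestep, otherwise the z from the previous step.
	if previous_z is None:
		# First timestep: force b to 1 and keep the freshly constructed z.
		selected_b[0] = 1
	elif selected_b == 0:
		# Later timestep with b == 0: keep using the previous latent.
		selected_z = previous_z
	return selected_z, selected_b

# Illustrative second timestep where b comes out 0, so previous_z is reused.
previous_z = torch.zeros(1, 4)
new_z = torch.randn(1, 4)
b = torch.tensor([0])
z, b = single_step_select(new_z, b, previous_z)
assert torch.equal(z, previous_z)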



