Experiments/PolicyNetworks.py [891:934]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		# Choose a latent z for every timestep (greedy mean, reparameterized draw, or plain sample),
		# optionally hold z fixed wherever sampled_b is 0, then score the chosen z's under
		# self.dists and under a standard-normal prior.
		if epsilon==0.:
			# Zero epsilon: no exploration noise, use the mean output directly.
			sampled_z_index = mean_outputs.squeeze(1)
		else:

			# Whether to use reparametrization trick to retrieve the latent_z's.
			if self.args.reparam:

				if self.args.train:
					# Standard-normal noise with the same shape as variance_outputs.
					noise = torch.randn_like(variance_outputs)

					# Instead of *sampling* the latent z from a distribution, construct using mu + sig * eps (random noise).
					# NOTE(review): the noise is scaled by variance_outputs as-is. The reparametrization above calls for
					# a standard deviation (sig) - confirm what variance_outputs actually holds where it is computed.
					sampled_z_index = mean_outputs + variance_outputs*noise
					# Ought to be able to pass gradients through this latent_z now.

					# Drop dimension 1 (if it has size 1), matching the other branches.
					sampled_z_index = sampled_z_index.squeeze(1)

				# If evaluating, greedily use the mean as the latent z.
				else:
					sampled_z_index = mean_outputs.squeeze(1)
			else:
				# No reparametrization: draw directly from self.dists.
				# NOTE(review): assuming self.dists is a torch.distributions object, .sample() does not
				# propagate gradients to the distribution parameters (that is what .rsample() is for).
				sampled_z_index = self.dists.sample().squeeze(1)
		
		if new_z_selection:
			# Set initial b to 1. 
			sampled_b[0] = 1

			# Initial z is already trivially set. 
			# Walk forward over the first dimension of input (t = 1 .. input.shape[0]-1).
			for t in range(1,input.shape[0]):
				# If b_t==0, just use previous z. 
				# If b_t==1, keep the z already chosen above for this timestep, so there's no need to do anything. 
				# Because this runs in order, a run of b_t==0 steps all inherit the z of the step preceding the run.
				# NOTE(review): this is an in-place write. In the branches where sampled_z_index is
				# mean_outputs.squeeze(1), and assuming mean_outputs is a torch tensor, squeeze shares storage
				# with its input, so this also overwrites mean_outputs - confirm that is intended.
				if sampled_b[t]==0:
					sampled_z_index[t] = sampled_z_index[t-1]		

		# Also compute logprobabilities of the latent_z's sampled from this net. 
		# Dimension 1 is re-inserted before scoring under self.dists.
		variational_z_logprobabilities = self.dists.log_prob(sampled_z_index.unsqueeze(1))
		# Probabilities are not computed in this excerpt; only the placeholder is set.
		variational_z_probabilities = None

		# Set standard distribution for KL. 
		# Zero mean, identity covariance, dimension self.output_size; `device` is defined outside this excerpt.
		standard_distribution = torch.distributions.MultivariateNormal(torch.zeros((self.output_size)).to(device),torch.eye((self.output_size)).to(device))
		# Compute KL.
		# KL(self.dists || standard normal prior).
		kl_divergence = torch.distributions.kl_divergence(self.dists, standard_distribution)

		# Prior loglikelihood
		# NOTE(review): scored on the squeezed sampled_z_index, unlike the unsqueeze(1) used for self.dists above -
		# confirm the resulting shapes line up where the two log-probabilities are combined.
		prior_loglikelihood = standard_distribution.log_prob(sampled_z_index)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


Experiments/PolicyNetworks.py [1071:1114]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		# Choose a latent z for every timestep (greedy mean, reparameterized draw, or plain sample),
		# optionally hold z fixed wherever sampled_b is 0, then score the chosen z's under
		# self.dists and under a standard-normal prior.
		if epsilon==0.:
			# Zero epsilon: no exploration noise, use the mean output directly.
			sampled_z_index = mean_outputs.squeeze(1)
		else:

			# Whether to use reparametrization trick to retrieve the latent_z's.
			if self.args.reparam:

				if self.args.train:
					# Standard-normal noise with the same shape as variance_outputs.
					noise = torch.randn_like(variance_outputs)

					# Instead of *sampling* the latent z from a distribution, construct using mu + sig * eps (random noise).
					# NOTE(review): the noise is scaled by variance_outputs as-is. The reparametrization above calls for
					# a standard deviation (sig) - confirm what variance_outputs actually holds where it is computed.
					sampled_z_index = mean_outputs + variance_outputs*noise
					# Ought to be able to pass gradients through this latent_z now.

					# Drop dimension 1 (if it has size 1), matching the other branches.
					sampled_z_index = sampled_z_index.squeeze(1)

				# If evaluating, greedily use the mean as the latent z.
				else:
					sampled_z_index = mean_outputs.squeeze(1)
			else:
				# No reparametrization: draw directly from self.dists.
				# NOTE(review): assuming self.dists is a torch.distributions object, .sample() does not
				# propagate gradients to the distribution parameters (that is what .rsample() is for).
				sampled_z_index = self.dists.sample().squeeze(1)
		
		if new_z_selection:
			# Set initial b to 1. 
			sampled_b[0] = 1

			# Initial z is already trivially set. 
			# Walk forward over the first dimension of input (t = 1 .. input.shape[0]-1).
			for t in range(1,input.shape[0]):
				# If b_t==0, just use previous z. 
				# If b_t==1, keep the z already chosen above for this timestep, so there's no need to do anything. 
				# Because this runs in order, a run of b_t==0 steps all inherit the z of the step preceding the run.
				# NOTE(review): this is an in-place write. In the branches where sampled_z_index is
				# mean_outputs.squeeze(1), and assuming mean_outputs is a torch tensor, squeeze shares storage
				# with its input, so this also overwrites mean_outputs - confirm that is intended.
				if sampled_b[t]==0:
					sampled_z_index[t] = sampled_z_index[t-1]		

		# Also compute logprobabilities of the latent_z's sampled from this net. 
		# Dimension 1 is re-inserted before scoring under self.dists.
		variational_z_logprobabilities = self.dists.log_prob(sampled_z_index.unsqueeze(1))
		# Probabilities are not computed in this excerpt; only the placeholder is set.
		variational_z_probabilities = None

		# Set standard distribution for KL. 
		# Zero mean, identity covariance, dimension self.output_size; `device` is defined outside this excerpt.
		standard_distribution = torch.distributions.MultivariateNormal(torch.zeros((self.output_size)).to(device),torch.eye((self.output_size)).to(device))
		# Compute KL.
		# KL(self.dists || standard normal prior).
		kl_divergence = torch.distributions.kl_divergence(self.dists, standard_distribution)

		# Prior loglikelihood
		# NOTE(review): scored on the squeezed sampled_z_index, unlike the unsqueeze(1) used for self.dists above -
		# confirm the resulting shapes line up where the two log-probabilities are combined.
		prior_loglikelihood = standard_distribution.log_prob(sampled_z_index)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -