in Experiments/PolicyNetworks.py [0:0]
def __init__(self, input_size, hidden_size, z_dimensions, args, number_layers=4):
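# Arguments (documented here for clarity; descriptions inferred from how each is used below):
#   input_size: dimensionality of each input timestep fed to the LSTM.
#   hidden_size: hidden state size of the LSTM (per direction).
#   z_dimensions: dimensionality of the continuous latent variable z; used as the output size.
#   args: experiment arguments; supplies the exploration biases, probability factors, and batch size.
#   number_layers: number of stacked LSTM layers.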
# Initialize torch.nn.Module so that submodules and parameters are registered correctly.
super(ContinuousVariationalPolicyNetwork, self).__init__()
self.args = args
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = z_dimensions
self.num_layers = number_layers
self.z_exploration_bias = self.args.z_exploration_bias
self.b_exploration_bias = self.args.b_exploration_bias
self.z_probability_factor = self.args.z_probability_factor
self.b_probability_factor = self.args.b_probability_factor
self.batch_size = self.args.batch_size
# Define a bidirectional LSTM over the input sequence.
self.lstm = torch.nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, bidirectional=True)
# Transform the LSTM output to the output space: latent z and termination variable b.
# This output layer takes 2*hidden_size as input because the bidirectional LSTM
# concatenates the forward and backward hidden states.
self.termination_output_layer = torch.nn.Linear(2*self.hidden_size, 2)
# Softmax and log-softmax activations for the Bernoulli termination probabilities.
self.batch_softmax_layer = torch.nn.Softmax(dim=-1)
self.batch_logsoftmax_layer = torch.nn.LogSoftmax(dim=-1)
# Define output layers mapping the LSTM features to the parameters of the latent z distribution, and their activations.
self.mean_output_layer = torch.nn.Linear(2*self.hidden_size, self.output_size)
self.variances_output_layer = torch.nn.Linear(2*self.hidden_size, self.output_size)
self.activation_layer = torch.nn.Tanh()
# Softplus keeps the predicted variances positive; the bias and factor below are
# presumably applied to the variance predictions downstream (not shown in this excerpt).
self.variance_activation_layer = torch.nn.Softplus()
self.variance_activation_bias = 0.
self.variance_factor = 0.01
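
# A minimal usage sketch (an illustrative assumption, not part of the original file):
# it constructs the network with hypothetical sizes, mirrors the args fields read in
# __init__ above, and runs the layers individually to show the tensor shapes. The way
# the variance factor and bias are combined below is one plausible reading; the
# original forward pass is not shown in this excerpt.
if __name__ == '__main__':
    import torch
    from argparse import Namespace

    # Hypothetical argument values; only the fields consumed by __init__ are set.
    args = Namespace(z_exploration_bias=0.3, b_exploration_bias=0.3,
                     z_probability_factor=0.01, b_probability_factor=0.01,
                     batch_size=32)
    network = ContinuousVariationalPolicyNetwork(input_size=16, hidden_size=64,
                                                 z_dimensions=8, args=args)

    # The LSTM expects (seq_len, batch, input_size), since batch_first defaults to False.
    inputs = torch.randn(100, args.batch_size, 16)
    lstm_outputs, _ = network.lstm(inputs)
    # Bidirectional: forward and backward states are concatenated to 2*hidden_size.
    assert lstm_outputs.shape == (100, args.batch_size, 128)

    # Per-timestep termination probabilities and latent z Gaussian parameters.
    termination_probs = network.batch_softmax_layer(network.termination_output_layer(lstm_outputs))
    mean = network.activation_layer(network.mean_output_layer(lstm_outputs))
    variance = network.variance_factor * (
        network.variance_activation_layer(network.variances_output_layer(lstm_outputs))
        + network.variance_activation_bias)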