in Experiments/PolicyManagers.py [0:0]
def __init__(self, number_policies=4, dataset=None, args=None):
super(PolicyManager_Joint, self).__init__()
self.args = args
self.data = self.args.data
self.number_policies = number_policies
self.latent_z_dimensionality = self.args.z_dimensions
self.dataset = dataset
# Global input size: trajectory at every step - x,y,action
# Inputs is now states and actions.
# Model size parameters
self.state_size = 2
self.state_dim = 2
self.input_size = 2*self.state_size
self.hidden_size = self.args.hidden_size
# Number of actions
self.output_size = 2
self.number_layers = self.args.number_layers
self.traj_length = 5
self.conditional_info_size = 6
if self.args.data=='MIME':
self.state_size = 16
self.state_dim = 16
self.input_size = 2*self.state_size
self.output_size = self.state_size
self.traj_length = self.args.traj_length
# Create Baxter visualizer for MIME data
# self.visualizer = BaxterVisualizer.MujocoVisualizer()
self.visualizer = BaxterVisualizer()
if self.args.normalization=='meanvar':
self.norm_sub_value = np.load("Statistics/MIME_Means.npy")
self.norm_denom_value = np.load("Statistics/MIME_Var.npy")
elif self.args.normalization=='minmax':
self.norm_sub_value = np.load("Statistics/MIME_Min.npy")
self.norm_denom_value = np.load("Statistics/MIME_Max.npy") - np.load("Statistics/MIME_Min.npy")
# Max of robot_state + object_state sizes across all Baxter environments.
self.cond_robot_state_size = 60
self.cond_object_state_size = 25
self.conditional_info_size = self.cond_robot_state_size+self.cond_object_state_size
self.conditional_viz_env = False
elif self.args.data=='Roboturk' or self.args.data=='OrigRoboturk' or self.args.data=='FullRoboturk':
self.state_size = 8
self.state_dim = 8
self.input_size = 2*self.state_size
self.output_size = self.state_size
self.traj_length = self.args.traj_length
self.visualizer = SawyerVisualizer()
# Max of robot_state + object_state sizes across all sawyer environments.
# Robot size always 30. Max object state size is... 23.
self.cond_robot_state_size = 30
self.cond_object_state_size = 23
self.number_tasks = 8
self.conditional_info_size = self.cond_robot_state_size+self.cond_object_state_size+self.number_tasks
self.conditional_viz_env = True
elif self.args.data=='Mocap':
self.state_size = 22*3
self.state_dim = 22*3
self.input_size = 2*self.state_size
self.hidden_size = self.args.hidden_size
self.output_size = self.state_size
self.traj_length = self.args.traj_length
self.conditional_info_size = 0
self.conditional_information = None
self.conditional_viz_env = False
# Create visualizer object
self.visualizer = MocapVisualizer(args=self.args)
self.training_phase_size = self.args.training_phase_size
self.number_epochs = self.args.epochs
self.test_set_size = 500
self.baseline_value = 0.
self.beta_decay = 0.9
self. learning_rate = self.args.learning_rate
self.latent_b_loss_weight = self.args.lat_b_wt
self.latent_z_loss_weight = self.args.lat_z_wt
self.initial_epsilon = self.args.epsilon_from
self.final_epsilon = self.args.epsilon_to
self.decay_epochs = self.args.epsilon_over
self.decay_counter = self.decay_epochs*len(self.dataset)
# Log-likelihood penalty.
self.lambda_likelihood_penalty = self.args.likelihood_penalty
self.baseline = None
# Per step decay.
self.decay_rate = (self.initial_epsilon-self.final_epsilon)/(self.decay_counter)