in Experiments/PolicyManagers.py [0:0]
def __init__(self, number_policies=4, dataset=None, args=None):
    """Configure a pretraining policy manager for the selected dataset.

    Sets state/action dimensionalities, network size parameters,
    normalization statistics, and the epsilon-decay schedule, all driven
    by ``args.data`` and related command-line options.

    Args:
        number_policies: Number of discrete policies; only relevant when
            discrete z's are enabled.
        dataset: Dataset object; ``len(dataset)`` scales the epsilon
            decay horizon.
        args: Parsed command-line argument namespace.
    """
    # The imitation setting requires the parent constructor to receive
    # the full argument set; otherwise a bare init suffices.
    if args.setting == 'imitation':
        super(PolicyManager_Pretrain, self).__init__(number_policies=number_policies, dataset=dataset, args=args)
    else:
        super(PolicyManager_Pretrain, self).__init__()

    self.args = args
    self.data = self.args.data
    # Only used if discrete z's are enabled.
    self.number_policies = number_policies
    self.dataset = dataset

    # Defaults for the 2D toy domains: state is (x, y); the global input
    # at every step concatenates state and action.
    self.state_size = self.state_dim = 2
    self.input_size = 2 * self.state_size
    self.hidden_size = self.args.hidden_size
    # Number of actions.
    self.output_size = 2
    self.latent_z_dimensionality = self.args.z_dimensions
    self.number_layers = self.args.number_layers
    self.traj_length = 5
    self.number_epochs = self.args.epochs
    self.test_set_size = 500

    if self.args.data == 'MIME':
        # Baxter joint-space data.
        self.state_size = self.state_dim = 16
        self.input_size = 2 * self.state_size
        self.hidden_size = self.args.hidden_size
        self.output_size = self.state_size
        self.latent_z_dimensionality = self.args.z_dimensions
        self.number_layers = self.args.number_layers
        self.traj_length = self.args.traj_length
        self.number_epochs = self.args.epochs

        if self.args.normalization == 'meanvar':
            self.norm_sub_value = np.load("Statistics/MIME_Means.npy")
            self.norm_denom_value = np.load("Statistics/MIME_Var.npy")
        elif self.args.normalization == 'minmax':
            self.norm_sub_value = np.load("Statistics/MIME_Min.npy")
            self.norm_denom_value = np.load("Statistics/MIME_Max.npy") - self.norm_sub_value

        # Max of robot_state + object_state sizes across all Baxter environments.
        self.cond_robot_state_size = 60
        self.cond_object_state_size = 25
        self.conditional_info_size = self.cond_robot_state_size + self.cond_object_state_size

    elif self.args.data == 'Roboturk' or self.args.data == 'OrigRoboturk' or self.args.data == 'FullRoboturk':
        # Sawyer arm: 7 joints, plus one gripper dimension when enabled.
        if self.args.gripper:
            self.state_size = self.state_dim = 8
        else:
            self.state_size = self.state_dim = 7
        self.input_size = 2 * self.state_size
        self.hidden_size = self.args.hidden_size
        self.output_size = self.state_size
        self.number_layers = self.args.number_layers
        self.traj_length = self.args.traj_length

        if self.args.normalization == 'meanvar':
            self.norm_sub_value = np.load("Statistics/Roboturk_Mean.npy")
            self.norm_denom_value = np.load("Statistics/Roboturk_Var.npy")
        elif self.args.normalization == 'minmax':
            self.norm_sub_value = np.load("Statistics/Roboturk_Min.npy")
            self.norm_denom_value = np.load("Statistics/Roboturk_Max.npy") - self.norm_sub_value

        # Max of robot_state + object_state sizes across all Sawyer environments.
        # Robot size is always 30; max object state size is 23.
        self.cond_robot_state_size = 30
        self.cond_object_state_size = 23
        self.conditional_info_size = self.cond_robot_state_size + self.cond_object_state_size

    elif self.args.data == 'Mocap':
        # 22 joints, 3 coordinates each.
        self.state_size = self.state_dim = 22 * 3
        self.input_size = 2 * self.state_size
        self.hidden_size = self.args.hidden_size
        self.output_size = self.state_size
        self.traj_length = self.args.traj_length
        self.conditional_info_size = 0

    # Training parameters.
    self.baseline_value = 0.
    self.beta_decay = 0.9
    self.learning_rate = self.args.learning_rate

    # Epsilon-greedy schedule: decay spans `epsilon_over` epochs' worth
    # of dataset passes.
    self.initial_epsilon = self.args.epsilon_from
    self.final_epsilon = self.args.epsilon_to
    self.decay_epochs = self.args.epsilon_over
    self.decay_counter = self.decay_epochs * len(self.dataset)

    # Log-likelihood penalty.
    self.lambda_likelihood_penalty = self.args.likelihood_penalty
    self.baseline = None

    # Per-step epsilon decay.
    self.decay_rate = (self.initial_epsilon - self.final_epsilon) / self.decay_counter