in Experiments/PolicyManagers.py [0:0]
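
# Note: this excerpt relies on module-level imports from the original file, in particular
# `numpy as np` and the repository's `RLUtils` helpers (used below for resampling).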
def collect_inputs(self, i, get_latents=False):

    if self.args.data=='DeterGoal':

        sample_traj, sample_action_seq = self.dataset[i]
        latent_b_seq, latent_z_seq = self.dataset.get_latent_variables(i)

        start = 0

        if self.args.traj_length>0:
            sample_action_seq = sample_action_seq[start:self.args.traj_length-1]
            latent_b_seq = latent_b_seq[start:self.args.traj_length-1]
            latent_z_seq = latent_z_seq[start:self.args.traj_length-1]
            sample_traj = sample_traj[start:self.args.traj_length]
        else:
            # Traj length is going to be -1 here.
            # The action sequence needs no truncation; it is already one step shorter than the trajectory.
            sample_action_seq = sample_action_seq[start:]
            sample_traj = sample_traj[start:]
            latent_b_seq = latent_b_seq[start:]
            latent_z_seq = latent_z_seq[start:]

        # The trajectory is one step longer than the action sequence, because actions are constructed from state differences; the concatenation below handles the alignment.
        # The concatenated trajectory is arranged as {(s0,_), (s1,a0), (s2,a1), ..., (sn,a_{n-1})}.
        concatenated_traj = self.concat_state_action(sample_traj, sample_action_seq)
        old_concatenated_traj = self.old_concat_state_action(sample_traj, sample_action_seq)
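
        # Assuming concat_state_action realizes the {(s0,_), (s1,a0), ...} layout above by
        # padding the first action slot, concatenated_traj has shape
        # (traj_length, state_dim + action_dim); old_concat_state_action presumably applies
        # the earlier pairing convention to the same data.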

        if self.args.data=='DeterGoal':
            self.conditional_information = np.zeros((self.args.condition_size))
            self.conditional_information[self.dataset.get_goal(i)] = 1
            self.conditional_information[4:] = self.dataset.get_goal_position(i)
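            # Assumed layout of the conditional information: the first four entries hold a
            # one-hot goal ID (get_goal(i) indexes into them), and entries from index 4 on
            # hold the goal position; get_goal_position is taken to be a method, mirroring get_goal.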
        else:
            self.conditional_information = np.zeros((self.args.condition_size))

        if get_latents:
            return sample_traj, sample_action_seq, concatenated_traj, old_concatenated_traj, latent_b_seq, latent_z_seq
        else:
            return sample_traj, sample_action_seq, concatenated_traj, old_concatenated_traj
    elif self.args.data=='MIME' or self.args.data=='Roboturk' or self.args.data=='OrigRoboturk' or self.args.data=='FullRoboturk' or self.args.data=='Mocap':

        # If we're imitating... select demonstrations from the particular task.
        if self.args.setting=='imitation' and self.args.data=='Roboturk':
            data_element = self.dataset.get_task_demo(self.demo_task_index, i)
        else:
            data_element = self.dataset[i]

        if not(data_element['is_valid']):
            return None, None, None, None

        trajectory = data_element['demo']

        # Apply normalization if requested ('meanvar' or 'minmax').
        if self.args.normalization=='meanvar' or self.args.normalization=='minmax':
            trajectory = (trajectory-self.norm_sub_value)/self.norm_denom_value
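            # norm_sub_value / norm_denom_value presumably hold the per-dimension mean/std
            # ('meanvar') or min/range ('minmax') statistics precomputed for this dataset.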

        action_sequence = np.diff(trajectory,axis=0)
        self.current_traj_len = len(trajectory)
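        # np.diff along axis 0 yields actions as successive state differences, so the action
        # sequence is one step shorter than the trajectory.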

        if self.args.data=='MIME':
            self.conditional_information = np.zeros((self.conditional_info_size))
        elif self.args.data=='Roboturk' or self.args.data=='OrigRoboturk' or self.args.data=='FullRoboturk':
            robot_states = data_element['robot-state']
            object_states = data_element['object-state']

            self.conditional_information = np.zeros((self.conditional_info_size))

            # Don't set this if pretraining / baseline.
            if self.args.setting=='learntsub' or self.args.setting=='imitation':
                self.conditional_information = np.zeros((len(trajectory),self.conditional_info_size))
                self.conditional_information[:,:self.cond_robot_state_size] = robot_states
                # Slice only up to object_states.shape[-1] rather than the full remaining width,
                # because the object-state size varies across demonstrations.
                self.conditional_information[:,self.cond_robot_state_size:self.cond_robot_state_size+object_states.shape[-1]] = object_states
                # Setting task ID too.
                self.conditional_information[:,-self.number_tasks+data_element['task-id']] = 1.
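                # Each row is thus [robot_state | object_state (zero-padded) | task one-hot],
                # with the one-hot occupying the last number_tasks entries.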

        # Concatenate
        concatenated_traj = self.concat_state_action(trajectory, action_sequence)
        old_concatenated_traj = self.old_concat_state_action(trajectory, action_sequence)

        if self.args.setting=='imitation':
            action_sequence = RLUtils.resample(data_element['demonstrated_actions'],len(trajectory))
            concatenated_traj = np.concatenate([trajectory, action_sequence],axis=1)

        return trajectory, action_sequence, concatenated_traj, old_concatenated_traj
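
# Illustrative usage sketch (not part of the original file): how a caller might consume
# collect_inputs. `policy_manager` and `index_list` are placeholder names for this sketch only.
#
#   for i in index_list:
#       trajectory, action_sequence, concatenated_traj, old_concatenated_traj = \
#           policy_manager.collect_inputs(i)
#       if trajectory is None:
#           # Invalid demonstration (data_element['is_valid'] was False); skip it.
#           continue
#       # concatenated_traj now pairs states with actions; collect_inputs also sets
#       # self.conditional_information and self.current_traj_len as side effects,
#       # depending on the dataset.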