in env_humanoid_base.py [0:0]
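# NOTE: this excerpt assumes module-level imports that are not shown here.
# Inferred from the names used below (exact import paths may differ in the repo):
#   os, numpy (as np), collections.deque,
#   env_humanoid_tracking, sim_agent, bvh, MotionWithVelocity,
#   and a project-specific `math` module that provides `Normalizer`.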
def __init__(self, config):
    project_dir = config['project_dir']
    char_info_module = config['character'].get('char_info_module')
    sim_char_file = config['character'].get('sim_char_file')
    base_motion_file = config['character'].get('base_motion_file')
    ref_motion_scale = config['character'].get('ref_motion_scale')
    environment_file = config['character'].get('environment_file')
    ref_motion_file = config['character'].get('ref_motion_file')
    self_collision = config['character'].get('self_collision')
    actuation = config['character'].get('actuation')

    ''' Prepend project_dir to the given file paths '''
    if project_dir:
        for i in range(len(char_info_module)):
            char_info_module[i] = os.path.join(project_dir, char_info_module[i])
            sim_char_file[i] = os.path.join(project_dir, sim_char_file[i])
            base_motion_file[i] = os.path.join(project_dir, base_motion_file[i])
        if environment_file is not None:
            for i in range(len(environment_file)):
                environment_file[i] = os.path.join(project_dir, environment_file[i])

    ''' Create a base tracking environment '''
    self._base_env = env_humanoid_tracking.Env(
        fps_sim=config['fps_sim'],
        fps_act=config['fps_con'],
        verbose=config['verbose'],
        char_info_module=char_info_module,
        sim_char_file=sim_char_file,
        ref_motion_scale=ref_motion_scale,
        self_collision=self_collision,
        contactable_body=config['early_term'].get('falldown_contactable_body'),
        actuation=actuation,
    )
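    # Handle to the shared PyBullet client, and the control timestep in seconds
    # (one control step = 1 / fps_con).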
    self._pb_client = self._base_env._pb_client
    self._dt_con = 1.0/config['fps_con']

    ''' Copy some frequently used attributes from the base environment '''
    self._num_agent = self._base_env._num_agent
    assert self._num_agent == len(base_motion_file)
    self._sim_agent = [self._base_env._agent[i] for i in range(self._num_agent)]
    self._v_up = self._base_env._v_up

    ''' State '''
    self._state_choices = [Env.StateChoice.from_string(s) for s in config['state']['choices']]

    ''' Early Terminations '''
    self._early_term_choices = [Env.EarlyTermChoice.from_string(s) for s in config['early_term']['choices']]
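    # Reward setup: 'fn_def' holds named reward definitions and 'fn_map' assigns
    # one of those definitions to each agent.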
    self._reward_fn_def = config['reward']['fn_def']
    self._reward_fn_map = config['reward']['fn_map']
    self._reward_names = [self.get_reward_names(
        self._reward_fn_def[self._reward_fn_map[i]]) for i in range(self._num_agent)]

    '''
    Check that every reward definition referenced in the reward map actually exists
    '''
    assert len(self._reward_fn_map) == self._num_agent
    for key in self._reward_fn_map:
        assert key in self._reward_fn_def.keys()

    self._verbose = config['verbose']
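    # For low-reward early termination, keep roughly one second of recent rewards
    # per agent (deque length = number of control steps per second).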
    if Env.EarlyTermChoice.LowReward in self._early_term_choices:
        self._et_low_reward_thres = config['early_term']['low_reward_thres']
        self._rew_queue = self._num_agent * [None]
        for i in range(self._num_agent):
            self._rew_queue[i] = deque(maxlen=int(1.0/self._dt_con))

    ''' The environment automatically terminates after 'sim_window' seconds '''
    if Env.EarlyTermChoice.SimWindow in self._early_term_choices:
        self._sim_window_time = config['early_term']['sim_window_time']

    '''
    The environment continues for 'eoe_margin' seconds after end-of-episode is set to True.
    This is useful for making the controller work at the boundaries of reference motions.
    '''
    self._eoe_margin = config['early_term']['eoe_margin']
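    # How raw policy outputs are interpreted as actions, parsed from config['action']['type'].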
    self._action_type = Env.ActionMode.from_string(config['action']['type'])

    ''' The base motion defines the initial posture (e.g. a T-pose) '''
    self._base_motion = []
    for i in range(self._num_agent):
        m = bvh.load(file=base_motion_file[i],
                     motion=MotionWithVelocity(),
                     scale=1.0,
                     load_skel=True,
                     load_motion=True,
                     v_up_skel=self._sim_agent[i]._char_info.v_up,
                     v_face_skel=self._sim_agent[i]._char_info.v_face,
                     v_up_env=self._sim_agent[i]._char_info.v_up_env)
        m = MotionWithVelocity.from_motion(m)
        self._base_motion.append(m)
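    # Kinematic agents share each simulated agent's model but are not physically
    # simulated (kinematic_only=True); they are typically used to hold reference poses.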
    ''' Create Kinematic Agents '''
    self._kin_agent = []
    for i in range(self._num_agent):
        self._kin_agent.append(
            sim_agent.SimAgent(pybullet_client=self._base_env._pb_client,
                               model_file=sim_char_file[i],
                               char_info=self._sim_agent[i]._char_info,
                               ref_scale=ref_motion_scale[i],
                               self_collision=self_collision[i],
                               kinematic_only=True,
                               verbose=config['verbose']))

    '''
    Define the action space of this environment.
    Here I used a 'normalizer' where 'real' values correspond to joint angles,
    and 'norm' values correspond to the output values of the NN policy.
    It is used because the NN policy could sometimes output values in a much
    larger or narrower range than the environment needs.
    For example, if we apply a tanh activation at the last layer of the NN,
    the outputs are always within (-1, 1), but we need larger values for joint
    angles because 1 rad corresponds to only 57.3 degrees.
    '''
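    # For example (a sketch, assuming the Normalizer applies a linear map between the
    # 'norm' and 'real' ranges): with range_max_pol/range_min_pol = +/-1.0 and
    # range_max/range_min = +/-3.0, a policy output of 0.5 maps to a joint target of 1.5,
    # and apply_clamp=True clips values that fall outside the declared ranges.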
    self._action_space = []
    for i in range(self._num_agent):
        dim = self._sim_agent[i].get_num_dofs()
        normalizer = math.Normalizer(
            real_val_max=config['action']['range_max']*np.ones(dim),
            real_val_min=config['action']['range_min']*np.ones(dim),
            norm_val_max=config['action']['range_max_pol']*np.ones(dim),
            norm_val_min=config['action']['range_min_pol']*np.ones(dim),
            apply_clamp=True)
        self._action_space.append(normalizer)
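    # Per-agent history of center-of-mass velocity over roughly the last second
    # of control steps.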
    self._com_vel = self._num_agent * [None]
    for i in range(self._num_agent):
        self._com_vel[i] = deque(maxlen=int(1.0/self._dt_con))

    '''
    Any information needed for training in this environment.
    This can be set by calling "set_learning_info".
    '''
    self._learning_info = {}
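    # Flag read from config['add_noise']; it is consumed elsewhere in the environment.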
    self.add_noise = config['add_noise']
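
# A minimal sketch of the kind of config dict this constructor expects, inferred
# only from the keys read above (all values are placeholders, not from the repo):
#
#   config = {
#       'project_dir': '/path/to/project',
#       'fps_sim': 480, 'fps_con': 30, 'verbose': False, 'add_noise': False,
#       'character': {
#           'char_info_module': ['data/char_info.py'],
#           'sim_char_file': ['data/character.urdf'],
#           'base_motion_file': ['data/motion_tpose.bvh'],
#           'ref_motion_scale': [1.0],
#           'environment_file': None,
#           'ref_motion_file': [...],
#           'self_collision': [True],
#           'actuation': [...],
#       },
#       'state': {'choices': [...]},
#       'action': {'type': ..., 'range_max': 3.0, 'range_min': -3.0,
#                  'range_max_pol': 1.0, 'range_min_pol': -1.0},
#       'reward': {'fn_def': {...}, 'fn_map': [...]},
#       'early_term': {'choices': [...], 'eoe_margin': 0.5,
#                      'low_reward_thres': 0.1, 'sim_window_time': 30.0,
#                      'falldown_contactable_body': [...]},
#   }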