in env_humanoid_base.py [0:0]
def step(self, action):
    self.callback_step_prev()

    ''' Collect data for reward computation before the current step '''
    rew_data_prev = [self.reward_data(i) for i in range(self._num_agent)]

    assert len(action) == self._num_agent

    ''' Convert each agent's action into a simulation target pose '''
    for i in range(self._num_agent):
        if isinstance(action[i], Pose):
            self._target_pose[i] = action[i]
        elif isinstance(action[i], np.ndarray):
            self._target_pose[i] = self.compute_target_pose(i, action[i])
        else:
            raise NotImplementedError(
                'Unsupported action type: %s' % type(action[i]))

    ''' Record each agent's COM velocity before stepping the simulation '''
    for i in range(self._num_agent):
        self._com_vel[i].append(self._sim_agent[i].get_com_and_com_vel()[1])
    ''' Update simulation '''
    self._base_env.step(self._target_pose)

    self.callback_step_after()

    ''' Collect data for reward computation after the current step '''
    rew_data_next = [self.reward_data(i) for i in range(self._num_agent)]

    '''
    Check conditions for end-of-episode. If 'eoe_margin' is larger than zero,
    the episode keeps running for that much simulation time after a
    termination condition is first detected before it is actually ended.
    '''
    if not self._end_of_episode_intermediate:
        eoe_reason = []
        for i in range(self._num_agent):
            eoe_reason += self.inspect_end_of_episode_per_agent(i)
        if Env.EarlyTermChoice.TaskEnd in self._early_term_choices:
            eoe_reason += self.inspect_end_of_episode_task()
        self._end_of_episode_intermediate = len(eoe_reason) > 0
        self._end_of_episode_reason_intermediate = eoe_reason

    if self._end_of_episode_intermediate:
        self._time_elapsed_after_end_of_episode += self._dt_con
        if self._time_elapsed_after_end_of_episode >= self._eoe_margin:
            self._end_of_episode = True
            self._end_of_episode_reason = self._end_of_episode_reason_intermediate
    ''' Compute rewards '''
    rews, infos = [], []
    for i in range(self._num_agent):
        r, rd = self.reward(i, rew_data_prev, rew_data_next, action)
        rews.append(r)
        info = {
            'eoe_reason': self._end_of_episode_reason,
            'rew_info': rd,
            'learning_info': self._learning_info,
        }
        infos.append(info)
        if Env.EarlyTermChoice.LowReward in self._early_term_choices:
            self._rew_queue[i].append(r)

    self.print_log_in_step()

    return rews, infos
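
For reference, a minimal rollout sketch showing how the per-agent `rews` and `infos` returned by `step()` are typically consumed. This is an illustrative assumption, not code from this file: `rollout`, `env`, and `action_dims` are hypothetical names, and the zero-vector actions merely stand in for a policy output.

import numpy as np

def rollout(env, action_dims, num_steps=100):
    '''
    Hypothetical driver loop: 'env' is assumed to be an already-reset instance
    of the humanoid environment above, and 'action_dims[i]' is the action
    dimension of agent i (both are assumptions, not part of this file).
    '''
    returns = [0.0] * env._num_agent
    for _ in range(num_steps):
        # One ndarray action per agent, routed through compute_target_pose()
        actions = [np.zeros(action_dims[i]) for i in range(env._num_agent)]
        rews, infos = env.step(actions)
        for i, r in enumerate(rews):
            returns[i] += r
        # infos[i]['eoe_reason'] carries the termination reason once it is set
        if env._end_of_episode:
            break
    return returns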