def step()

in env_humanoid_base.py
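
Advances the simulation by one control step for all agents: each agent's action is converted into a target pose, the underlying physics is stepped, end-of-episode conditions are checked (with an optional grace margin), and per-agent rewards and info dictionaries are returned.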


    def step(self, action):

        ''' Hook for subclasses, invoked before the simulation step '''
        self.callback_step_prev()

        ''' Collect data for reward computation before the current step '''
        rew_data_prev = [self.reward_data(i) for i in range(self._num_agent)]

        assert len(action) == self._num_agent
        
        ''' Convert each agent's action into a target pose '''
        for i in range(self._num_agent):
            if isinstance(action[i], Pose):
                # The action already is a full target pose.
                self._target_pose[i] = action[i]
            elif isinstance(action[i], np.ndarray):
                # The action is a raw vector; map it to a target pose.
                self._target_pose[i] = self.compute_target_pose(i, action[i])
            else:
                raise NotImplementedError(
                    f'Unsupported action type: {type(action[i])}')
        
        ''' Record each agent's current center-of-mass velocity '''
        for i in range(self._num_agent):
            self._com_vel[i].append(self._sim_agent[i].get_com_and_com_vel()[1])
        
        ''' Update simulation '''
        self._base_env.step(self._target_pose)

        ''' Hook for subclasses, invoked after the simulation step '''
        self.callback_step_after()

        ''' Collect data for reward computation after the current step '''
        rew_data_next = [self.reward_data(i) for i in range(self._num_agent)]

        '''
        Check end-of-episode conditions.
        If 'eoe_margin' is greater than zero, the episode keeps running
        for that much extra time after a termination condition is first met.
        '''
        
        if not self._end_of_episode_intermediate:
            eoe_reason = []
            for i in range(self._num_agent):
                eoe_reason += self.inspect_end_of_episode_per_agent(i)
            if Env.EarlyTermChoice.TaskEnd in self._early_term_choices:
                eoe_reason += self.inspect_end_of_episode_task()

            self._end_of_episode_intermediate = len(eoe_reason) > 0
            self._end_of_episode_reason_intermediate = eoe_reason

        if self._end_of_episode_intermediate:
            # Run out the margin timer before actually ending the episode;
            # e.g. with _dt_con = 1/30 s and _eoe_margin = 0.5 s, the episode
            # continues for ~15 extra control steps.
            self._time_elapsed_after_end_of_episode += self._dt_con
            if self._time_elapsed_after_end_of_episode >= self._eoe_margin:
                self._end_of_episode = True
                self._end_of_episode_reason = self._end_of_episode_reason_intermediate

        ''' Compute rewards '''
        
        rews, infos = [], []
        for i in range(self._num_agent):
            r, rd = self.reward(i, rew_data_prev, rew_data_next, action)
            rews.append(r)
            info = {
                'eoe_reason': self._end_of_episode_reason,
                'rew_info': rd,
                'learning_info': self._learning_info
            }
            infos.append(info)
            if Env.EarlyTermChoice.LowReward in self._early_term_choices:
                self._rew_queue[i].append(r)

        self.print_log_in_step()
        
        return rews, infos
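
A minimal sketch of how a training loop might drive this method, assuming a concrete subclass (here called HumanoidEnv, a hypothetical name) that implements reward_data, reward, and compute_target_pose, plus a reset() method and an action_dim attribute that are not shown above; random actions stand in for a policy:

    import numpy as np

    env = HumanoidEnv(config)   # hypothetical subclass and config object
    env.reset()                 # assumed reset API
    while not env._end_of_episode:
        # One raw action vector per agent; a trained policy would supply these.
        actions = [np.random.uniform(-1.0, 1.0, size=env.action_dim)
                   for _ in range(env._num_agent)]
        rews, infos = env.step(actions)
    print('Episode ended because:', infos[0]['eoe_reason'])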