def step()

in hucc/envs/ctrlgs.py

One environment step for the goal-conditioned control task: it measures the distance to the goal before and after the underlying step, derives the reward from the configured shaping scheme (self.reward), applies a control cost, and manages soft and hard episode resets.

    def step(self, action):
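        # L2 distance between the projected current features and the goal;
        # angular (twist) features are wrapped so the distance is measured
        # along the shorter way around the circle.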
        def distance_to_goal():
            gs = self.proj(self.goal_featurizer(), self._features)
            d = self.goal - gs
            for i, f in enumerate(self._features):
                if f in self.goal_space['twist_feats']:
                    # Wrap around projected pi/-pi for distance
                    d[i] = (
                        np.remainder(
                            (self.goal[i] - gs[i]) + self.proj_pi,
                            2 * self.proj_pi,
                        )
                        - self.proj_pi
                    )
            return np.linalg.norm(d, ord=2)

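        # Measure the goal distance before and after the underlying step so
        # the reward can be shaped on the change in distance (a potential).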
        d_prev = distance_to_goal()
        next_obs, reward, done, info = super().step(action)
        d_new = distance_to_goal()

        info['potential'] = d_prev - d_new
        info['distance'] = d_new
        info['reached_goal'] = info['distance'] < self.precision
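        # Reward shaping variants: plain and discounted potential differences,
        # a goal-reached bonus, a distance penalty, and a sparse reward.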
        if self.reward == 'potential':
            reward = info['potential']
        elif self.reward == 'potential2':
            reward = d_prev - self.gamma * d_new
        elif self.reward == 'potential3':
            reward = 1.0 if info['reached_goal'] else 0.0
            reward += d_prev - self.gamma * d_new
        elif self.reward == 'potential4':
            reward = (d_prev - d_new) / self._d_initial
        elif self.reward == 'distance':
            reward = -info['distance']
        elif self.reward == 'sparse':
            reward = 1.0 if info['reached_goal'] else 0.0
        else:
            raise ValueError(f'Unknown reward: {self.reward}')
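        # Quadratic penalty on the action magnitude (control cost).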
        reward -= self.ctrl_cost * np.square(action).sum()

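        # By default the episode continues past this task (soft reset); the
        # flag is removed below whenever a hard reset is due.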
        info['EpisodeContinues'] = True
        if info['reached_goal'] and not self.full_episodes:
            done = True
        info['time'] = self._step
        self._step += 1
        if self._step >= self.max_steps:
            done = True
        elif (
            not info['reached_goal'] and self.np_random.random() < self.reset_p
        ):
            info['RandomReset'] = True
            done = True

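        # Falling over (unless explicitly allowed) ends the episode with a
        # penalty and forces a hard reset on the next reset().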
        if not self.allow_fallover and self.fell_over():
            reward = self.fallover_penalty
            done = True
            self._do_hard_reset = True
            info['reached_goal'] = False
            info['fell_over'] = True
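        # Hard resets (forced, or due on the periodic schedule) break the
        # chain of continuing episodes.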
        if done and (
            self._do_hard_reset
            or (self._reset_counter % self.hard_reset_interval == 0)
        ):
            del info['EpisodeContinues']
        if done:
            info['LastStepOfTask'] = True

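        # With implicit soft resets enabled, perform the reset here and keep
        # the episode running instead of terminating it.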
        if done and 'EpisodeContinues' in info and self.implicit_soft_resets:
            need_hard_reset = self._do_hard_reset or (
                self.hard_reset_interval > 0
                and self._reset_counter % self.hard_reset_interval == 0
            )
            if not need_hard_reset:
                # Do implicit resets, let episode continue
                next_obs = self.reset()
                done = False
                del info['EpisodeContinues']
                info['SoftReset'] = True

        info['features'] = self._features_s
        return next_obs, reward, done, info
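
The twist-feature branch above is the standard shortest-signed-angle computation. Below is a minimal, self-contained sketch of that wrap-around and of the 'potential' reward it feeds into; PROJ_PI and wrapped_diff are illustrative names, assuming the projection wraps at +/-pi as proj_pi does above:

    import numpy as np

    PROJ_PI = np.pi  # assumption: angular features wrap at +/-pi

    def wrapped_diff(goal, gs):
        # Shortest signed difference goal - gs, mapped into [-PROJ_PI, PROJ_PI).
        # Mirrors the np.remainder expression used for twist features in step().
        return np.remainder((goal - gs) + PROJ_PI, 2 * PROJ_PI) - PROJ_PI

    goal = np.array([3.0])   # just short of +pi
    gs = np.array([-3.0])    # just short of -pi
    d_new = np.linalg.norm(wrapped_diff(goal, gs), ord=2)
    # A naive difference would give |3.0 - (-3.0)| = 6.0; wrapping measures
    # the short way around the circle instead:
    print(d_new)  # ~0.2832 (= 2*pi - 6)

    # Potential-based shaping as in the 'potential' branch: the reward is
    # positive exactly when the step moved the agent closer to the goal.
    d_prev = 0.5
    reward = d_prev - d_new
    print(round(reward, 4))  # 0.2168

Note that the 'potential2' variant, d_prev - gamma * d_new, matches the classic potential-based shaping form F(s, s') = gamma * Phi(s') - Phi(s) with Phi set to the negative goal distance, which is presumably why it carries the discount factor.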