in hucc/envs/ctrlgs.py [0:0]
def step(self, action):
    """Advance the underlying env one step and shape reward towards the goal.

    Measures the distance to the current goal before and after the inner
    ``super().step``, fills ``info`` with goal diagnostics, selects one of
    several reward-shaping variants via ``self.reward``, applies a control
    cost and a fall-over penalty, and manages the soft/hard reset
    bookkeeping that lets episodes continue across task boundaries.

    Args:
        action: Control input forwarded to the wrapped environment; also
            used for the quadratic control cost.

    Returns:
        Tuple ``(next_obs, reward, done, info)`` in the Gym step
        convention. ``info`` gains ``potential``, ``distance``,
        ``reached_goal``, ``time``, ``features`` and, depending on the
        branch taken, ``EpisodeContinues``, ``RandomReset``,
        ``LastStepOfTask``, ``SoftReset`` and ``fell_over``.

    Raises:
        ValueError: If ``self.reward`` names an unknown reward variant.
    """

    def distance_to_goal():
        # Euclidean distance between the goal and the current projected
        # goal features.
        gs = self.proj(self.goal_featurizer(), self._features)
        d = self.goal - gs
        for i, f in enumerate(self._features):
            if f in self.goal_space['twist_feats']:
                # Wrap around projected pi/-pi for distance
                d[i] = (
                    np.remainder(
                        (self.goal[i] - gs[i]) + self.proj_pi,
                        2 * self.proj_pi,
                    )
                    - self.proj_pi
                )
        return np.linalg.norm(d, ord=2)

    d_prev = distance_to_goal()
    next_obs, reward, done, info = super().step(action)
    d_new = distance_to_goal()
    info['potential'] = d_prev - d_new
    info['distance'] = d_new
    info['reached_goal'] = info['distance'] < self.precision

    # Reward-shaping variants.
    if self.reward == 'potential':
        reward = info['potential']
    elif self.reward == 'potential2':
        reward = d_prev - self.gamma * d_new
    elif self.reward == 'potential3':
        reward = 1.0 if info['reached_goal'] else 0.0
        reward += d_prev - self.gamma * d_new
    elif self.reward == 'potential4':
        reward = (d_prev - d_new) / self._d_initial
    elif self.reward == 'distance':
        reward = -info['distance']
    elif self.reward == 'sparse':
        reward = 1.0 if info['reached_goal'] else 0.0
    else:
        raise ValueError(f'Unknown reward: {self.reward}')
    reward -= self.ctrl_cost * np.square(action).sum()

    info['EpisodeContinues'] = True
    # Fix: plain truthiness instead of `== True`.
    if info['reached_goal'] and not self.full_episodes:
        done = True
    info['time'] = self._step
    self._step += 1
    if self._step >= self.max_steps:
        done = True
    elif (
        not info['reached_goal'] and self.np_random.random() < self.reset_p
    ):
        info['RandomReset'] = True
        done = True
    if not self.allow_fallover and self.fell_over():
        reward = self.fallover_penalty
        done = True
        self._do_hard_reset = True
        info['reached_goal'] = False
        info['fell_over'] = True

    # Whether the next reset must be a hard (full) reset. Computed once:
    # the original duplicated this expression, and one copy lacked the
    # `hard_reset_interval > 0` guard, risking ZeroDivisionError when the
    # interval is configured as 0.
    need_hard_reset = self._do_hard_reset or (
        self.hard_reset_interval > 0
        and self._reset_counter % self.hard_reset_interval == 0
    )
    if done and need_hard_reset:
        del info['EpisodeContinues']
    if done:
        info['LastStepOfTask'] = True
    if done and 'EpisodeContinues' in info and self.implicit_soft_resets:
        # `EpisodeContinues` survived the deletion above, so a hard reset
        # is not required: do an implicit soft reset and let the episode
        # continue.
        next_obs = self.reset()
        done = False
        del info['EpisodeContinues']
        info['SoftReset'] = True
    info['features'] = self._features_s
    return next_obs, reward, done, info