in gym_hil/envs/panda_arrange_boxes_gym_env.py [0:0]
def step(self, action: np.ndarray) -> Tuple[Dict[str, np.ndarray], float, bool, bool, Dict[str, Any]]:
    """Advance the environment by one control step.

    The action is forwarded to ``apply_action``; afterwards the observation,
    reward and success flag are gathered from the corresponding helpers. The
    episode terminates on success, or when the first two components of the
    ``block1_pos`` sensor reading fall outside ``_SAMPLING_BOUNDS`` widened
    by ``block_range`` plus a fixed 0.05 margin.

    Args:
        action: Action passed unchanged to ``apply_action``.

    Returns:
        A ``(observation, reward, terminated, truncated, info)`` tuple.
        ``truncated`` is always ``False`` and ``info`` carries the success
        flag under the ``"succeed"`` key.
    """
    self.apply_action(action)

    observation = self._compute_observation()
    reward = self._compute_reward()
    succeeded = self._is_success()
    # With sparse rewards, success is read directly off the reward value.
    if self.reward_type == "sparse":
        succeeded = reward == 1.0

    # Only the first two components of the sensor reading are bounds-checked.
    block_xy = self._data.sensor("block1_pos").data[:2]
    below_lower = np.any(block_xy < (_SAMPLING_BOUNDS[0] - self.block_range - 0.05))
    # The upper-bound test only runs when the lower-bound test did not trip.
    out_of_bounds = below_lower or np.any(
        block_xy > (_SAMPLING_BOUNDS[1] + self.block_range + 0.05)
    )

    terminated = bool(succeeded or out_of_bounds)
    info = {"succeed": succeeded}
    return observation, reward, terminated, False, info