in gym_hil/envs/panda_arrange_boxes_gym_env.py [0:0]
def _compute_reward(self) -> float:
"""Compute the current reward based on block-target distances."""
block_sensors, target_sensors = self._get_sensors()
distances = [
np.linalg.norm(block.data - target.data)
for block, target in zip(block_sensors, target_sensors, strict=False)
]
if self.reward_type == "dense":
return sum(np.exp(-20 * d) for d in distances)
else:
return float(all(d < 0.03 for d in distances))