def _compute_reward()

in gym_hil/envs/panda_arrange_boxes_gym_env.py [0:0]


    def _compute_reward(self) -> float:
        """Compute the current reward based on block-target distances."""
        block_sensors, target_sensors = self._get_sensors()
        distances = [
            np.linalg.norm(block.data - target.data)
            for block, target in zip(block_sensors, target_sensors, strict=False)
        ]

        if self.reward_type == "dense":
            return sum(np.exp(-20 * d) for d in distances)
        else:
            return float(all(d < 0.03 for d in distances))