# Excerpt from gym_hil/envs/panda_pick_gym_env.py
def _compute_reward(self) -> float:
"""Compute reward based on current state."""
block_pos = self._data.sensor("block_pos").data
if self.reward_type == "dense":
tcp_pos = self._data.sensor("2f85/pinch_pos").data
dist = np.linalg.norm(block_pos - tcp_pos)
r_close = np.exp(-20 * dist)
r_lift = (block_pos[2] - self._z_init) / (self._z_success - self._z_init)
r_lift = np.clip(r_lift, 0.0, 1.0)
return 0.3 * r_close + 0.7 * r_lift
else:
lift = block_pos[2] - self._z_init
return float(lift > 0.1)