in gym_pusht/envs/pusht.py [0:0]
def step(self, action):
    """Advance the environment by one control step.

    Runs several physics sub-steps per control step, driving the agent
    toward ``action`` with a PD controller, then computes the reward from
    the resulting coverage (presumably the goal-region overlap fraction —
    see ``_get_coverage``).
    """
    self.n_contact_points = 0
    # Number of physics sub-steps that fit into one control period.
    substeps = int(1 / (self.dt * self.control_hz))
    self._last_action = action
    for _ in range(substeps):
        # PD control: pull toward the commanded position, damp velocity
        # toward zero.
        # self.agent.velocity = self.k_p * (act - self.agent.position) # P control works too.
        pos_error = action - self.agent.position
        vel_error = Vec2d(0, 0) - self.agent.velocity
        accel = self.k_p * pos_error + self.k_v * vel_error
        self.agent.velocity += accel * self.dt
        # Advance the physics simulation by one sub-step.
        self.space.step(self.dt)

    # Reward is the fraction of the success threshold reached, capped at 1.
    coverage = self._get_coverage()
    reward = np.clip(coverage / self.success_threshold, 0.0, 1.0)
    is_success = coverage > self.success_threshold
    terminated = is_success

    observation = self.get_obs()
    info = self._get_info()
    info["is_success"] = is_success
    info["coverage"] = coverage

    # Gymnasium 5-tuple; this env never truncates on its own.
    return observation, reward, terminated, False, info