in ml3/envs/mountain_car.py [0:0]
def step(self, action):
    """Advance the car by one time step of length ``self.delta_t``.

    The velocity is integrated first, limited to
    ``[-self.max_speed, self.max_speed]``, and only then used to move the
    car, so the car never travels faster than the speed limit within a
    step. The resulting position and velocity are stored on
    ``self.cur_pos`` and ``self.cur_vel``.

    Args:
        action: Control input. It is divided by ``self.m`` and added to the
            acceleration term, i.e. it is treated as a force.

    Returns:
        A ``(state, reward)`` tuple. ``state`` is
        ``np.array([position, velocity])`` after the step; ``reward`` is
        100 if the new position is at least 0.5 and 0 otherwise.
    """
    position = self.cur_pos
    velocity = self.cur_vel

    # NOTE(review): the slope term is multiplied by m while the action is
    # divided by m -- confirm this mix is intended.
    acceleration = (
        self.g * self.m * np.cos(3 * position)
        + (action / self.m)
        - (self.k * velocity)
    )
    velocity = velocity + acceleration * self.delta_t

    # Enforce the speed limit BEFORE integrating the position; otherwise the
    # position would advance with a velocity the state never reports and a
    # large action could move the car arbitrarily far in a single step.
    if velocity > self.max_speed:
        velocity = self.max_speed
    elif velocity < -self.max_speed:
        velocity = -self.max_speed

    position = position + (velocity * self.delta_t)

    # Keep the car inside the track.
    if position > self.max_position:
        position = self.max_position
    elif position < self.min_position:
        position = self.min_position

    # The left wall stops the car: drop any velocity pointing into it.
    if position == self.min_position and velocity < 0:
        velocity = 0

    self.cur_pos = position
    self.cur_vel = velocity

    # Reward is only given once the car is at or beyond position 0.5.
    reward = 100 if position >= 0.5 else 0
    return np.array([position, velocity]), reward