in robot_ws/src/rl_agent/markov/environments/meiro_runner_env.py
def infer_reward_state(self, action):
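    # Note: self.ranges is assumed to be populated asynchronously by this
    # environment's LIDAR (LaserScan) subscriber callback, which is not shown here.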
    # Wait until we have a scan from the LIDAR.
    while not self.ranges:
        time.sleep(SLEEP_WAITING_FOR_IMAGE_TIME_IN_SECOND)
    steering = float(action[0])
    throttle = float(action[1])
    # Resample the LIDAR ranges down to the training input size.
    size = len(self.ranges)
    x = np.linspace(0, size - 1, TRAINING_IMAGE_SIZE)
    xp = np.arange(size)
    state = np.clip(np.interp(x, xp, self.ranges), 0, LIDAR_SCAN_MAX_DISTANCE)
    state[np.isnan(state)] = LIDAR_SCAN_MAX_DISTANCE
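    # state now holds TRAINING_IMAGE_SIZE distance readings clipped to
    # [0, LIDAR_SCAN_MAX_DISTANCE], with NaN readings treated as maximum range.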
    # Find the minimum distance to any obstacle in the scan.
    min_distance = np.amin(state)
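    # CRASHED and FINISHED are module-level reward constants; presumably CRASHED is a
    # penalty and FINISHED a bonus that is divided by the step count below, so that
    # faster runs to the finish line earn more.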
    reward = 0
    if self.last_position_x >= FINISH_LINE:
        print("Congratulations! You passed the finish line!")
        if self.steps == 0:
            reward = 0.0
            done = False
        else:
            reward = FINISHED / self.steps
            done = True
    elif min_distance < CRASH_DISTANCE:
        # The robot most likely hit a wall.
        reward = CRASHED
        done = True
    else:
        # When the robot is close to a wall, reward it based on how far it is from
        # the wall, and double the reward when it is moving away from the wall.
        if min_distance < 0.19:
            if min_distance < 0.15:
                reward = 0.05
            elif min_distance < 0.17:
                reward = 0.15
            else:
                reward = 0.35
            if min_distance - self.last_min_distance - 0.01 > 0:
                reward *= 2
            done = False
        else:
            # While the robot is far enough from the wall, use the throttle value
            # as the reward (max throttle 0.1 => reward of 1.0).
            reward = throttle * 10.0
            done = False
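    # Summary of the shaping above (d = min_distance, units assumed to be metres):
    #   d < CRASH_DISTANCE -> CRASHED, episode ends
    #   d < 0.15           -> 0.05
    #   d < 0.17           -> 0.15
    #   d < 0.19           -> 0.35
    #   otherwise          -> throttle * 10.0
    # Sub-0.19 rewards are doubled when d grew by more than 0.01 since the last step.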
    # Leave a footstep marker on the grid cell the robot has just passed through.
    footstep_marker = self.calc_footsteps_mark_position(self.x, self.y)
    if self.last_footsteps_mark_position != footstep_marker:
        # If the robot has already entered this cell twice or more, treat it as a crash.
        if self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] > 1:
            reward = CRASHED
            done = True
        # If the robot has entered this cell before, reduce the reward.
        elif self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] > 0:
            reward = reward * 0.01
        self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] += 1
        self.last_footsteps_mark_position = footstep_marker
    self.reward_in_episode += reward
    print('Step No=%.2f' % self.steps,
          'Reward=%.2f' % reward,
          'Distance from finish line=%f' % abs(FINISH_LINE - self.x))
    self.reward = reward
    self.done = done
    self.next_state = state
    self.last_min_distance = min_distance
    self.last_position_x = self.x
    self.last_position_y = self.y
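
# The helper used above, calc_footsteps_mark_position, is not included in this
# excerpt. Below is a minimal sketch of what it might look like, assuming the maze
# is discretised into square cells of FOOTSTEP_GRID_RESOLUTION metres anchored at
# the world origin; the constant name and the grid layout are assumptions, not
# taken from this file.
def calc_footsteps_mark_position(self, x, y):
    # Map a world-frame (x, y) position to (row, col) indices into
    # self.footsteps_marker, the per-cell visit counter used above.
    col = int(x // FOOTSTEP_GRID_RESOLUTION)
    row = int(y // FOOTSTEP_GRID_RESOLUTION)
    return (row, col)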