in simulation_ws/src/rl-agent/markov/environments/training_env.py [0:0]
def reward_function(self):
    '''
    :return: reward as float
             done as boolean
    '''
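    # CRASH_DISTANCE, CHECKPOINT_X/Y, MIDPOINT_X/Y, STAGE_X/Y_MIN/MAX, INITIAL_DISTANCE_TO_CHECKPOINT,
    # FINISHED_REWARD and MIDPOINT_REWARD are assumed to be module-level constants defined elsewhere
    # in training_env.py; their values are not shown in this excerpt.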
    reward = 0
    base_reward = 2
    multiplier = 0
    done = False

    if self.steps > 0:
        # Check for episode-ending events first
        # ###########################################
        # Has the LIDAR registered a hit?
        if self.collision_threshold <= CRASH_DISTANCE:
            print("Rover has sustained sideswipe damage")
            return 0, True  # No reward
        # Have the gravity sensors registered too much G-force?
        if self.collision:
            print("Rover has collided with an object")
            return 0, True  # No reward
        # Has the rover reached the max steps?
        if self.power_supply_range < 1:
            print("Rover's power supply has been drained (max steps reached)")
            return 0, True  # No reward
        # Has the rover reached the checkpoint?
        if self.last_position_x >= CHECKPOINT_X and self.last_position_y >= CHECKPOINT_Y:
            print("Congratulations! The rover has reached the checkpoint!")
            multiplier = FINISHED_REWARD
            reward = (base_reward * multiplier) / self.steps  # <-- incentivize reaching the checkpoint in the fewest steps
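            # For illustration only (assumed values): with base_reward = 2, FINISHED_REWARD = 10000
            # and 400 steps, the terminal reward would be 2 * 10000 / 400 = 50; finishing in fewer
            # steps yields a proportionally larger reward.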
            return reward, True
        # If it has not reached the checkpoint, is it still on the map?
        if self.x < (STAGE_X_MIN - .45) or self.x > (STAGE_X_MAX + .45):
            print("Rover has left the mission map!")
            return 0, True
        if self.y < (STAGE_Y_MIN - .45) or self.y > (STAGE_Y_MAX + .45):
            print("Rover has left the mission map!")
            return 0, True

        # No episode-ending events -- continue to calculate the reward
        if self.last_position_x >= MIDPOINT_X and self.last_position_y >= MIDPOINT_Y:  # Rover is past the midpoint
            # Determine whether the rover has already received the one-time reward for reaching the midpoint
            if not self.reached_midpoint:
                self.reached_midpoint = True
                print("Congratulations! The rover has reached the midpoint!")
                multiplier = MIDPOINT_REWARD
                reward = (base_reward * multiplier) / self.steps  # <-- incentivize reaching the midpoint in the fewest steps
                return reward, False

        # Reaching this point means the rover has either not yet reached the midpoint OR has already
        # collected the one-time midpoint reward.
        # Scale the reward by the rover's proximity to the checkpoint.
        waypoint_interval = INITIAL_DISTANCE_TO_CHECKPOINT / 5
        marker = [waypoint_interval, (waypoint_interval * 2), (waypoint_interval * 3), (waypoint_interval * 4)]

        # Get the base multiplier
        if self.current_distance_to_checkpoint <= marker[0]:
            multiplier = 5
        elif self.current_distance_to_checkpoint <= marker[1]:
            multiplier = 4
        elif self.current_distance_to_checkpoint <= marker[2]:
            multiplier = 3
        elif self.current_distance_to_checkpoint <= marker[3]:
            multiplier = 2
        else:
            multiplier = 1
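        # For illustration only (assumed value): if INITIAL_DISTANCE_TO_CHECKPOINT were 10.0, the
        # tiers would be <= 2.0 -> 5, <= 4.0 -> 4, <= 6.0 -> 3, <= 8.0 -> 2, and > 8.0 -> 1.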
        # Incentivize the rover to stay away from objects
        if self.collision_threshold >= 2.0:    # very safe distance
            multiplier = multiplier + 1
        elif self.collision_threshold >= 1.5:  # pretty safe
            multiplier = multiplier + .5
        elif self.collision_threshold >= 1.0:  # just enough time to turn
            multiplier = multiplier + .25
        # otherwise no bonus -- the rover is probably about to hit something and end up with a zero reward

        # Incentivize the rover to move toward the checkpoint, not away from it
        if not self.closer_to_checkpoint:
            if multiplier > 0:
                # Cut the multiplier in half
                multiplier = multiplier / 2

        reward = base_reward * multiplier

    return reward, done
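
# --- Hypothetical usage sketch (assumption, not part of the repo code) ---------------------
# One plausible way a Gym-style step() could consume reward_function(); the helper names below
# (apply_action, get_observation) are illustrative only and do not appear in the excerpt above.
def step(self, action):
    self.steps += 1
    self.apply_action(action)             # hypothetical helper: forward the action to the rover
    reward, done = self.reward_function()
    observation = self.get_observation()  # hypothetical helper: read the latest sensor state
    return observation, reward, done, {}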