in simulation_ws/src/rl-agent/markov/environments/training_env.py [0:0]
def reward_function(self):
    '''
    :return: reward as float
             done as boolean
    '''
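    # CRASH_DISTANCE, CHECKPOINT_X/Y, MIDPOINT_X/Y, STAGE_X/Y_MIN/MAX, INITIAL_DISTANCE_TO_CHECKPOINT,
    # FINISHED_REWARD and MIDPOINT_REWARD are assumed to be module-level constants defined elsewhere
    # in training_env.py; their values are not shown in this excerpt.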
    reward = 0
    base_reward = 2
    multiplier = 0
    done = False

    if self.steps > 0:
        # Check for episode-ending events first
        # ###########################################
        # Has the LIDAR registered a hit?
        if self.collision_threshold <= CRASH_DISTANCE:
            print("Rover has sustained sideswipe damage")
            return 0, True  # No reward
        # Have the gravity sensors registered too much G-force?
        if self.collision:
            print("Rover has collided with an object")
            return 0, True  # No reward
        # Has the rover reached the max steps?
        if self.power_supply_range < 1:
            print("Rover's power supply has been drained (max steps reached)")
            return 0, True  # No reward
        # Has the rover reached the checkpoint?
        if self.last_position_x >= CHECKPOINT_X and self.last_position_y >= CHECKPOINT_Y:
            print("Congratulations! The rover has reached the checkpoint!")
            multiplier = FINISHED_REWARD
            reward = (base_reward * multiplier) / self.steps  # <-- incentivize reaching the checkpoint in the fewest steps
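            # For illustration only (assumed values): with base_reward = 2, FINISHED_REWARD = 10000
            # and 400 steps, the terminal reward would be 2 * 10000 / 400 = 50; finishing in fewer
            # steps yields a proportionally larger reward.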
            return reward, True
        # If it has not reached the checkpoint, is it still on the map?
        if self.x < (STAGE_X_MIN - .45) or self.x > (STAGE_X_MAX + .45):
            print("Rover has left the mission map!")
            return 0, True
        if self.y < (STAGE_Y_MIN - .45) or self.y > (STAGE_Y_MAX + .45):
            print("Rover has left the mission map!")
            return 0, True

        # No episode-ending events -- continue to calculate the reward
        if self.last_position_x >= MIDPOINT_X and self.last_position_y >= MIDPOINT_Y:  # Rover is past the midpoint
            # Determine whether the rover has already received the one-time reward for reaching the midpoint
            if not self.reached_midpoint:
                self.reached_midpoint = True
                print("Congratulations! The rover has reached the midpoint!")
                multiplier = MIDPOINT_REWARD
                reward = (base_reward * multiplier) / self.steps  # <-- incentivize reaching the midpoint in the fewest steps
                return reward, False

        # Reaching this point means the rover has either not yet reached the midpoint OR has already
        # collected the one-time midpoint reward.
        # Scale the reward by the rover's proximity to the checkpoint.
        waypoint_interval = INITIAL_DISTANCE_TO_CHECKPOINT / 5
        marker = [waypoint_interval, (waypoint_interval * 2), (waypoint_interval * 3), (waypoint_interval * 4)]

        # Get the base multiplier
        if self.current_distance_to_checkpoint <= marker[0]:
            multiplier = 5
        elif self.current_distance_to_checkpoint <= marker[1]:
            multiplier = 4
        elif self.current_distance_to_checkpoint <= marker[2]:
            multiplier = 3
        elif self.current_distance_to_checkpoint <= marker[3]:
            multiplier = 2
        else:
            multiplier = 1
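        # For illustration only (assumed value): if INITIAL_DISTANCE_TO_CHECKPOINT were 10.0, the
        # tiers would be <= 2.0 -> 5, <= 4.0 -> 4, <= 6.0 -> 3, <= 8.0 -> 2, and > 8.0 -> 1.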
        # Incentivize the rover to stay away from objects
        if self.collision_threshold >= 2.0:    # very safe distance
            multiplier = multiplier + 1
        elif self.collision_threshold >= 1.5:  # pretty safe
            multiplier = multiplier + .5
        elif self.collision_threshold >= 1.0:  # just enough time to turn
            multiplier = multiplier + .25
        # otherwise no bonus -- the rover is probably about to hit something and end up with a zero reward

        # Incentivize the rover to move toward the checkpoint, not away from it
        if not self.closer_to_checkpoint:
            if multiplier > 0:
                # Cut the multiplier in half
                multiplier = multiplier / 2

        reward = base_reward * multiplier

    return reward, done
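
# --- Hypothetical usage sketch (assumption, not part of the repo code) ---------------------
# One plausible way a Gym-style step() could consume reward_function(); the helper names below
# (apply_action, get_observation) are illustrative only and do not appear in the excerpt above.
def step(self, action):
    self.steps += 1
    self.apply_action(action)             # hypothetical helper: forward the action to the rover
    reward, done = self.reward_function()
    observation = self.get_observation()  # hypothetical helper: read the latest sensor state
    return observation, reward, done, {}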