def reward_function()

in simulation_ws/src/rl-agent/markov/environments/mars_env.py [0:0]


    def reward_function(self):
        '''
        Compute the reward for the current step and whether the episode ends.

        Evaluation order:
          1. Before the first step (``self.steps`` not positive) nothing is
             scored.
          2. Episode-ending events: LIDAR proximity at or below CRASH_DISTANCE,
             a registered collision, a drained power supply, reaching the
             checkpoint (the only ending that pays a reward), or leaving the
             guide rails.
          3. A one-time bonus the first time each waypoint zone is entered.
             This sets the matching ``self.reached_waypoint_N`` flag.
          4. Otherwise a shaped reward: a multiplier from the distance to the
             checkpoint, increased by the clearance from obstacles and halved
             when the rover is not getting closer to the checkpoint.

        CRASH_DISTANCE, CHECKPOINT_X, CHECKPOINT_Y and
        INITIAL_DISTANCE_TO_CHECKPOINT are read from the enclosing module.

        :return: reward as a number (0 when nothing is earned)
                 done as boolean (True when the episode is over)
        '''

        # Guide rails the rover has to stay inside, and the tolerance allowed
        # beyond them before the rover counts as having left the map.
        GUIDERAILS_X_MIN = -46
        GUIDERAILS_X_MAX = 1
        GUIDERAILS_Y_MIN = -6
        GUIDERAILS_Y_MAX = 4
        GUIDERAILS_MARGIN = .45

        # WayPoints to checkpoint
        WAYPOINT_1_X = -10
        WAYPOINT_1_Y = -4

        WAYPOINT_2_X = -17
        WAYPOINT_2_Y = 3

        WAYPOINT_3_X = -34
        WAYPOINT_3_Y = 3

        # REWARD Multipliers
        FINISHED_REWARD = 10000
        WAYPOINT_1_REWARD = 1000
        WAYPOINT_2_REWARD = 2000
        WAYPOINT_3_REWARD = 3000

        base_reward = 2

        # Nothing to score until at least one step has been taken.
        if self.steps <= 0:
            return 0, False

        # Check for episode ending events first
        # ###########################################

        # Has LIDAR registered a hit
        if self.collision_threshold <= CRASH_DISTANCE:
            print("Rover has sustained sideswipe damage")
            return 0, True  # No reward

        # Have the gravity sensors registered too much G-force
        if self.collision:
            print("Rover has collided with an object")
            return 0, True  # No reward

        # Has the power supply been used up (max steps reached)
        if self.power_supply_range < 1:
            print("Rover's power supply has been drained (MAX Steps reached")
            return 0, True  # No reward

        pos_x = self.last_position_x
        pos_y = self.last_position_y

        # Has the Rover reached the destination
        if pos_x >= CHECKPOINT_X and pos_y >= CHECKPOINT_Y:
            print("Congratulations! The rover has reached the checkpoint!")
            # Dividing by the step count incentivizes reaching the checkpoint
            # in the fewest steps.
            return (base_reward * FINISHED_REWARD) / self.steps, True

        # If it has not reached the checkpoint, is it still on the map?
        if (self.x < (GUIDERAILS_X_MIN - GUIDERAILS_MARGIN)
                or self.x > (GUIDERAILS_X_MAX + GUIDERAILS_MARGIN)
                or self.y < (GUIDERAILS_Y_MIN - GUIDERAILS_MARGIN)
                or self.y > (GUIDERAILS_Y_MAX + GUIDERAILS_MARGIN)):
            print("Rover has left the mission map!")
            return 0, True

        # No episode ending events - continue to calculate reward

        # One-time waypoint bonuses, checked in order. Each entry is
        # (waypoint number, rover is inside that waypoint's zone, bonus).
        waypoints = (
            (1, pos_x <= WAYPOINT_1_X and pos_y <= WAYPOINT_1_Y, WAYPOINT_1_REWARD),
            (2, pos_x <= WAYPOINT_2_X and pos_y >= WAYPOINT_2_Y, WAYPOINT_2_REWARD),
            (3, pos_x <= WAYPOINT_3_X and pos_y >= WAYPOINT_3_Y, WAYPOINT_3_REWARD),
        )
        for number, in_zone, bonus in waypoints:
            flag = "reached_waypoint_%d" % number
            # The flag makes sure each waypoint pays out only once.
            if in_zone and not getattr(self, flag):
                setattr(self, flag, True)
                print("Congratulations! The rover has reached waypoint %d!" % number)
                # Dividing by the step count incentivizes reaching the
                # way-point in the fewest steps.
                return bonus / self.steps, False

        # To reach this point the Rover has either not yet reached the
        # way-points OR has already collected their one-time rewards.

        # Base multiplier from the Rover's proximity to the Checkpoint: the
        # initial distance is split into five equal bands, the closest band
        # being worth 5 and the farthest (or anything beyond it) worth 1.
        waypoint_interval = INITIAL_DISTANCE_TO_CHECKPOINT / 5
        distance = self.current_distance_to_checkpoint
        if distance <= waypoint_interval:
            multiplier = 5
        elif distance <= waypoint_interval * 2:
            multiplier = 4
        elif distance <= waypoint_interval * 3:
            multiplier = 3
        elif distance <= waypoint_interval * 4:
            multiplier = 2
        else:
            multiplier = 1

        # Incentivize the rover to stay away from objects
        clearance = self.collision_threshold
        if clearance >= 2.0:      # very safe distance
            multiplier = multiplier + 1
        elif clearance >= 1.5:    # pretty safe
            multiplier = multiplier + .5
        elif clearance >= 1.0:    # just enough time to turn
            multiplier = multiplier + .25
        # Below 1.0 there is no bonus: the rover is probably about to hit
        # something and get a zero reward.

        # Incentivize the rover to move towards the Checkpoint and not away
        # from it: moving away cuts the multiplier in half.
        if not self.closer_to_checkpoint:
            multiplier = multiplier / 2

        return base_reward * multiplier, False