def infer_reward_state()

in robot_ws/src/rl_agent/markov/environments/meiro_runner_env.py


    def infer_reward_state(self, action):
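        """Build the next state from the latest LIDAR scan, compute the step
        reward (finish line, crash, near-wall shaping, throttle, revisit
        penalty), and store reward, done, and next_state on the environment."""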
        # Wait until we have a scan from the LIDAR.
        while not self.ranges:
            time.sleep(SLEEP_WAITING_FOR_IMAGE_TIME_IN_SECOND)

        steering = float(action[0])
        throttle = float(action[1])

        # Resample the LIDAR ranges to match the training image size.
        size = len(self.ranges)
        x = np.linspace(0, size-1, TRAINING_IMAGE_SIZE)
        xp = np.arange(size)
        state = np.clip(np.interp(x, xp, self.ranges), 0, LIDAR_SCAN_MAX_DISTANCE)
        state[np.isnan(state)] = LIDAR_SCAN_MAX_DISTANCE

        # Find the minimum distance to an obstacle.
        min_distance = np.amin(state)

        reward = 0
        if self.last_position_x >= FINISH_LINE:
            print("Congratulations! You passed the finish line!")
            if self.steps == 0:
                reward = 0.0
                done = False
            else:
                reward = FINISHED / self.steps
                done = True
                
        elif min_distance < CRASH_DISTANCE:
            # The robot has likely hit a wall.
            reward = CRASHED
            done = True
            
        else:
            # When the robot is close to the wall, score it based on its distance from the wall.
            # Double the score when the robot is moving away from the wall.
            if min_distance < 0.19:
                
                if min_distance < 0.15:
                    reward = 0.05                
                elif min_distance < 0.17:
                    reward = 0.15                
                else:
                    reward = 0.35
                    
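                # The robot has moved away from the wall by more than 0.01 m since the last step.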
                if min_distance - self.last_min_distance - 0.01 > 0:
                    reward *= 2
                    
                done = False
            else:
                # While the robot is far enough from the wall, use the throttle value as the score.
                # (Max throttle 0.1 => score of 1.0)
                reward = throttle * 10.0 
                done = False

        # Leave a footstep marker at the position the robot has passed through.
        footstep_marker = self.calc_footsteps_mark_position(self.x, self.y)
        if self.last_footsteps_mark_position != footstep_marker:
            # If the robot has already passed through this area twice or more, treat it as a crash.
            if self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] > 1:
                reward = CRASHED
                done = True
            # If the robot has already passed through this area before, reduce the reward.
            elif self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] > 0:
                reward = reward * 0.01
                
            self.footsteps_marker[footstep_marker[0]][footstep_marker[1]] += 1
            self.last_footsteps_mark_position = footstep_marker
            
        self.reward_in_episode += reward
        print('Step No=%.2f' % self.steps,
              'Reward=%.2f' % reward,
              'Distance from finish line=%f' % abs(FINISH_LINE - self.x))                    
                
        self.reward = reward
        self.done = done
        self.next_state = state
        
        self.last_min_distance = min_distance
        self.last_position_x = self.x
        self.last_position_y = self.y
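

Below is a minimal, self-contained sketch of the two pieces of logic that are easiest to lift out of this method: resampling a raw LIDAR scan into a fixed-size state vector, and the near-wall reward tiers. The constant values and the helper names (resample_scan, near_wall_reward) are assumptions for illustration only; the real constants are defined elsewhere in meiro_runner_env.py.

import numpy as np

TRAINING_IMAGE_SIZE = 360        # assumed sample count fed to the agent
LIDAR_SCAN_MAX_DISTANCE = 4.0    # assumed maximum LIDAR range in meters

def resample_scan(ranges):
    """Resample a raw LIDAR scan of any length to TRAINING_IMAGE_SIZE points,
    clip it to the sensor's maximum range, and replace NaN readings."""
    size = len(ranges)
    x = np.linspace(0, size - 1, TRAINING_IMAGE_SIZE)
    xp = np.arange(size)
    state = np.clip(np.interp(x, xp, ranges), 0, LIDAR_SCAN_MAX_DISTANCE)
    state[np.isnan(state)] = LIDAR_SCAN_MAX_DISTANCE
    return state

def near_wall_reward(min_distance, last_min_distance):
    """Reward tiers used when the robot is within 0.19 m of a wall,
    doubled when the robot is moving away from the wall."""
    if min_distance < 0.15:
        reward = 0.05
    elif min_distance < 0.17:
        reward = 0.15
    else:
        reward = 0.35
    if min_distance - last_min_distance - 0.01 > 0:
        reward *= 2
    return reward

if __name__ == "__main__":
    raw_scan = np.random.uniform(0.1, 5.0, size=720)  # fake 720-beam scan
    state = resample_scan(raw_scan)
    print(state.shape)                                 # (360,)
    print(near_wall_reward(min_distance=0.18, last_min_distance=0.16))  # 0.7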