load_robots_txt

in lib/crawler/coordinator.rb [174:201]

    def load_robots_txt(domain)
      crawl_task = Crawler::Data::CrawlTask.new(
        url: domain.robots_txt_url,
        type: :robots_txt,
        depth: 1
      )
      crawl_task.authorization_header = config.http_header_service.authorization_header_for_url(crawl_task.url)
      crawl_result = execute_task(crawl_task, follow_redirects: true)

      # If the robots.txt fetch fails with a redirect error, downgrade it to a 404
      # so the crawl is not blocked by the failure
      if crawl_result.is_a?(Crawler::Data::CrawlResult::RedirectError)
        system_logger.warn(
          "Treating a robots.txt redirect error for #{domain} as a 404 response: #{crawl_result.error}"
        )
        crawl_result = Crawler::Data::CrawlResult::Error.new(
          url: crawl_result.url,
          error: crawl_result.error,
          status_code: 404
        )
      elsif crawl_result.error?
        system_logger.warn("Error while fetching robots.txt for #{domain}: #{crawl_result.error}")
      else
        system_logger.debug("Fetched robots.txt for #{domain} from '#{crawl_result.url}'")
      end

      crawl_result
    end
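
The returned crawl result is what the coordinator hands to its robots.txt handling: a successful fetch carries the file body, while a 404 (real, or synthesized above from a redirect error) means the domain is crawled without robots.txt restrictions. Below is a minimal sketch of how a caller might consume the result; `config.domain_allowlist`, `robots_txt_service`, and `register_crawl_result` are assumed names used for illustration only, not taken from the listing above.

    # Hypothetical call site: seed robots.txt rules for every configured domain.
    # Only load_robots_txt and config appear in the listing above; the other
    # names are assumptions.
    config.domain_allowlist.each do |domain|
      crawl_result = load_robots_txt(domain)

      # A 404 result (including the redirect-as-404 fallback) is registered as
      # "no rules", so URLs on the domain remain crawlable.
      robots_txt_service.register_crawl_result(domain, crawl_result)
    end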