generate_crawl_result

in lib/crawler/http_executor.rb [201:270]


    # Converts the HTTP response received for a crawl task into a crawl result.
    #
    # The outcome is chosen in this order:
    #   1. redirect responses are delegated to +handle_redirect+;
    #   2. error responses become a +CrawlResult::Error+ carrying the reason phrase;
    #   3. robots.txt tasks become a +CrawlResult::RobotsTxt+ holding the body;
    #   4. anything else is dispatched on the response MIME type (HTML,
    #      content-extractable files, XML), falling back to
    #      +CrawlResult::UnsupportedContentType+.
    #
    # The response's connection is released on every exit path, including when
    # an exception propagates out of this method.
    #
    # @param crawl_task [#url, #robots_txt?] the task the request was made for
    # @param response [Object] the HTTP response obtained for that task
    # @return [Object] the crawl result built for the response
    def generate_crawl_result(crawl_task:, response:)
      # Fields shared by every result object constructed directly in this method.
      base_fields = {
        url: crawl_task.url,
        status_code: response.code,
        content_type: response['content-type'],
        start_time: response.request_start_time,
        end_time: response.request_end_time
      }

      # Redirects and errors are decided before the body is requested.
      return handle_redirect(crawl_task:, response:, result_args: base_fields) if response.redirect?

      if response.error?
        error_fields = base_fields.merge(error: response.reason_phrase)
        return Crawler::Data::CrawlResult::Error.new(**error_fields)
      end

      # Fetch the body once, passing along the configured size limit, timeout
      # and default encoding; every remaining branch that needs it reuses this value.
      body = response.body(
        max_response_size: config.max_response_size,
        request_timeout: config.request_timeout,
        default_encoding: Encoding.find(config.default_encoding)
      )

      if crawl_task.robots_txt?
        robots_fields = base_fields.merge(content: body)
        return Crawler::Data::CrawlResult::RobotsTxt.new(**robots_fields)
      end

      # The three content-specific builders take the same keyword arguments.
      payload = { crawl_task:, response:, response_body: body }

      case response.mime_type
      when *SUPPORTED_MIME_TYPES[:html]
        generate_html_crawl_result(**payload)
      when *content_extractable_file_mime_types
        generate_content_extractable_file_crawl_result(**payload)
      when *SUPPORTED_MIME_TYPES[:xml]
        generate_xml_sitemap_crawl_result(**payload)
      else
        Crawler::Data::CrawlResult::UnsupportedContentType.new(**base_fields)
      end
    ensure
      response.release_connection
    end