in lib/crawler/http_executor.rb [201:270]
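# Converts the HTTP response for a crawl task into the matching CrawlResult object.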
def generate_crawl_result(crawl_task:, response:)
  result_args = {
    url: crawl_task.url,
    status_code: response.code,
    content_type: response['content-type'],
    start_time: response.request_start_time,
    end_time: response.request_end_time
  }
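
  # Redirect responses are delegated to a dedicated handler before the body is read.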
  if response.redirect?
    return handle_redirect(
      crawl_task:,
      response:,
      result_args:
    )
  end
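
  # Error responses become Error results carrying the HTTP reason phrase.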
  if response.error?
    return Crawler::Data::CrawlResult::Error.new(
      **result_args.merge(error: response.reason_phrase)
    )
  end
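
  # Read the body with the configured size limit, timeout, and default encoding.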
  response_body = response.body(
    max_response_size: config.max_response_size,
    request_timeout: config.request_timeout,
    default_encoding: Encoding.find(config.default_encoding)
  )
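
  # robots.txt fetches are wrapped in a dedicated RobotsTxt result.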
  if crawl_task.robots_txt?
    return Crawler::Data::CrawlResult::RobotsTxt.new(
      **result_args.merge(content: response_body)
    )
  end
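
  # Dispatch on the response MIME type to the format-specific result builders.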
  case response.mime_type
  when *SUPPORTED_MIME_TYPES[:html]
    generate_html_crawl_result(
      crawl_task:,
      response:,
      response_body:
    )
  when *content_extractable_file_mime_types
    generate_content_extractable_file_crawl_result(
      crawl_task:,
      response:,
      response_body:
    )
  when *SUPPORTED_MIME_TYPES[:xml]
    generate_xml_sitemap_crawl_result(
      crawl_task:,
      response:,
      response_body:
    )
  else
    Crawler::Data::CrawlResult::UnsupportedContentType.new(**result_args)
  end
ensure
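  # Always release the HTTP connection, even when result generation raises.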
  response.release_connection
end