in lib/crawler/url_validator/url_request_check_concern.rb [12:99]
# Fetches +url+ once and converts the HTTP status of the response into a
# +:url_request+ validation result.
#
# The request is executed through +http_executor+ as a +:content+ crawl task
# at depth 1, and the crawl result is stored in +@url_crawl_result+ (read back
# through the +url_crawl_result+ accessor). Every outcome is reported together
# with a +details+ hash containing the status code, the content type and the
# request duration converted to whole milliseconds.
#
# Only HTTP 200 is reported via +validation_ok+. Redirects and HTTP 401 are
# delegated to +redirect_validation_result+ / +unauthorized_validation_result+;
# every other status is reported via +validation_fail+.
#
# @return whatever the selected validation helper returns
def validate_url_request
  @url_crawl_result = http_executor.run(
    Crawler::Data::CrawlTask.new(
      url:,
      type: :content,
      depth: 1
    )
  )

  details = {
    status_code: url_crawl_result.status_code,
    content_type: url_crawl_result.content_type,
    # duration is multiplied by 1000 and truncated to get integer milliseconds
    request_time_msec: (url_crawl_result.duration * 1000).to_i
  }
  status = url_crawl_result.status_code

  # Specific statuses must be listed before the catch-all ranges below,
  # because +case+ picks the first matching +when+.
  case status
  when 200
    validation_ok(:url_request, "Successfully fetched #{url}: HTTP #{status}.", details)
  when 204
    validation_fail(:url_request, "The Web server at #{url} returned no content (HTTP 204).", details)
  when 301, 302, 303, 307, 308
    redirect_validation_result(details)
  when 305
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} is configured to require an HTTP proxy for access (HTTP 305).
      This may mean that you're trying to index an internal (intranet) server.
      Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-private-network-cloud.html.
    MESSAGE
  when 401
    unauthorized_validation_result(details)
  when 403
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} denied us permission to view that page (HTTP 403).
      This website may require a user name and password.
      Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-managing.html#crawler-managing-authentication.
    MESSAGE
  when 404
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} says that there is no web page at that location (HTTP 404).
    MESSAGE
  when 407
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} is configured to require an HTTP proxy for access (HTTP 407).
      This may mean that you're trying to index an internal (intranet) server.
      Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-private-network-cloud.html.
    MESSAGE
  when 429
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} refused our request due to rate-limiting (HTTP 429).
    MESSAGE
  when 451
    validation_fail(:url_request, <<~MESSAGE, details)
      The web server at #{url} refused our request for legal reasons (HTTP 451).
    MESSAGE
  when 400..499
    # Inclusive range: an exclusive 400...499 would let HTTP 499 fall through
    # to the "unexpected status" branch.
    validation_fail(:url_request, "Failed to fetch #{url}: HTTP #{status}.", details)
  when 500..598
    # Inclusive for the same reason; 599 is handled separately below.
    validation_fail(:url_request, "Transient error fetching #{url}: HTTP #{status}.", details)
  when 599
    # NOTE(review): 599 looks like a crawler-internal code for requests that
    # failed without a real HTTP response -- confirm against the HTTP executor.
    validation_fail(:url_request, <<~MESSAGE, details)
      Unexpected error fetching #{url}: HTTP #{status}.
    MESSAGE
  else
    validation_fail(:url_request, <<~MESSAGE, details)
      Unexpected HTTP status while fetching #{url}: HTTP #{status}.
    MESSAGE
  end
end