validate_url_request

in lib/crawler/url_validator/url_request_check_concern.rb [12:99]


    # Fetches +url+ once and turns the HTTP outcome into a +:url_request+ validation result.
    #
    # The request is run through +http_executor+ as a +:content+ crawl task at depth 1, and the
    # crawl result is stored in +@url_crawl_result+. Each validation message is accompanied by a
    # details hash holding the status code, the content type and the request time in milliseconds.
    #
    # Only HTTP 200 is reported through +validation_ok+. Redirects (301, 302, 303, 307, 308) and
    # HTTP 401 are delegated to +redirect_validation_result+ and +unauthorized_validation_result+;
    # every other status is reported through +validation_fail+.
    #
    # @return whatever the invoked validation helper returns (not visible from this method)
    def validate_url_request
      @url_crawl_result = http_executor.run(
        Crawler::Data::CrawlTask.new(
          url:,
          type: :content,
          depth: 1
        )
      )

      # Diagnostic data attached to every outcome below.
      details = {
        status_code: url_crawl_result.status_code,
        content_type: url_crawl_result.content_type,
        # Scaled by 1000 for the *_msec key and truncated to an Integer.
        request_time_msec: (url_crawl_result.duration * 1000).to_i
      }

      status = url_crawl_result.status_code
      case status
      when 200
        validation_ok(:url_request, "Successfully fetched #{url}: HTTP #{status}.", details)

      when 204
        validation_fail(:url_request, "The Web server at #{url} returned no content (HTTP 204).", details)

      when 301, 302, 303, 307, 308
        redirect_validation_result(details)

      when 305
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} is configured to require an HTTP proxy for access (HTTP 305).
          This may mean that you're trying to index an internal (intranet) server.
          Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-private-network-cloud.html.
        MESSAGE

      when 401
        unauthorized_validation_result(details)

      when 403
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} denied us permission to view that page (HTTP 403).
          This website may require a user name and password.
          Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-managing.html#crawler-managing-authentication.
        MESSAGE

      when 404
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} says that there is no web page at that location (HTTP 404).
        MESSAGE

      when 407
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} is configured to require an HTTP proxy for access (HTTP 407).
          This may mean that you're trying to index an internal (intranet) server.
          Read more at: https://www.elastic.co/guide/en/enterprise-search/current/crawler-private-network-cloud.html.
        MESSAGE

      when 429
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} refused our request due to
          rate-limiting (HTTP 429).
        MESSAGE

      when 451
        validation_fail(:url_request, <<~MESSAGE, details)
          The web server at #{url} refused our request for legal reasons (HTTP 451).
        MESSAGE

      # The specific 4xx branches above win because `case` takes the first matching `when`.
      # The range is inclusive so that 499 is treated as a client error too.
      when 400..499
        validation_fail(:url_request, "Failed to fetch #{url}: HTTP #{status}.", details)

      # Inclusive up to 598; 599 keeps its own branch below.
      when 500..598
        validation_fail(:url_request, "Transient error fetching #{url}: HTTP #{status}.", details)

      # NOTE(review): 599 is not a registered HTTP status; presumably the HTTP executor uses it
      # to signal an internal fetch failure -- confirm, and consider surfacing its error text here.
      when 599
        validation_fail(:url_request, <<~MESSAGE, details)
          Unexpected error fetching #{url}: HTTP #{status}.
        MESSAGE

      else
        validation_fail(:url_request, <<~MESSAGE, details)
          Unexpected HTTP status while fetching #{url}: HTTP #{status}.
        MESSAGE
      end
    end