discover_url_outcome

in lib/crawler/rule_engine/base.rb [21:37]


      # Decides what should happen to a newly discovered URL.
      #
      # The checks run in a fixed order and the first denial wins:
      #   1. the URL's domain must be present in +domain_allowlist+;
      #   2. +robots_txt_service+ must not report the URL as disallowed;
      #   3. if any crawl rules are registered under the URL's +domain_name+,
      #      the decision is delegated to +crawl_rules_outcome+; otherwise the
      #      URL is allowed.
      #
      # @param url [Crawler::Data::URL] the discovered URL to evaluate
      # @return the outcome object built by +denied_outcome+, +allowed_outcome+
      #   or +crawl_rules_outcome+
      # @raise [ArgumentError] if +url+ is not a Crawler::Data::URL
      def discover_url_outcome(url)
        unless url.is_a?(Crawler::Data::URL)
          raise ArgumentError, 'Needs a Crawler::Data::URL object'
        end

        # NOTE(review): this denial passes `domains:` while the robots.txt denial
        # below passes a positional message -- confirm denied_outcome accepts both.
        domain_permitted = domain_allowlist.include?(url.domain)
        return denied_outcome(:domain_filter_denied, domains: domain_allowlist) unless domain_permitted

        robots_verdict = robots_txt_service.url_disallowed_outcome(url)
        return denied_outcome(:robots_txt_disallowed, robots_verdict.disallow_message) if robots_verdict.disallowed?

        # A missing entry (nil) and an empty rule list are treated the same way:
        # there is nothing to evaluate, so the URL is allowed.
        if crawl_rules[url.domain_name]&.any?
          crawl_rules_outcome(url)
        else
          allowed_outcome
        end
      end