in lib/crawler/rule_engine/base.rb [21:37]
# Decides the outcome for a discovered URL by running it through the
# domain allowlist, robots.txt, and per-domain crawl rules, in that order.
# The first check that denies the URL short-circuits the rest.
#
# NOTE(review): the allowlist is matched against `url.domain` while crawl
# rules are looked up by `url.domain_name` — confirm this is intentional.
#
# @param url [Crawler::Data::URL] the URL to evaluate
# @return the result of `denied_outcome`, `allowed_outcome`, or
#   `crawl_rules_outcome`, depending on which check decides
# @raise [ArgumentError] if +url+ is not a Crawler::Data::URL
def discover_url_outcome(url)
  raise ArgumentError, 'Needs a Crawler::Data::URL object' unless url.is_a?(Crawler::Data::URL)

  # 1. Domain allowlist: deny anything whose domain is not listed.
  return denied_outcome(:domain_filter_denied, domains: domain_allowlist) unless domain_allowlist.include?(url.domain)

  # 2. robots.txt: deny when the robots service reports the URL as disallowed.
  robots_verdict = robots_txt_service.url_disallowed_outcome(url)
  return denied_outcome(:robots_txt_disallowed, robots_verdict.disallow_message) if robots_verdict.disallowed?

  # 3. Crawl rules: only consulted when the domain name has a non-empty rule
  #    entry; a missing (nil) or empty entry means the URL is allowed as-is.
  rules_for_domain = crawl_rules[url.domain_name]
  rules_for_domain&.any? ? crawl_rules_outcome(url) : allowed_outcome
end