in lib/crawler/robots_txt_service.rb [42:59]
def url_disallowed_outcome(url)
  domain = url.domain
  unless registered?(domain)
    raise MissingRobotsTxt, "No robots.txt has yet been registered for the domain #{domain}"
  end

  # Look up the parsed robots.txt for this domain and decide whether the URL's path may be crawled
  parser = store.fetch(domain.to_s)
  if parser.allow_none?
    DisallowedOutcome.new(true, parser.allow_none_reason)
  elsif parser.allowed?(url.path)
    DisallowedOutcome.new(false, nil)
  else
    DisallowedOutcome.new(true, 'Disallowed by robots.txt')
  end
end
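
A minimal caller sketch, assuming the service instance is held in a `service` variable and that `DisallowedOutcome` exposes `disallowed?` and `reason` readers; those names, the `url` construction, and the `logger` are illustrative assumptions, not confirmed by this snippet:

# Hypothetical usage: check a URL before fetching it.
outcome = service.url_disallowed_outcome(url)   # `url` responds to #domain and #path
if outcome.disallowed?
  logger.info("Skipping #{url}: #{outcome.reason}")
else
  # proceed with the fetch
end

The caller is expected to have registered a robots.txt for the URL's domain beforehand; otherwise the method raises MissingRobotsTxt rather than guessing at a default policy.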