url_disallowed_outcome

in lib/crawler/robots_txt_service.rb [42:59]

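Given a parsed URL, this method decides whether the crawler may fetch it under the robots.txt rules registered for the URL's domain. It returns a DisallowedOutcome carrying a boolean flag and an optional message, and raises MissingRobotsTxt when the domain's robots.txt has not been registered yet.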

    def url_disallowed_outcome(url)
      domain = url.domain

      # A robots.txt for the domain must be registered before any of its
      # URLs can be checked.
      unless registered?(domain)
        raise MissingRobotsTxt, "No robots.txt has yet been registered for the domain #{domain}"
      end

      parser = store.fetch(domain.to_s)

      if parser.allow_none?
        # The parser allows nothing on this domain; surface its reason.
        DisallowedOutcome.new(true, parser.allow_none_reason)
      elsif parser.allowed?(url.path)
        # The path is allowed, so there is no disallow message to report.
        DisallowedOutcome.new(false, nil)
      else
        # The parser disallows this path; report the generic reason.
        DisallowedOutcome.new(true, 'Disallowed by robots.txt')
      end
    end