spec/lib/crawler/url_validator/crawl_rules_check_spec.rb (49 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
# frozen_string_literal: true
# Mock class definitions
module Crawler
  module RuleEngine
    # Spec-local declaration of the Elasticsearch rule engine. The examples in
    # this file stub `.new` on this constant and replace the instance with a
    # double, so only an empty `crawl_rules_outcome` is declared here.
    class Elasticsearch < Crawler::RuleEngine::Base
      # Placeholder with an empty body; takes a URL and returns nil.
      def crawl_rules_outcome(url); end
    end
  end
end
# Exercises Crawler::UrlValidator#validate_crawl_rules against a stubbed rule
# engine, checking which of the validator's result callbacks
# (validation_ok / validation_fail) is invoked for each outcome.
RSpec.describe(Crawler::UrlValidator) do
  let(:valid_url) { Crawler::Data::URL.parse('http://example.com') }
  let(:domain_allowlist) { ['example.com'] }
  let(:crawl_config) { double('CrawlConfig', domain_allowlist:) }
  let(:validator) { described_class.new(url: valid_url, crawl_config:) }

  # Doubles for the rule engine and the outcome it hands back. `allowed` is
  # supplied by each context below; `rule` may be overridden with nil.
  let(:rule_engine) { double('Crawler::RuleEngine::Elasticsearch') }
  let(:outcome) { double('Outcome', allowed?: allowed, details: { rule: }) }
  let(:rule) { double('Rule', source: 'some_rule_source') }

  describe '#validate_crawl_rules' do
    before do
      # Route rule-engine construction to the double and return the canned
      # outcome when it is queried with the validator's normalized URL.
      allow(Crawler::RuleEngine::Elasticsearch).to receive(:new).with(crawl_config).and_return(rule_engine)
      allow(rule_engine).to receive(:crawl_rules_outcome).with(validator.normalized_url).and_return(outcome)

      # Stub both result callbacks so they can be inspected with have_received.
      allow(validator).to receive(:validation_ok)
      allow(validator).to receive(:validation_fail)
    end

    context 'when the URL is allowed by a crawl rule' do
      let(:allowed) { true }

      it 'calls validation_ok' do
        validator.validate_crawl_rules
        expect(validator).to have_received(:validation_ok)
      end

      # Guards against an implementation that reports both results at once.
      it 'does not call validation_fail' do
        validator.validate_crawl_rules
        expect(validator).not_to have_received(:validation_fail)
      end
    end

    context 'when the URL is denied by a crawl rule' do
      let(:allowed) { false }

      it 'calls validation_fail' do
        validator.validate_crawl_rules
        expect(validator).to have_received(:validation_fail)
      end

      it 'does not call validation_ok' do
        validator.validate_crawl_rules
        expect(validator).not_to have_received(:validation_ok)
      end
    end

    context 'when the URL is denied because it did not match any rules' do
      let(:allowed) { false }
      # No rule matched, so the outcome's details carry a nil rule.
      let(:rule) { nil }

      it 'calls validation_fail' do
        validator.validate_crawl_rules
        expect(validator).to have_received(:validation_fail)
      end

      it 'does not call validation_ok' do
        validator.validate_crawl_rules
        expect(validator).not_to have_received(:validation_ok)
      end
    end
  end
end