in spec/support/faux/faux_crawl.rb [151:202]
# Builds the crawl configuration hash for a faux crawl and instantiates the
# crawler Config/Crawl API objects onto the results collection.
#
# Assembles base settings (crawl identity, the single domain with its seed and
# sitemap URLs, binary-content extraction options, the mock output sink, and
# permissive network flags), merges per-crawl timeouts, optionally pins a
# default encoding, and — in enterprise-search mode — derives crawl rules and
# deduplication settings from the domain allowlist.
def configure_crawl
  settings = {
    crawl_id: crawl_id,
    auth: auth,
    user_agent: user_agent,
    domains: [
      {
        url: url,
        seed_urls: seed_urls,
        sitemap_urls: sitemap_urls
      }
    ],
    binary_content_extraction_enabled: content_extraction.fetch(:enabled),
    binary_content_extraction_mime_types: content_extraction.fetch(:mime_types),
    output_sink: :mock,
    results_collection: results,
    http_auth_allowed: true,
    loopback_allowed: true,
    private_networks_allowed: true,
    url_queue: url_queue
  }

  # Timeout overrides come in as a hash and are folded in wholesale.
  settings.update(timeouts)
  settings[:default_encoding] = default_encoding if default_encoding

  if enterprise_search?
    # Anchor each allowlisted domain at the start of the URL string.
    anchored_pattern = ->(domain) { "\\A#{Regexp.escape(domain)}" }

    settings[:crawl_rules] = domain_allowlist.map do |domain|
      { policy: 'allow', url_pattern: anchored_pattern.call(domain) }
    end
    settings[:deduplication_settings] = domain_allowlist.map do |domain|
      {
        fields: SharedTogo::Crawler.default_deduplication_fields,
        url_pattern: anchored_pattern.call(domain)
      }
    end
  end

  # Enterprise Search crawls use the LocoMoco crawler namespace; otherwise
  # the standalone ::Crawler namespace is used.
  crawler_module = if enterprise_search?
                     ::Crawler::LocoMoco
                   else
                     ::Crawler
                   end
  results.crawl_config = crawler_module::API::Config.new(settings)
  results.crawl = crawler_module::API::Crawl.new(results.crawl_config)
end