in lib/crawler/coordinator.rb [626:657]
def add_url_to_backlog(url:, type:, source_type:, crawl_depth:, source_url:, redirect_chain: []) # rubocop:disable Metrics/ParameterLists
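  # Enqueue a new crawl task for this URL at the requested depth, carrying
  # along any redirect chain that led to it.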
  crawl_queue.push(
    Crawler::Data::CrawlTask.new(
      url:,
      type:,
      depth: crawl_depth,
      redirect_chain:
    )
  )
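
  # Record the seed event so the discovery is reflected in the crawl event log.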
  events.url_seed(
    url:,
    source_url:,
    type:,
    source_type:,
    crawl_depth:
  )
rescue Crawler::Data::UrlQueue::TransientError => e
  # We couldn't enqueue the URL for a visit, so remove it from the seen URLs
  # list to allow it to be re-discovered later.
  seen_urls.delete(url)

  # Log at debug level to avoid flooding the logs when the queue is full;
  # the queue itself reports its state at the warning level.
  system_logger.debug("Failed to add a crawler task into the processing queue: #{e}")
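
  # Report the URL as denied so the drop is visible in the crawl events.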
  events.url_discover_denied(
    url:,
    source_url:,
    crawl_depth:,
    deny_reason: :queue_full
  )
end
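
# Illustrative only, not part of coordinator.rb: a rough sketch of how this
# method might be invoked when a link is extracted from a crawled page. The
# symbol values (:content, :organic), the depth, and the URL parsing helper
# are assumptions for the example, not values confirmed by the excerpt above.
#
#   add_url_to_backlog(
#     url: Crawler::Data::URL.parse('https://example.com/about'),
#     type: :content,
#     source_type: :organic,
#     crawl_depth: 2,
#     source_url: Crawler::Data::URL.parse('https://example.com/'),
#     redirect_chain: []
#   )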