in lib/crawler/coordinator.rb [482:509]
def extract_and_enqueue_sitemap_links(crawl_task, crawl_result)
  result = crawl_result.extract_links
  limit_reached, error = result.values_at(:limit_reached, :error)
  system_logger.warn("Too many links in a sitemap '#{crawl_result.url}': #{error}") if limit_reached

  # Enqueue both kinds of extracted links: nested sitemap references and content URLs
  %i[sitemap content].each do |link_type|
    extracted_links = result.fetch(:links).fetch(link_type)

    # Keep only the links that parsed cleanly, de-duplicating them via a Set
    good_links = Set.new
    extracted_links.each do |link|
      unless link.valid?
        system_logger.warn(
          "Failed to parse a #{link_type} link '#{link.link}' from sitemap '#{crawl_result.url}': #{link.error}"
        )
        next
      end
      good_links << link.to_url
    end

    add_urls_to_backlog(
      urls: good_links,
      type: link_type,
      source_type: :sitemap,
      source_url: crawl_task.url,
      crawl_depth: crawl_task.depth
    )
  end
end
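
For reference, below is a minimal runnable sketch of the result shape this method consumes. The LinkStub struct and the literal hash are illustrative assumptions, not the crawler's real classes; the actual extract_links return value supplies the crawler's own link objects, which respond to valid?, link, error, and to_url.

require 'set'

# Hypothetical stand-in for the crawler's link objects (assumption, for illustration only)
LinkStub = Struct.new(:link, :error, keyword_init: true) do
  def valid?
    error.nil?
  end

  def to_url
    link
  end
end

# Assumed shape of crawl_result.extract_links: nested sitemap references
# under :sitemap and page URLs under :content
result = {
  limit_reached: false,
  error: nil,
  links: {
    sitemap: [LinkStub.new(link: 'https://example.com/sitemap-2.xml')],
    content: [
      LinkStub.new(link: 'https://example.com/page-1'),
      LinkStub.new(link: 'not a url', error: 'unable to parse URL')
    ]
  }
}

# Same filtering as in the method above: invalid links are skipped with a
# warning, valid ones are collected into a de-duplicating Set
good_links = Set.new
result[:links][:content].each do |link|
  next unless link.valid?
  good_links << link.to_url
end

good_links.to_a # => ["https://example.com/page-1"]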