extract_and_enqueue_sitemap_links

in lib/crawler/coordinator.rb [482:509]


    # Pulls links out of a fetched sitemap and enqueues the parseable ones.
    #
    # Extraction results are split per link type (:sitemap for nested sitemap
    # references, :content for page URLs). Links that fail to parse are logged
    # and skipped; the remaining URLs are deduplicated and pushed onto the
    # crawl backlog, inheriting the originating task's URL and depth.
    #
    # @param crawl_task   [Crawler::Data::CrawlTask]  the task that fetched the sitemap
    # @param crawl_result [Crawler::Data::CrawlResult] the sitemap fetch result
    def extract_and_enqueue_sitemap_links(crawl_task, crawl_result)
      extraction = crawl_result.extract_links

      # The extractor caps how many links it will collect; surface that loudly.
      if extraction[:limit_reached]
        system_logger.warn("Too many links in a sitemap '#{crawl_result.url}': #{extraction[:error]}")
      end

      %i[sitemap content].each do |link_type|
        all_links = extraction.fetch(:links).fetch(link_type)
        parseable, broken = all_links.partition(&:valid?)

        broken.each do |link|
          system_logger.warn(
            "Failed to parse a #{link_type} link '#{link.link}' from sitemap '#{crawl_result.url}': #{link.error}"
          )
        end

        add_urls_to_backlog(
          urls: parseable.map(&:to_url).to_set, # Set deduplicates repeated URLs
          type: link_type,
          source_type: :sitemap,
          source_url: crawl_task.url,
          crawl_depth: crawl_task.depth
        )
      end
    end