extract_and_enqueue_html_links

in lib/crawler/coordinator.rb [446:480]


    # Feeds the URLs discovered on a crawled HTML page into the crawl backlog.
    #
    # Two kinds of URLs are handled, in this order:
    # 1. The page's canonical link, if the crawl result carries one. A valid canonical link
    #    is enqueued with source type +:canonical_url+ at the task's own depth (not
    #    depth + 1); an invalid one is only reported through the system logger.
    # 2. The links returned by +extract_links+, enqueued with source type +:organic+ one
    #    level deeper than the task.
    #
    # @param crawl_task [#url, #depth] the task whose crawl produced +crawl_result+
    # @param crawl_result [#canonical_link, #url] the outcome of crawling the task's URL
    # @return [Object, nil] nil when no links were extracted, otherwise whatever the final
    #   +add_urls_to_backlog+ call returns
    def extract_and_enqueue_html_links(crawl_task, crawl_result)
      canonical_link = crawl_result.canonical_link

      if canonical_link && !canonical_link.valid?
        # A canonical URL that failed to parse is only logged; link extraction below still runs.
        system_logger.warn(
          "Failed to parse canonical URL '#{canonical_link.link}' on '#{crawl_result.url}': #{canonical_link.error}"
        )
      elsif canonical_link
        # Enqueued at the current depth rather than depth + 1 — presumably because the
        # canonical URL is another address for this same page (NOTE(review): confirm intent).
        add_urls_to_backlog(
          urls: [canonical_link.to_url], type: :content, source_type: :canonical_url,
          source_url: crawl_task.url, crawl_depth: crawl_task.depth
        )
      end

      # Links found on the page are enqueued one level deeper than the page itself.
      organic_links = extract_links(crawl_result, crawl_depth: crawl_task.depth + 1)
      return unless organic_links.any?

      add_urls_to_backlog(
        urls: organic_links, type: :content, source_type: :organic,
        source_url: crawl_task.url, crawl_depth: crawl_task.depth + 1
      )
    end