in lib/crawler/coordinator.rb [446:480]
# Enqueues the URLs found on a crawled HTML page.
#
# If the crawl result carries a canonical link, a valid one is added to the
# backlog at the task's own depth with source type :canonical_url; an invalid
# one is only reported as a warning through the system logger.
#
# Afterwards the links returned by extract_links are added to the backlog one
# level deeper than the task, with source type :organic.
#
# Returns nil when no links were extracted; otherwise returns whatever
# add_urls_to_backlog returns for the organic links.
def extract_and_enqueue_html_links(crawl_task, crawl_result)
  canonical = crawl_result.canonical_link
  if canonical&.valid?
    # The canonical URL is enqueued at the same depth as the page declaring it.
    add_urls_to_backlog(
      urls: [canonical.to_url],
      type: :content,
      source_type: :canonical_url,
      source_url: crawl_task.url,
      crawl_depth: crawl_task.depth
    )
  elsif canonical
    # Present but unparsable: log it and carry on with the page's other links.
    system_logger.warn(
      "Failed to parse canonical URL '#{canonical.link}' on '#{crawl_result.url}': #{canonical.error}"
    )
  end

  # Links discovered on this page sit one level below the page itself.
  next_depth = crawl_task.depth + 1
  outgoing_links = extract_links(crawl_result, crawl_depth: next_depth)

  if outgoing_links.any?
    add_urls_to_backlog(
      urls: outgoing_links,
      type: :content,
      source_type: :organic,
      source_url: crawl_task.url,
      crawl_depth: next_depth
    )
  end
end