in lib/crawler/coordinator.rb [511:537]
# Collects the set of outbound URLs discovered on a crawled page.
#
# Extracts up to `config.max_extracted_links_count` links from the crawl
# result, logging a warning when that limit was hit. Unparseable links are
# logged and skipped; links marked nofollow (via rel attribute or a page-level
# meta tag) are reported through `events.url_discover_denied` and skipped.
#
# @param crawl_result [Object] the fetched page, responding to
#   `#extract_links`, `#url`, and `#meta_nofollow?`
# @param crawl_depth [Integer] depth of the page within the crawl
# @return [Set] URLs of the links that passed validation and nofollow checks
def extract_links(crawl_result, crawl_depth:)
  extraction = crawl_result.extract_links(limit: config.max_extracted_links_count)
  system_logger.warn("Too many links on the page '#{crawl_result.url}'") if extraction[:limit_reached]

  extraction[:links].each_with_object(Set.new) do |link, good_links|
    if !link.valid?
      # Malformed href — record it and move on.
      system_logger.warn("Failed to parse a link '#{link.link}' on '#{crawl_result.url}': #{link.error}")
    elsif link.rel_nofollow? || crawl_result.meta_nofollow?
      # Honor nofollow at both the link and page level.
      events.url_discover_denied(
        url: link.to_url,
        source_url: crawl_result.url,
        crawl_depth:,
        deny_reason: :nofollow
      )
    else
      good_links << link.to_url
    end
  end
end