in lib/crawler/coordinator.rb [382:420]
# Handles a finished crawl: runs link extraction, then reports exactly one
# `url_extracted` event describing what happened to the result.
#
# The event starts out as allowed/successful and carries the result URL, the
# wall-clock timestamps taken around the link-extraction step, and the
# Benchmark.measure value for that step (nil when @url_test is set, because
# extraction is skipped in that case). The first matching case below then
# adjusts it:
#
# * the rule engine's outcome is denied -> type, deny_reason and message are
#   taken from that outcome;
# * the result is a redirect            -> only a message naming the redirect
#   location is added, nothing is ingested;
# * the task is a content task          -> the hash returned by
#   output_crawl_result is merged in (and may override the defaults).
#
# @param crawl_task   task object; must respond to #content?
# @param crawl_result result object; must respond to #url, #redirect? and #location
# @return whatever events.url_extracted returns
def process_crawl_result(crawl_task, crawl_result)
  crawl_task_progress(crawl_task, 'processing result')

  # Timestamps bracket the extraction step even when it is skipped.
  start_time = Time.now
  duration = @url_test ? nil : Benchmark.measure { extract_and_enqueue_links(crawl_task, crawl_result) }
  end_time = Time.now

  # The rule engine is consulted before the event is assembled.
  verdict = rule_engine.output_crawl_result_outcome(crawl_result)

  event = { url: crawl_result.url, type: :allowed, start_time:, end_time:, duration:, outcome: :success }

  if verdict.denied?
    event[:type] = :denied
    event[:deny_reason] = verdict.deny_reason
    event[:message] = verdict.message
  elsif crawl_result.redirect?
    crawl_task_progress(crawl_task, 'skipping ingestion of redirect')
    event[:message] = "Crawler was redirected to #{crawl_result.location}"
  elsif crawl_task.content?
    crawl_task_progress(crawl_task, 'ingesting the result')
    event.merge!(output_crawl_result(crawl_result))
  end

  events.url_extracted(**event)
end