in lib/crawler/api/crawl.rb [86:115]
# Runs a crawl end-to-end: emits a crawl-start event, delegates the actual
# crawling to the coordinator, and records the overall outcome.
#
# Any StandardError raised during the run is logged and recorded as a
# :failure outcome rather than being propagated to the caller.
#
# The ensure clause always runs: when resuming is allowed, the crawl queue
# and seen-URLs state are persisted so a later run can continue; otherwise
# all crawl resources are released and the final status is printed.
def start!
  events.crawl_start(
    url_queue_items: crawl_queue.length,
    seen_urls: seen_urls.count
  )

  ingestion_stats = coordinator.run_crawl!
  record_overall_outcome(coordinator.crawl_results)
rescue StandardError => error
  log_exception(error, 'Unexpected error while running the crawl')
  record_outcome(
    outcome: :failure,
    message: 'Unexpected error while running the crawl, check system logs for details'
  )
ensure
  if allow_resume?
    # Persist queue and dedup state on purpose so the crawl can be resumed.
    system_logger.info('Not removing the crawl queue to allow the crawl to resume later')
    crawl_queue.save
    seen_urls.save
  else
    system_logger.info('Releasing resources used by the crawl...')
    crawl_queue.delete
    seen_urls.clear
    print_final_crawl_status
    # NOTE(review): ingestion_stats is nil here when run_crawl! raised before
    # completing — presumably print_crawl_ingestion_results tolerates nil; verify.
    print_crawl_ingestion_results(ingestion_stats) if config.output_sink.to_s == 'elasticsearch'
  end
end