in lib/crawler/output_sink/elasticsearch.rb [125:146]
# Deletes every document in the index whose `last_crawled_at` value is earlier
# than the supplied crawl start time.
#
# The index is refreshed before the delete-by-query request is sent. The
# `deleted` entry of the response is stored in @deleted.
#
# @param crawl_start_time [#rfc3339] cutoff; its RFC 3339 string is used as the
#   exclusive upper bound (`lt`) of the range query
# @return the `deleted` entry of the delete-by-query response
def purge(crawl_start_time)
  stale_before = { lt: crawl_start_time.rfc3339 }
  delete_body = {
    _source: ['url'],
    query: { range: { last_crawled_at: stale_before } }
  }.deep_stringify_keys

  system_logger.info('Deleting docs for pages that were not accessible during the purge crawl.')
  system_logger.debug("Full delete query: #{delete_body}")

  # Refresh first, then run the delete against the same index.
  client.indices.refresh(index: [index_name])
  outcome = client.delete_by_query(index: [index_name], body: delete_body)
  system_logger.debug("Delete by query response: #{outcome}")

  @deleted = outcome['deleted']
end