in parquet_flask/aws/es_middleware.py [0:0]
def query_pages(self, dsl, querying_index=None) -> dict:
    """
    Retrieve every hit matching ``dsl`` by paging through the results with ``search_after``.

    The first page is requested as-is; each following page is requested with
    ``search_after`` set to the ``sort`` values of the last hit received so far.
    Paging stops when a request comes back with no hits.

    The ``dsl`` passed in is NOT modified: paging works on a shallow copy, so the
    same DSL can safely be reused for another call.

    :param dsl: dict query body. It must contain a ``sort`` key; the sort should be
        unique per document, otherwise ``search_after`` paging may skip or repeat hits.
    :param querying_index: index to query; passed through ``__validate_index`` first.
    :return: dict | {"total": <number of hits collected>, "items": [<hit>, ...]}
    :raises ValueError: if ``dsl`` has no ``sort`` key.
    """
    if 'sort' not in dsl:
        raise ValueError('missing `sort` in DSL. Make sure sorting is unique')
    index = self.__validate_index(querying_index)
    page_size = 10000  # replacing with the maximum size to minimize number of scrolls
    # Shallow copy: only top-level keys (`size`, `search_after`) are set here, and
    # they must not leak back into the caller's DSL.
    paged_dsl = {**dsl, 'size': page_size}
    params = {
        'index': index,
        'size': page_size,
        'body': paged_dsl,
    }
    LOGGER.debug(f'dsl: {paged_dsl}')
    items = []
    batch = self._engine.search(**params)['hits']['hits']
    while batch:
        items.extend(batch)
        # `params['body']` is `paged_dsl`, so the next request picks up the new cursor.
        paged_dsl['search_after'] = batch[-1]['sort']
        batch = self._engine.search(**params)['hits']['hits']
    return {
        'total': len(items),
        'items': items,
    }