in utils/es.py [0:0]
def get_and_place_documents(client: Elasticsearch, data_stream: str, dir_name: str, dimensions_values: dict,
                            dimensions_missing: list, n: int, number_of_docs: int) -> None:
    """
    Fetch the documents matching a set of dimensions and write each one to disk.

    The search runs against ``data_stream`` with the query returned by
    ``build_query(dimensions_values, dimensions_missing)``, sorted by ascending
    ``@timestamp`` and limited to ``number_of_docs`` hits. Every hit is written,
    as returned by the search (the whole hit, not only ``_source``), to
    ``dir_name/<n>/<_id>.json``.

    :param client: ES client.
    :param data_stream: Name of the data stream to search.
    :param dir_name: Name of the parent directory.
    :param dimensions_values: Values for the dimension fields that exist in the document.
    :param dimensions_missing: List of dimension fields missing in the document.
    :param n: Number of the directory inside the parent directory. Example: 1 would create dir_name/1.
    :param number_of_docs: Number of documents to get with that set of dimensions.
    :raises FileExistsError: If the directory ``dir_name/<n>`` already exists.
    """
    query = build_query(dimensions_values, dimensions_missing)
    res = client.search(index=data_stream, query=query, sort={"@timestamp": "asc"}, size=number_of_docs)

    dir_for_docs = os.path.join(dir_name, str(n))
    # makedirs (rather than mkdir) also creates a missing parent directory.
    # exist_ok is deliberately left off so an already existing target still
    # raises instead of silently mixing new documents with old ones.
    os.makedirs(dir_for_docs)

    for doc in res["hits"]["hits"]:
        name = doc["_id"] + ".json"
        with open(os.path.join(dir_for_docs, name), 'w', encoding="utf-8") as file:
            json.dump(doc, file, indent=4)