in utils/es.py [0:0]
def _resolve_dotted_field(source, dotted_name):
    """
    Follow the dot-separated path @dotted_name through the nested dictionaries of @source.
    :param source: dictionary to look into (here, the "_source" of a search hit).
    :param dotted_name: dot-separated field path, e.g. "a.b.c".
    :return: tuple (found, value). found is False, and value is None, as soon as one step of the path
    cannot be taken.
    """
    current = source
    for key in dotted_name.split("."):
        # Only dictionaries can be descended into. Without the isinstance check a scalar intermediate value
        # would raise TypeError on `in`, and a string would be substring-matched and then fail on indexing.
        if not isinstance(current, dict) or key not in current:
            return False, None
        current = current[key]
    return True, current


def get_missing_docs_info(client: Elasticsearch, data_stream: str, display_docs: int, dir: str,
                          get_overlapping_files: bool, copy_docs_per_dimension: int) -> None:
    """
    Display the timestamp and the dimensions of the first @display_docs overwritten documents.
    If @get_overlapping_files is set to True, the directory @dir is created and get_and_place_documents is called
    once per displayed document. If the directory already exists, a warning is printed and no documents are placed.
    The index that is searched (overwritten_docs_index) and the list of dimensions
    (time_series_fields["dimension"]) are read from names defined outside this function.
    :param client: ES client.
    :param data_stream: name of the data stream; forwarded to get_and_place_documents.
    :param display_docs: number of documents to display.
    :param dir: name of the directory.
    :param get_overlapping_files: true if you want to place files in the directory, false otherwise.
    :param copy_docs_per_dimension: number of documents to get for a set of dimensions.
    """
    if get_overlapping_files:
        # Try to create the directory and react to the failure, instead of checking for existence first:
        # this leaves no window between the check and the creation.
        try:
            os.mkdir(dir)
        except FileExistsError:
            print("WARNING: The directory {} exists. Please delete it. Documents will not be placed.\n".format(dir))
            get_overlapping_files = False

    body = {'size': display_docs, 'query': {'match_all': {}}}
    res = client.search(index=overwritten_docs_index, body=body)
    dimensions = time_series_fields["dimension"]

    print("The timestamp and dimensions of the first {} overwritten documents are:".format(display_docs))
    # n is the 1-based position of the document; it is only consumed when documents are being placed.
    for n, doc in enumerate(res["hits"]["hits"], start=1):
        source = doc["_source"]
        timestamp = source["@timestamp"]
        dimensions_values = {"@timestamp": timestamp}
        dimensions_missing = []

        print("- Timestamp {}:".format(timestamp))
        for dimension in dimensions:
            found, value = _resolve_dotted_field(source, dimension)
            print("\t{}: {}".format(dimension, value if found else "(Missing value)"))
            # Presence is tracked with an explicit flag, so a dimension whose real value happens to be the
            # text "(Missing value)" is not mistaken for a missing one.
            if found:
                dimensions_values[dimension] = value
            else:
                dimensions_missing.append(dimension)

        if get_overlapping_files:
            get_and_place_documents(client, data_stream, dir, dimensions_values, dimensions_missing, n,
                                    copy_docs_per_dimension)