in utils/es.py [0:0]
def place_documents(client: Elasticsearch, index_name: str, folder_docs: str):
    """
    Place all documents from a folder on an index.

    Every regular file found directly inside ``folder_docs`` is handed to
    ``add_doc_from_file``; entries that are not regular files (for example
    sub-directories) are skipped. Afterwards the index is refreshed and the
    total number of documents it holds is printed.

    If the index or the folder does not exist, a message is printed and the
    program is terminated with a non-zero exit status.

    :param client: ES client.
    :param index_name: name of the index to add the documents.
    :param folder_docs: path to the folder with the documents to add.
    :raises SystemExit: if the index or the folder does not exist.
    """
    # Local import so this function does not depend on the module's import
    # block. ``sys.exit`` is used instead of the ``exit`` helper injected by
    # the ``site`` module, which is not guaranteed to exist (e.g. ``python -S``).
    import sys

    print(f"Placing documents on the index {index_name}...")

    if not client.indices.exists(index=index_name):
        print(f"Index {index_name} does not exist. Program will end.")
        # Non-zero status: the requested work could not be done.
        sys.exit(1)

    if not os.path.isdir(folder_docs):
        print(f"Folder {folder_docs} does not exist. Documents cannot be placed. Program will end.")
        sys.exit(1)

    for doc in os.listdir(folder_docs):
        doc_path = os.path.join(folder_docs, doc)
        if os.path.isfile(doc_path):
            add_doc_from_file(client, index_name, doc_path)

    # From Elastic docs: the refresh API explicitly makes all operations
    # performed on an index since the last refresh available for search.
    # Without it, the count below could miss the documents just added.
    client.indices.refresh(index=index_name)

    # Use the count API rather than a match_all search: the search API caps
    # hits.total.value at 10,000 by default and fetches hits that are never
    # used. Note this is the total number of documents in the index, which
    # includes any that were already there before this call.
    n_docs = client.count(index=index_name)["count"]
    print(f"Successfully placed {n_docs} documents on the index {index_name}.\n")