in hugegraph-llm/src/hugegraph_llm/utils/vector_index_utils.py [0:0]
def read_documents(input_file, input_text):
    """Collect document texts from pasted text or from uploaded files.

    Pasted text takes precedence: when ``input_text`` is truthy the files are
    ignored and the text is returned as the only document.

    Args:
        input_file: Iterable of objects exposing a ``name`` attribute that
            holds the file path (presumably Gradio upload objects -- confirm
            against the caller). Only consulted when ``input_text`` is falsy.
        input_text: Raw text supplied directly by the user.

    Returns:
        A list of strings, one per document, in input order.

    Raises:
        gr.Error: If neither text nor files are supplied, if a PDF is
            uploaded (not supported yet), or if a file has any other
            unsupported extension.
    """
    if input_text:
        return [input_text]
    if not input_file:
        raise gr.Error("Please input text or upload file.")

    texts = []
    for file in input_file:
        full_path = file.name
        # Match extensions case-insensitively so "NOTES.TXT" or "Report.DOCX"
        # are accepted; the original path is still the one that gets opened.
        lowered_path = full_path.lower()
        if lowered_path.endswith(".txt"):
            with open(full_path, "r", encoding="utf-8") as f:
                texts.append(f.read())
        elif lowered_path.endswith(".docx"):
            doc = docx.Document(full_path)
            # Every paragraph is followed by a newline, including the last.
            texts.append("".join(f"{para.text}\n" for para in doc.paragraphs))
        elif lowered_path.endswith(".pdf"):
            # TODO: support PDF file
            raise gr.Error("PDF will be supported later! Try to upload text/docx now")
        else:
            raise gr.Error("Please input txt or docx file.")
    return texts