in sample_app/cerebral_genai/code/rag-on-edge-vectorDB/modules/VDBModule/function/NormalizeText.py [0:0]
def get_doc_content(self, pdf_file):
    """Collect the normalized text items found on every page of a PDF.

    The PDF is opened with ``PyPDF2.PdfReader`` and its pages are visited
    in order. Each page is handed to ``self.normalize_text_to_page_item``,
    and every item that call yields is stripped of surrounding whitespace.
    Items that are empty or consist solely of digits are discarded; the
    remaining ones are passed through ``self.normalize_text_to_itemtext``
    and gathered into the result.

    Args:
        pdf_file: The PDF source, forwarded unchanged to
            ``PyPDF2.PdfReader`` (presumably a path or a binary file
            object -- confirm against the callers).

    Returns:
        A list with one ``self.normalize_text_to_itemtext`` result per
        retained item, in page order and then item order.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    collected = []
    for pdf_page in reader.pages:
        for raw_item in self.normalize_text_to_page_item(pdf_page):
            stripped = raw_item.strip()
            # Keep only items that carry content: blank items and
            # digit-only items (presumably bare page numbers -- confirm)
            # are dropped.
            if stripped != "" and not stripped.isdigit():
                collected.append(self.normalize_text_to_itemtext(stripped))
    return collected