def get_doc_content()

in sample_app/cerebral_genai/code/rag-on-edge-vectorDB/modules/VDBModule/function/NormalizeText.py [0:0]


    def get_doc_content(self, pdf_file):
        """Collect the normalized text items found on every page of a PDF.

        Each page exposed by ``PyPDF2.PdfReader`` is handed to
        ``self.normalize_text_to_page_item``. Every entry that call yields is
        stripped of surrounding whitespace; entries that end up empty or that
        consist solely of digits are discarded. Each remaining entry is
        passed through ``self.normalize_text_to_itemtext``.

        Args:
            pdf_file: The PDF source, forwarded unchanged to
                ``PyPDF2.PdfReader``.

        Returns:
            A list holding the results of ``self.normalize_text_to_itemtext``
            for the kept entries, in the order the pages and their entries
            were visited.
        """
        reader = PyPDF2.PdfReader(pdf_file)
        collected = []
        for pdf_page in reader.pages:
            for raw_entry in self.normalize_text_to_page_item(pdf_page):
                stripped = raw_entry.strip()
                # Keep only entries with real content; blank and digit-only
                # entries are skipped (presumably page numbers -- confirm).
                if stripped != "" and not stripped.isdigit():
                    collected.append(self.normalize_text_to_itemtext(stripped))
        return collected