def __init__()

in src/models/rag.py [0:0]
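
For reference, the sketch below lists the imports this constructor appears to rely on. The exact module paths are an assumption, not part of the original file: they vary across llama_index releases, and the layout shown here is the post-0.10 `llama_index.core` one.

import os
from time import time
from typing import Dict, List, Optional

from llama_index.core import (
    Document, SimpleKeywordTableIndex, StorageContext,
    VectorStoreIndex, load_index_from_storage,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.retrievers import KeywordTableSimpleRetriever, VectorIndexRetriever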


    def __init__(self, chunk_list: Optional[List[Dict[str, str]]] = None,
                       storage_path: str = './data/textbooks/rag_storage',
                       emb_model_path: str = "local:../../models/rag_embedding/bge-m3",
                       chunk_size: int = 1024, similarity_top_k: int = 3, hybrid_search: bool = False,
                       reranker_path: Optional[str] = None, rerank_top_n: int = 3,
                       **kwargs):
        # Create the persistence directory; build a fresh index only if it is empty.
        os.makedirs(storage_path, exist_ok=True)
        if len(os.listdir(storage_path)) == 0:
            assert chunk_list, 'chunk_list must be non-empty when building a new index'
            # Wrap each raw chunk as a Document, split into fixed-size nodes, embed, and persist.
            documents = [Document(text=chunk['data'], doc_id=chunk['idx']) for chunk in chunk_list]
            node_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=0)
            nodes = node_parser.get_nodes_from_documents(documents)
            self.index = VectorStoreIndex(nodes, embed_model=emb_model_path, show_progress=True, **kwargs)
            self.index.storage_context.persist(storage_path)
        else:
            # A persisted index already exists; load it instead of re-embedding.
            print('Loading LlamaIndex Storage ...')
            t0 = time()
            storage_context = StorageContext.from_defaults(persist_dir=storage_path)
            self.index = load_index_from_storage(storage_context, embed_model=emb_model_path)
            print(f'Done in {time() - t0:.1f} seconds.')

        # Dense retriever over the vector index.
        self.retriever: VectorIndexRetriever = self.index.as_retriever(similarity_top_k=similarity_top_k)
        if hybrid_search:
            # Build a keyword (sparse) index over the same nodes for hybrid retrieval.
            nodes = list(self.index.storage_context.docstore.docs.values())
            self.keyword_index = SimpleKeywordTableIndex(nodes, show_progress=True)
            self.keyword_retriever: KeywordTableSimpleRetriever = \
                self.keyword_index.as_retriever(num_chunks_per_query=similarity_top_k)
        else:
            self.keyword_retriever = None

        # Optional cross-encoder reranker applied to retrieved nodes.
        if reranker_path:
            self.rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=reranker_path)
        else:
            self.rerank = None
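
A minimal usage sketch follows. The enclosing class name is not shown in this excerpt, so `RAG` is assumed here (the file is rag.py); the example chunks, paths, and query are placeholders for illustration only.

# Sketch, not part of src/models/rag.py; `RAG` and the sample data are assumptions.
from src.models.rag import RAG

chunks = [
    {'idx': 'anatomy_0', 'data': 'The human heart has four chambers ...'},
    {'idx': 'anatomy_1', 'data': 'The aorta is the largest artery ...'},
]

rag = RAG(chunk_list=chunks,
          storage_path='./data/textbooks/rag_storage',
          similarity_top_k=3,
          hybrid_search=True,
          reranker_path=None)

# Dense retrieval; when hybrid_search=True a keyword retriever is also available
# via rag.keyword_retriever, and rag.rerank (if set) can post-process the nodes.
nodes = rag.retriever.retrieve('How many chambers does the heart have?')
for node in nodes:
    print(node.score, node.node.get_content()[:80])

On the second and later runs with the same storage_path, the constructor skips embedding and loads the persisted index from disk, so chunk_list can be omitted.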