in gemini/sample-apps/llamaindex-rag/backend/indexing/run_parse_embed_index.py [0:0]
def create_qa_index(li_docs, docstore, embed_model, llm):
    """Build a vector index of hypothetical questions derived from documents.

    For each document in ``li_docs`` an LLM extracts candidate questions, a
    second LLM pass parses them into a structured list, and every question
    becomes its own ``Document`` linked back to its source document.  The
    question documents are embedded into a Vertex AI vector store; the
    original documents are persisted to ``docstore``.

    Args:
        li_docs: Iterable of LlamaIndex ``Document`` objects to process.
        docstore: Document store that receives the original documents.
        embed_model: Embedding model used when building the index.
        llm: LLM used for both question extraction and parsing.

    Returns:
        None.  Index construction is performed for its side effects
        (upserting embedded question nodes into the vector store).
    """
    qa_index, qa_endpoint = get_or_create_existing_index(
        QA_INDEX_NAME, QA_ENDPOINT_NAME, APPROXIMATE_NEIGHBORS_COUNT
    )
    qa_vector_store = VertexAIVectorStore(
        project_id=PROJECT_ID,
        region=LOCATION,
        index_id=qa_index.name,  # Use .name instead of .resource_name
        endpoint_id=qa_endpoint.name,
        gcs_bucket_name=DOCSTORE_BUCKET_NAME,
    )

    qa_extractor = QuestionsAnsweredExtractor(
        llm, questions=5, prompt_template=QA_EXTRACTION_PROMPT
    )

    async def extract_batch(docs):
        # Fan out one extraction call per document, with a progress bar.
        return await tqdm_asyncio.gather(
            *[qa_extractor._aextract_questions_from_node(doc) for doc in docs]
        )

    # asyncio.run() creates and tears down a fresh event loop per batch.
    # The previous get_event_loop()/run_until_complete()/close() pattern is
    # deprecated since Python 3.10, and close() destroyed the thread's shared
    # loop, breaking any async work attempted later in the same process.
    metadata_list = asyncio.run(extract_batch(li_docs))

    program = LLMTextCompletionProgram.from_defaults(
        # NOTE: the misspelling is in the project-declared class name itself;
        # renaming it here would break the reference.
        output_cls=QuesionsAnswered,
        prompt_template_str=QA_PARSER_PROMPT,
        verbose=True,
    )

    async def parse_batch(raw_metadata):
        # return_exceptions=True keeps one bad parse from failing the whole
        # batch; failures surface as Exception instances in the result list.
        return await asyncio.gather(
            *[program.acall(questions_list=x) for x in raw_metadata],
            return_exceptions=True,
        )

    parsed_questions = asyncio.run(parse_batch(metadata_list))

    q_docs = []
    for doc, questions in zip(li_docs, parsed_questions):
        if isinstance(questions, Exception):
            # Best-effort: skip documents whose questions could not be parsed.
            logger.info("Unparsable questions exception %s", questions)
            continue
        for q in questions.questions_list:
            logger.info("Question extracted: %s", q)
            q_doc = Document(text=q)
            # Link each question node back to the document it came from so
            # retrieval on a question can resolve to the source document.
            q_doc.relationships[NodeRelationship.SOURCE] = RelatedNodeInfo(
                node_id=doc.doc_id
            )
            q_docs.append(q_doc)

    docstore.add_documents(li_docs)
    storage_context = StorageContext.from_defaults(
        docstore=docstore, vector_store=qa_vector_store
    )
    # Constructing the index embeds q_docs and upserts them into the Vertex AI
    # vector store via the storage context — side effect only, nothing kept.
    VectorStoreIndex(
        nodes=q_docs,
        storage_context=storage_context,
        embed_model=embed_model,
        llm=llm,
    )