def create_qa_index()

in gemini/sample-apps/llamaindex-rag/backend/indexing/run_parse_embed_index.py [0:0]


def create_qa_index(li_docs, docstore, embed_model, llm):
    """Build a vector index of hypothetical questions for each document.

    For every document in ``li_docs`` this extracts candidate questions with
    the LLM, parses the raw LLM output into structured ``QuesionsAnswered``
    objects, and indexes each question as its own ``Document`` whose SOURCE
    relationship points back at the originating document, enabling
    retrieve-by-question lookups.

    Args:
        li_docs: LlamaIndex documents to generate questions for.
        docstore: Document store that receives the source documents.
        embed_model: Embedding model used when building the vector index.
        llm: LLM used for question extraction.

    Returns:
        The ``VectorStoreIndex`` built over the question documents.
    """
    qa_index, qa_endpoint = get_or_create_existing_index(
        QA_INDEX_NAME, QA_ENDPOINT_NAME, APPROXIMATE_NEIGHBORS_COUNT
    )
    qa_vector_store = VertexAIVectorStore(
        project_id=PROJECT_ID,
        region=LOCATION,
        index_id=qa_index.name,  # Use .name instead of .resource_name
        endpoint_id=qa_endpoint.name,
        gcs_bucket_name=DOCSTORE_BUCKET_NAME,
    )
    qa_extractor = QuestionsAnsweredExtractor(
        llm, questions=5, prompt_template=QA_EXTRACTION_PROMPT
    )
    program = LLMTextCompletionProgram.from_defaults(
        output_cls=QuesionsAnswered,
        prompt_template_str=QA_PARSER_PROMPT,
        verbose=True,
    )

    async def extract_batch(docs):
        # NOTE(review): relies on the extractor's private async helper;
        # there is no public per-node async API for this on
        # QuestionsAnsweredExtractor.
        return await tqdm_asyncio.gather(
            *[qa_extractor._aextract_questions_from_node(doc) for doc in docs]
        )

    async def parse_batch(metadata):
        # return_exceptions=True so one unparsable LLM response does not
        # abort the whole batch; failures are filtered out below.
        return await asyncio.gather(
            *[program.acall(questions_list=x) for x in metadata],
            return_exceptions=True,
        )

    async def run_pipeline():
        # Extract first, then parse — preserves the original two-phase order.
        metadata_list = await extract_batch(li_docs)
        return await parse_batch(metadata_list)

    # asyncio.run creates and always closes its own event loop. The previous
    # get_event_loop()/run_until_complete pattern is deprecated since
    # Python 3.10 and leaked the loop if either phase raised before close().
    parsed_questions = asyncio.run(run_pipeline())

    q_docs = []
    for doc, questions in zip(li_docs, parsed_questions):
        if isinstance(questions, Exception):
            logger.info(f"Unparsable questions exception {questions}")
            continue
        for q in questions.questions_list:
            logger.info(f"Question extracted: {q}")
            q_doc = Document(text=q)
            # Link each question back to the document it was derived from.
            q_doc.relationships[NodeRelationship.SOURCE] = RelatedNodeInfo(
                node_id=doc.doc_id
            )
            q_docs.append(q_doc)

    docstore.add_documents(li_docs)
    storage_context = StorageContext.from_defaults(
        docstore=docstore, vector_store=qa_vector_store
    )
    # Previously the index was built and immediately discarded; return it so
    # callers can use it directly (backward-compatible for callers that
    # ignore the return value).
    return VectorStoreIndex(
        nodes=q_docs,
        storage_context=storage_context,
        embed_model=embed_model,
        llm=llm,
    )