in demo-python/code/community-integration/ragas/lib/utils.py [0:0]
def create_sample_skillset(
    search_indexer_client: SearchIndexerClient,
    index_name: str,
    azure_openai_endpoint: str,
    azure_openai_ada002_embedding_deployment: str,
    azure_openai_3_large_embedding_deployment: str,
    azure_openai_key: Optional[str] = None
):
    """Create or update the sample chunk-and-embed skillset for an index.

    The skillset is named ``"<index_name>-skillset"`` and contains three
    skills:

    1. A split skill that chunks ``/document/content`` into pages
       (maximum page length 2000, overlap 500), exposed as
       ``/document/pages``.
    2. An Azure OpenAI embedding skill (``text-embedding-ada-002``) that
       writes ``vector_ada002`` for every page.
    3. An Azure OpenAI embedding skill (``text-embedding-3-large``) that
       writes ``vector_3_large`` for every page.

    Index projections map each page into ``index_name`` as its own
    document (fields ``chunk``, ``vector_ada002``, ``vector_3_large`` and
    ``title``), keyed back to the source document through ``parent_id``.
    Parent documents themselves are not indexed.

    Args:
        search_indexer_client: Client used to submit the skillset.
        index_name: Target index for the projections; also the prefix of
            the skillset name.
        azure_openai_endpoint: Resource URI passed to both embedding skills.
        azure_openai_ada002_embedding_deployment: Deployment id used by the
            ``text-embedding-ada-002`` skill.
        azure_openai_3_large_embedding_deployment: Deployment id used by the
            ``text-embedding-3-large`` skill.
        azure_openai_key: API key passed to both embedding skills. Defaults
            to ``None``.
            NOTE(review): presumably the service falls back to
            identity-based authentication when no key is given -- confirm.

    Returns:
        The result of ``search_indexer_client.create_or_update_skillset``.
    """
    skillset_name = f"{index_name}-skillset"

    # Chunk the raw document text into overlapping pages.
    split_skill = SplitSkill(
        description="Split skill to chunk documents",
        text_split_mode="pages",
        context="/document",
        maximum_page_length=2000,
        page_overlap_length=500,
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/content"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="textItems", target_name="pages")
        ],
    )

    # Both embedding skills run once per page (context "/document/pages/*")
    # and differ only in deployment, model name and output field.
    embedding_ada_002_skill = AzureOpenAIEmbeddingSkill(
        description="Skill to generate ada 002 embeddings via Azure OpenAI",
        context="/document/pages/*",
        resource_uri=azure_openai_endpoint,
        deployment_id=azure_openai_ada002_embedding_deployment,
        api_key=azure_openai_key,
        model_name="text-embedding-ada-002",
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/pages/*"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="embedding", target_name="vector_ada002")
        ],
    )
    embedding_3_large_skill = AzureOpenAIEmbeddingSkill(
        # Previously this description was a copy of the ada-002 one, which
        # mislabelled the skill in the stored skillset definition.
        description="Skill to generate text-embedding-3-large embeddings via Azure OpenAI",
        context="/document/pages/*",
        resource_uri=azure_openai_endpoint,
        deployment_id=azure_openai_3_large_embedding_deployment,
        api_key=azure_openai_key,
        model_name="text-embedding-3-large",
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/pages/*"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="embedding", target_name="vector_3_large")
        ],
    )

    # Project every page into the target index as a separate document,
    # linked to its source document via the "parent_id" field.
    index_projections = SearchIndexerIndexProjections(
        selectors=[
            SearchIndexerIndexProjectionSelector(
                target_index_name=index_name,
                parent_key_field_name="parent_id",
                source_context="/document/pages/*",
                mappings=[
                    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
                    InputFieldMappingEntry(name="vector_ada002", source="/document/pages/*/vector_ada002"),
                    InputFieldMappingEntry(name="vector_3_large", source="/document/pages/*/vector_3_large"),
                    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
                ],
            ),
        ],
        parameters=SearchIndexerIndexProjectionsParameters(
            # Only the projected chunks are indexed, not the parent documents.
            projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
        ),
    )

    skillset = SearchIndexerSkillset(
        name=skillset_name,
        description="Skillset to chunk documents and generating embeddings",
        skills=[split_skill, embedding_3_large_skill, embedding_ada_002_skill],
        index_projections=index_projections,
    )
    return search_indexer_client.create_or_update_skillset(skillset)