in demo-python/code/data-chunking/lib/common.py [0:0]
def create_search_skillset(
        skillset_name,
        index_name,
        azure_openai_endpoint,
        azure_openai_embedding_deployment_id,
        azure_openai_key=None,
        text_split_mode='pages',
        maximum_page_length=2000,
        page_overlap_length=500):
    """Build a skillset that splits document text, embeds each piece, and projects it.

    The returned ``SearchIndexerSkillset`` holds two skills, in this order:

    1. A ``SplitSkill`` that reads ``/document/content`` and writes its
       ``textItems`` output under the name ``pages``.
    2. An ``AzureOpenAIEmbeddingSkill`` that runs once per
       ``/document/pages/*`` entry and writes its ``embedding`` output under
       the name ``vector``.

    It also carries an index projection that maps every ``/document/pages/*``
    entry into ``index_name`` with the fields ``chunk``, ``vector`` and
    ``title``, using ``parent_id`` as the parent key field and the
    ``skipIndexingParentDocuments`` projection mode.

    Args:
        skillset_name: Name assigned to the skillset.
        index_name: Target index name for the index projection selector.
        azure_openai_endpoint: Passed as ``resource_uri`` to the embedding
            skill.
        azure_openai_embedding_deployment_id: Passed as ``deployment_id`` to
            the embedding skill.
        azure_openai_key: Passed as ``api_key`` to the embedding skill;
            defaults to ``None``.
        text_split_mode: Passed through to ``SplitSkill.text_split_mode``.
        maximum_page_length: Passed through to
            ``SplitSkill.maximum_page_length``.
        page_overlap_length: Passed through to
            ``SplitSkill.page_overlap_length``.

    Returns:
        The assembled ``SearchIndexerSkillset``. Nothing is sent to a service
        by this function; it only constructs the definition object.
    """
    # Every per-chunk path below hangs off the split skill's "pages" output.
    chunk_path = "/document/pages/*"

    splitter = SplitSkill(
        name="Text Splitter",
        default_language_code="en",
        text_split_mode=text_split_mode,
        maximum_page_length=maximum_page_length,
        page_overlap_length=page_overlap_length,
        context="/document",
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/content"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="textItems", target_name="pages"),
        ],
    )

    embedder = AzureOpenAIEmbeddingSkill(
        name="Embeddings",
        resource_uri=azure_openai_endpoint,
        deployment_id=azure_openai_embedding_deployment_id,
        # The key may be left as None when RBAC authentication is used.
        api_key=azure_openai_key,
        context=chunk_path,
        inputs=[
            InputFieldMappingEntry(name="text", source=chunk_path),
        ],
        outputs=[
            OutputFieldMappingEntry(name="embedding", target_name="vector"),
        ],
    )

    # One selector: each chunk becomes its own entry in the target index.
    chunk_selector = SearchIndexerIndexProjectionSelector(
        target_index_name=index_name,
        parent_key_field_name="parent_id",
        source_context=chunk_path,
        mappings=[
            InputFieldMappingEntry(name="chunk", source=chunk_path),
            InputFieldMappingEntry(
                name="vector",
                source="/document/pages/*/vector",
            ),
            InputFieldMappingEntry(
                name="title",
                source="/document/metadata_storage_name",
            ),
        ],
    )

    projections = SearchIndexerIndexProjections(
        selectors=[chunk_selector],
        parameters=SearchIndexerIndexProjectionsParameters(
            projection_mode="skipIndexingParentDocuments",
        ),
    )

    return SearchIndexerSkillset(
        name=skillset_name,
        skills=[splitter, embedder],
        index_projections=projections,
    )