def create_search_skillset()

in demo-python/code/data-chunking/lib/common.py [0:0]


def create_search_skillset(
        skillset_name,
        index_name,
        azure_openai_endpoint,
        azure_openai_embedding_deployment_id,
        azure_openai_key=None,
        text_split_mode='pages',
        maximum_page_length=2000,
        page_overlap_length=500):
    """Assemble a skillset that splits document content and embeds each piece.

    The skillset holds two skills plus an index projection:

    1. A ``SplitSkill`` that reads ``/document/content`` and emits its
       ``textItems`` output under the name ``pages``.
    2. An ``AzureOpenAIEmbeddingSkill`` that runs once per ``/document/pages/*``
       entry and emits its ``embedding`` output under the name ``vector``.
    3. A projection into ``index_name`` that maps each page to the ``chunk``,
       ``vector`` and ``title`` fields, keyed to the parent through ``parent_id``.

    Args:
        skillset_name: Name given to the returned skillset.
        index_name: Target index name used by the index projection selector.
        azure_openai_endpoint: Passed as the embedding skill's ``resource_uri``.
        azure_openai_embedding_deployment_id: Passed as the embedding skill's
            ``deployment_id``.
        azure_openai_key: Passed as the embedding skill's ``api_key``; may be
            left as ``None`` (optional if using RBAC authentication).
        text_split_mode: Forwarded to the split skill's ``text_split_mode``.
        maximum_page_length: Forwarded to the split skill's
            ``maximum_page_length``.
        page_overlap_length: Forwarded to the split skill's
            ``page_overlap_length``.

    Returns:
        The configured ``SearchIndexerSkillset``; nothing is sent to a service
        by this function itself.
    """
    # Enrichment-tree path of the individual pieces produced by the splitter;
    # shared by the embedding skill and the index projection below.
    chunk_path = "/document/pages/*"

    splitter = SplitSkill(
        name="Text Splitter",
        default_language_code="en",
        text_split_mode=text_split_mode,
        maximum_page_length=maximum_page_length,
        page_overlap_length=page_overlap_length,
        context="/document",
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="textItems", target_name="pages")],
    )

    embedder = AzureOpenAIEmbeddingSkill(
        name="Embeddings",
        resource_uri=azure_openai_endpoint,
        deployment_id=azure_openai_embedding_deployment_id,
        api_key=azure_openai_key,  # Optional if using RBAC authentication
        context=chunk_path,
        inputs=[InputFieldMappingEntry(name="text", source=chunk_path)],
        outputs=[OutputFieldMappingEntry(name="embedding", target_name="vector")],
    )

    # Index field name -> enrichment-tree source for every projected page.
    field_mappings = [
        InputFieldMappingEntry(name="chunk", source=chunk_path),
        InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
        InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
    ]

    page_selector = SearchIndexerIndexProjectionSelector(
        target_index_name=index_name,
        parent_key_field_name="parent_id",
        source_context=chunk_path,
        mappings=field_mappings,
    )

    projections = SearchIndexerIndexProjections(
        selectors=[page_selector],
        parameters=SearchIndexerIndexProjectionsParameters(
            projection_mode="skipIndexingParentDocuments"
        ),
    )

    return SearchIndexerSkillset(
        name=skillset_name,
        skills=[splitter, embedder],
        index_projections=projections,
    )