def create_or_update_skillset()

in demo-python/code/indexers/document-intelligence-custom-skill/scripts/setup_search_service.py [0:0]


def create_or_update_skillset(search_indexer_client: SearchIndexerClient, document_skill_url: str, split_skill_url: str):
    """Create or update the skillset that extracts, chunks, and embeds documents.

    The skillset chains three skills per source document:
      1. A custom web API (document intelligence) skill that converts the
         document into markdown (``file_markdown_content``).
      2. A custom web API skill that splits that markdown into chunks.
      3. An Azure OpenAI embedding skill that vectorizes each chunk.
    Index projections then map each chunk (content, vector, headers, title)
    into the target index as its own search document; the parent documents
    themselves are skipped.

    :param search_indexer_client: Client used to create/update the skillset.
    :param document_skill_url: Endpoint URL of the document-intelligence custom skill.
    :param split_skill_url: Endpoint URL of the markdown-splitting custom skill.
    """
    document_skill = WebApiSkill(
        description="Document intelligence skill to extract content from documents",
        context="/document",
        uri=document_skill_url,
        timeout=timedelta(seconds=230),
        batch_size=1,
        degree_of_parallelism=1,
        inputs=[
            InputFieldMappingEntry(name="metadata_storage_path", source="/document/metadata_storage_path"),
            InputFieldMappingEntry(name="metadata_storage_sas_token", source="/document/metadata_storage_sas_token"),
            # A leading '=' makes the source a literal expression, not a document path.
            InputFieldMappingEntry(name="mode", source='="markdown"')
        ],
        outputs=[
            OutputFieldMappingEntry(name="content", target_name="file_markdown_content")
        ]
    )

    vectorizer_resource_uri = os.environ["AZURE_OPENAI_ENDPOINT"]
    vectorizer_deployment = os.environ["AZURE_OPENAI_EMB_DEPLOYMENT"]
    vectorizer_model = os.environ["AZURE_OPENAI_EMB_MODEL"]
    # os.environ always yields a string; the embedding skill's `dimensions`
    # property is an integer, so convert it here.
    vectorizer_dimensions = int(os.environ["AZURE_OPENAI_EMB_MODEL_DIMENSIONS"])
    split_skill = WebApiSkill(
        description="Markdown split skill to extract chunks from documents",
        context="/document",
        uri=split_skill_url,
        timeout=timedelta(seconds=230),
        batch_size=1,
        degree_of_parallelism=1,
        inputs=[
            InputFieldMappingEntry(name="content", source="/document/file_markdown_content"),
            InputFieldMappingEntry(name="encoderModelName", source=f'="{vectorizer_model}"'),
            InputFieldMappingEntry(name="chunkSize", source='=512'),
            InputFieldMappingEntry(name="chunkOverlap", source='=128')
        ],
        outputs=[
            OutputFieldMappingEntry(name="chunks", target_name="chunks")
        ]
    )

    embedding_skill = AzureOpenAIEmbeddingSkill(
        description="Skill to generate embeddings via an Azure OpenAI endpoint",
        # Runs once per chunk produced by the split skill.
        context="/document/chunks/*",
        resource_uri=vectorizer_resource_uri,
        deployment_id=vectorizer_deployment,
        model_name=vectorizer_model,
        dimensions=vectorizer_dimensions,
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/chunks/*/content"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="embedding", target_name="vector")
        ]
    )

    index_projections = SearchIndexerIndexProjections(
        selectors=[
            SearchIndexerIndexProjectionSelector(
                target_index_name=sample_index_name,
                parent_key_field_name="parent_id",
                source_context="/document/chunks/*",
                mappings=[
                    InputFieldMappingEntry(name="chunk", source="/document/chunks/*/content"),
                    InputFieldMappingEntry(name="vector", source="/document/chunks/*/vector"),
                    InputFieldMappingEntry(name="chunk_headers", source="/document/chunks/*/headers"),
                    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name")
                ],
            )
        ],
        parameters=SearchIndexerIndexProjectionsParameters(
            # Only the projected chunk documents are indexed, not the parents.
            projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
        ),
    )

    skillset = SearchIndexerSkillset(
        name=sample_skillset_name,
        description="Skillset to use document intelligence, chunk documents and generating embeddings",
        skills=[document_skill, split_skill, embedding_skill],
        index_projections=index_projections,
    )
    search_indexer_client.create_or_update_skillset(skillset)