in demo-python/code/custom-vectorizer/scripts/setup_search_service.py [0:0]
def create_or_update_skillset(search_indexer_client: SearchIndexerClient, custom_vectorizer_url: str):
    """Create or update the sample skillset that chunks documents and embeds the chunks.

    The skillset pipeline:
      1. A split skill chunks each document's content into ~300-character pages
         with a 20-character overlap between consecutive pages.
      2. A Web API skill posts each page to ``custom_vectorizer_url`` to obtain
         its embedding vector.
      3. Index projections write one search document per page (chunk, vector,
         title) into the index named by the module-level ``sample_index_name``,
         skipping indexing of the parent document itself.

    Args:
        search_indexer_client: Client used to create/update the skillset.
        custom_vectorizer_url: Endpoint of the custom embedding Web API.

    Returns:
        The created or updated skillset as returned by the service.
    """
    # Chunking parameters: 300 chars per page with 20-char overlap keeps
    # neighboring chunks contextually connected for retrieval.
    split_skill = SplitSkill(
        description="Split skill to chunk documents",
        text_split_mode="pages",
        context="/document",
        maximum_page_length=300,
        page_overlap_length=20,
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/content"),
        ],
        outputs=[
            # Each chunk is exposed downstream as /document/pages/*
            OutputFieldMappingEntry(name="textItems", target_name="pages")
        ],
    )
    # Calls the custom vectorizer endpoint once per chunk; the endpoint is
    # expected to accept {"text": ...} and return a "vector" field.
    embedding_skill = WebApiSkill(
        description="Skill to generate embeddings via a custom endpoint",
        context="/document/pages/*",
        uri=custom_vectorizer_url,
        inputs=[
            InputFieldMappingEntry(name="text", source="/document/pages/*"),
        ],
        outputs=[
            OutputFieldMappingEntry(name="vector", target_name="vector")
        ],
    )
    # Project each chunk into its own search document; the parent document is
    # not indexed on its own (SKIP_INDEXING_PARENT_DOCUMENTS).
    index_projections = SearchIndexerIndexProjections(
        selectors=[
            SearchIndexerIndexProjectionSelector(
                target_index_name=sample_index_name,
                parent_key_field_name="parent_id",
                source_context="/document/pages/*",
                mappings=[
                    InputFieldMappingEntry(name="chunk", source="/document/pages/*"),
                    InputFieldMappingEntry(name="vector", source="/document/pages/*/vector"),
                    InputFieldMappingEntry(name="title", source="/document/metadata_storage_name"),
                ],
            ),
        ],
        parameters=SearchIndexerIndexProjectionsParameters(
            projection_mode=IndexProjectionMode.SKIP_INDEXING_PARENT_DOCUMENTS
        ),
    )
    skillset = SearchIndexerSkillset(
        name=sample_skillset_name,
        description="Skillset to chunk documents and generating embeddings",
        skills=[split_skill, embedding_skill],
        index_projections=index_projections,
    )
    # Fix: the service response was previously assigned but never returned,
    # leaving callers unable to inspect the created/updated skillset.
    result = search_indexer_client.create_or_update_skillset(skillset)
    return result