private static SearchIndexerSkillset PrepGetSkillset()

in src/ai/commands/search_command.cs [341:445]


        /// <summary>
        /// Builds the skillset definition that splits document text into pages, produces an
        /// embedding for every page, and projects each page into the target index.
        /// When an AI Services key is supplied, OCR and text-merge skills are placed in front
        /// of the split so that text found in images is included.
        /// </summary>
        /// <param name="skillsetName">Name given to the resulting skillset.</param>
        /// <param name="aiServicesApiKey">AI Services key; a null or empty value disables the OCR and merge skills.</param>
        /// <param name="embeddingsEndpoint">Endpoint of the embeddings resource; converted to a <see cref="Uri"/>.</param>
        /// <param name="embeddingsDeployment">Deployment id passed to the embedding skill.</param>
        /// <param name="embeddingsModelName">Model name passed to the embedding skill.</param>
        /// <param name="embeddingsApiKey">API key passed to the embedding skill.</param>
        /// <param name="idFieldName">Field used as the parent key in the index projection.</param>
        /// <param name="contentFieldName">Index field that receives each page's text.</param>
        /// <param name="vectorFieldName">Index field that receives each page's embedding.</param>
        /// <param name="datasourceIndex">Index whose name is used as the projection target.</param>
        /// <returns>The configured skillset, including its index projections and cognitive services account.</returns>
        private static SearchIndexerSkillset PrepGetSkillset(string skillsetName, string aiServicesApiKey, string embeddingsEndpoint, string embeddingsDeployment, string embeddingsModelName, string embeddingsApiKey, string idFieldName, string contentFieldName, string vectorFieldName, SearchIndex datasourceIndex)
        {
            const int maximumPageLength = 2000;
            const int pageOverlapLength = 500;

            // Enrichment-tree paths shared by several skills below.
            const string documentPath = "/document";
            const string imagesPath = "/document/normalized_images/*";
            const string pagesPath = "/document/pages/*";

            // The presence of an AI Services key is what turns the OCR stages on.
            bool ocrEnabled = !string.IsNullOrEmpty(aiServicesApiKey);

            // --- OCR: extract text from each normalized image -------------------------------
            var ocrInputs = new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("image") { Source = imagesPath }
            };
            var ocrOutputs = new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("text") { TargetName = "text" }
            };
            var imageOcr = new OcrSkill(ocrInputs, ocrOutputs)
            {
                Context = imagesPath,
                ShouldDetectOrientation = true
            };

            // --- Merge: insert the OCR text into the document content at the image offsets ---
            var mergeInputs = new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("text") { Source = "/document/content" },
                new InputFieldMappingEntry("itemsToInsert") { Source = "/document/normalized_images/*/text" },
                new InputFieldMappingEntry("offsets") { Source = "/document/normalized_images/*/contentOffset" }
            };
            var mergeOutputs = new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("mergedText") { TargetName = "mergedText" }
            };
            var ocrMerge = new MergeSkill(mergeInputs, mergeOutputs)
            {
                Context = documentPath,
                InsertPreTag = " ",
                InsertPostTag = " "
            };

            // --- Split: page the merged text when OCR is on, otherwise the raw content -------
            string splitSource;
            if (ocrEnabled)
            {
                splitSource = "/document/mergedText";
            }
            else
            {
                splitSource = "/document/content";
            }

            var splitInputs = new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("text") { Source = splitSource }
            };
            var splitOutputs = new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("textItems") { TargetName = "pages" }
            };
            var pageSplitter = new SplitSkill(splitInputs, splitOutputs)
            {
                DefaultLanguageCode = SplitSkillLanguage.En,
                TextSplitMode = TextSplitMode.Pages,
                MaximumPageLength = maximumPageLength,
                PageOverlapLength = pageOverlapLength,
                Context = documentPath,
            };

            // --- Embed: one vector per page --------------------------------------------------
            var embedInputs = new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("text") { Source = pagesPath }
            };
            var embedOutputs = new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("embedding") { TargetName = "vector" }
            };
            var pageEmbedder = new AzureOpenAIEmbeddingSkill(embedInputs, embedOutputs)
            {
                Context = pagesPath,
                ResourceUri = new Uri(embeddingsEndpoint),
                ApiKey = embeddingsApiKey,
                DeploymentId = embeddingsDeployment,
                ModelName = embeddingsModelName,
            };

            // --- Skill order: the OCR stages (if any) run ahead of split and embed ------------
            var pipeline = new List<SearchIndexerSkill>();
            if (ocrEnabled)
            {
                pipeline.Add(imageOcr);
                pipeline.Add(ocrMerge);
            }
            pipeline.Add(pageSplitter);
            pipeline.Add(pageEmbedder);

            // --- Projection: every page becomes its own document in the target index ---------
            // The index name is read before the field mappings are built, matching the
            // argument evaluation order of a single inline constructor call.
            var targetIndexName = datasourceIndex.Name;
            var pageMappings = new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry(contentFieldName) { Source = pagesPath },
                new InputFieldMappingEntry(vectorFieldName) { Source = "/document/pages/*/vector" }
            };
            var pageSelector = new SearchIndexerIndexProjectionSelector(
                targetIndexName,
                parentKeyFieldName: idFieldName,
                sourceContext: pagesPath,
                mappings: pageMappings);

            var projections = new SearchIndexerIndexProjections(
                new List<SearchIndexerIndexProjectionSelector> { pageSelector })
            {
                Parameters = new SearchIndexerIndexProjectionsParameters()
                {
                    // Only the projected pages are indexed; the parent documents are skipped.
                    ProjectionMode = IndexProjectionMode.SkipIndexingParentDocuments
                }
            };

            return new SearchIndexerSkillset(skillsetName, pipeline)
            {
                IndexProjections = projections,
                // The supplied key is attached only when the OCR stages are present.
                CognitiveServicesAccount = ocrEnabled
                    ? new CognitiveServicesAccountKey(aiServicesApiKey)
                    : new DefaultCognitiveServicesAccount()
            };
        }