in demo-dotnet/DotNetIntegratedVectorizationDemo/Program.cs [63:171]
/// <summary>
/// Creates or updates the search index, the blob data source connection, the
/// chunking/embedding skillset and the indexer, and then triggers an indexer run.
/// </summary>
/// <remarks>
/// All resource names other than the index itself are derived from
/// <c>configuration.IndexName</c> using the suffixes <c>-blob</c>, <c>-skillset</c>
/// and <c>-indexer</c>. The method returns once the run request has been sent;
/// it does not wait for the indexer run to finish.
/// </remarks>
/// <param name="configuration">Settings supplying the index name, blob connection string and container name, and the Azure OpenAI endpoint, API key and embedding deployment.</param>
/// <param name="indexClient">Client used to create or update the index.</param>
/// <param name="indexerClient">Client used to create or update the data source connection, skillset and indexer, and to run the indexer.</param>
/// <param name="openAIClient">Not used by this method; kept so existing callers continue to compile.</param>
/// <returns>A task that completes after the indexer run has been requested.</returns>
internal static async Task SetupAndRunIndexer(Configuration configuration, SearchIndexClient indexClient, SearchIndexerClient indexerClient, OpenAIClient openAIClient)
{
    // Create an Index
    Console.WriteLine("Creating/Updating the index...");
    var index = GetSampleIndex(configuration);
    await indexClient.CreateOrUpdateIndexAsync(index).ConfigureAwait(false);
    Console.WriteLine("Index Created/Updated!");

    // Create a Data Source Connection
    Console.WriteLine("Creating/Updating the data source connection...");
    var dataSource = new SearchIndexerDataSourceConnection(
        $"{configuration.IndexName}-blob",
        SearchIndexerDataSourceType.AzureBlob,
        connectionString: configuration.BlobConnectionString,
        container: new SearchIndexerDataContainer(configuration.BlobContainerName));
    // Use the async overload so this async method never blocks a thread on network I/O.
    await indexerClient.CreateOrUpdateDataSourceConnectionAsync(dataSource).ConfigureAwait(false);
    Console.WriteLine("Data Source Created/Updated!");

    // Create a Skillset
    Console.WriteLine("Creating/Updating the skillset...");
    var skillset = new SearchIndexerSkillset($"{configuration.IndexName}-skillset", new List<SearchIndexerSkill>
    {
        // Skill 1: split each document's content into overlapping pages,
        // written to /document/pages.
        new SplitSkill(
            new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("text") { Source = "/document/content" }
            },
            new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("textItems") { TargetName = "pages" }
            })
        {
            Context = "/document",
            TextSplitMode = TextSplitMode.Pages,
            MaximumPageLength = 2000,
            PageOverlapLength = 500,
        },
        // Skill 2: runs in the context of each page and stores the embedding
        // as "vector" under that page (/document/pages/*/vector).
        new AzureOpenAIEmbeddingSkill(
            new List<InputFieldMappingEntry>
            {
                new InputFieldMappingEntry("text") { Source = "/document/pages/*" }
            },
            new List<OutputFieldMappingEntry>
            {
                new OutputFieldMappingEntry("embedding") { TargetName = "vector" }
            }
        )
        {
            Context = "/document/pages/*",
            ResourceUri = new Uri(configuration.AzureOpenAIEndpoint),
            ApiKey = configuration.AzureOpenAIApiKey,
            DeploymentId = configuration.AzureOpenAIEmbeddingDeployedModel,
        }
    })
    {
        // Project every page into the target index as its own entry, keyed back
        // to the source document through "parent_id".
        IndexProjections = new SearchIndexerIndexProjections(new[]
        {
            new SearchIndexerIndexProjectionSelector(configuration.IndexName, parentKeyFieldName: "parent_id", sourceContext: "/document/pages/*", mappings: new[]
            {
                new InputFieldMappingEntry("chunk")
                {
                    Source = "/document/pages/*"
                },
                new InputFieldMappingEntry("vector")
                {
                    Source = "/document/pages/*/vector"
                },
                new InputFieldMappingEntry("title")
                {
                    Source = "/document/metadata_storage_name"
                }
            })
        })
        {
            Parameters = new SearchIndexerIndexProjectionsParameters
            {
                // Only the projected pages are indexed; the parent documents are skipped.
                ProjectionMode = IndexProjectionMode.SkipIndexingParentDocuments
            }
        }
    };
    await indexerClient.CreateOrUpdateSkillsetAsync(skillset).ConfigureAwait(false);
    Console.WriteLine("Skillset Created/Updated!");

    // Create an Indexer
    Console.WriteLine("Creating/Updating the indexer...");
    var indexer = new SearchIndexer($"{configuration.IndexName}-indexer", dataSource.Name, configuration.IndexName)
    {
        Description = "Indexer to chunk documents, generate embeddings, and add to the index",
        // Scheduled with a one-day interval, starting from the current local time.
        Schedule = new IndexingSchedule(TimeSpan.FromDays(1))
        {
            StartTime = DateTimeOffset.Now
        },
        Parameters = new IndexingParameters()
        {
            // One item per batch, with both failure thresholds set to zero.
            BatchSize = 1,
            MaxFailedItems = 0,
            MaxFailedItemsPerBatch = 0,
        },
        SkillsetName = skillset.Name,
    };
    await indexerClient.CreateOrUpdateIndexerAsync(indexer).ConfigureAwait(false);
    Console.WriteLine("Indexer Created/Updated!");

    // Run Indexer
    Console.WriteLine("Running the indexer...");
    await indexerClient.RunIndexerAsync(indexer.Name).ConfigureAwait(false);
    Console.WriteLine("Indexer is Running!");
}