demo-dotnet/DotNetIntegratedVectorizationDemo/Program.cs (296 lines of code) (raw):

using Azure;
using Azure.AI.OpenAI;
using Azure.Identity;
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Indexes.Models;
using Azure.Search.Documents.Models;
using Microsoft.Extensions.Configuration;

namespace DotNetIntegratedVectorizationDemo
{
    class Program
    {
        // Name of the semantic configuration. It is declared on the index in GetSampleIndex and
        // referenced again by Search, so it lives in one place to keep the two from drifting apart.
        private const string SemanticConfigurationName = "my-semantic-config";

        /// <summary>
        /// .NET Integrated Vectorization Demo
        /// </summary>
        /// <param name="setupAndRunIndexer">Sets up integrated vectorization indexer with a skillset.</param>
        /// <param name="query">Optional text of the search query. By default no query is run. Unless --textOnly is specified, this query is automatically vectorized.</param>
        /// <param name="filter">Optional filter of the search query. By default no filter is applied</param>
        /// <param name="k">How many results to return if running a query.</param>
        /// <param name="exhaustive">Optional, specifies if the query skips using the index and computes the true nearest neighbors. Can only be used with vector or hybrid queries.</param>
        /// <param name="textOnly">Optional, specifies if the query is vectorized before searching. If true, only the text indexed is used for search.</param>
        /// <param name="hybrid">Optional, specifies if the query combines text and vector results.</param>
        /// <param name="semantic">Optional, specifies if the semantic reranker is used to rerank results from the query.</param>
        static async Task Main(bool setupAndRunIndexer, string query = null, string filter = null, int k = 3, bool exhaustive = false, bool textOnly = false, bool hybrid = false, bool semantic = false)
        {
            // Bind settings from environment variables and local.settings.json onto the Configuration object.
            var configuration = new Configuration();
            new ConfigurationBuilder()
                .SetBasePath(Directory.GetCurrentDirectory())
                .AddEnvironmentVariables()
                .AddJsonFile("local.settings.json")
                .Build()
                .Bind(configuration);

            // Reject flag combinations that contradict each other before touching any service.
            if (textOnly && hybrid)
            {
                throw new ArgumentException("Cannot specify textOnly with hybrid", nameof(textOnly));
            }

            if (exhaustive && textOnly)
            {
                throw new ArgumentException("Cannot specify exhaustive with textOnly", nameof(exhaustive));
            }

            configuration.Validate();

            var defaultCredential = new DefaultAzureCredential();
            var openAIClient = InitializeOpenAIClient(configuration, defaultCredential);
            var indexClient = InitializeSearchIndexClient(configuration, defaultCredential);
            var indexerClient = InitializeSearchIndexerClient(configuration, defaultCredential);
            var searchClient = indexClient.GetSearchClient(configuration.IndexName);

            if (setupAndRunIndexer)
            {
                await SetupAndRunIndexer(configuration, indexClient, indexerClient, openAIClient);
            }

            if (!string.IsNullOrEmpty(query))
            {
                // Named arguments are required here: Search declares textOnly BEFORE exhaustive,
                // so passing the flags positionally in Main's parameter order swaps the two.
                await Search(
                    searchClient,
                    query,
                    k: k,
                    filter: filter,
                    textOnly: textOnly,
                    exhaustive: exhaustive,
                    hybrid: hybrid,
                    semantic: semantic);
            }
        }

        /// <summary>
        /// Creates or updates the index, the blob data source connection, the skillset and the indexer,
        /// then requests an indexer run.
        /// </summary>
        /// <param name="configuration">Settings providing the index name, blob storage details and Azure OpenAI details.</param>
        /// <param name="indexClient">Client used to create or update the index.</param>
        /// <param name="indexerClient">Client used to create or update the data source, skillset and indexer, and to run the indexer.</param>
        /// <param name="openAIClient">Not used by this method; kept so existing callers continue to compile.</param>
        internal static async Task SetupAndRunIndexer(Configuration configuration, SearchIndexClient indexClient, SearchIndexerClient indexerClient, OpenAIClient openAIClient)
        {
            // Create an Index
            Console.WriteLine("Creating/Updating the index...");
            var index = GetSampleIndex(configuration);
            await indexClient.CreateOrUpdateIndexAsync(index);
            Console.WriteLine("Index Created/Updated!");

            // Create a Data Source Connection
            Console.WriteLine("Creating/Updating the data source connection...");
            var dataSource = new SearchIndexerDataSourceConnection(
                $"{configuration.IndexName}-blob",
                SearchIndexerDataSourceType.AzureBlob,
                connectionString: configuration.BlobConnectionString,
                container: new SearchIndexerDataContainer(configuration.BlobContainerName));
            // Awaited like every other service call in this method instead of blocking the thread.
            await indexerClient.CreateOrUpdateDataSourceConnectionAsync(dataSource);
            Console.WriteLine("Data Source Created/Updated!");

            // Create a Skillset
            Console.WriteLine("Creating/Updating the skillset...");
            var skillset = new SearchIndexerSkillset($"{configuration.IndexName}-skillset", new List<SearchIndexerSkill>
            {
                // Split /document/content into pages of at most 2000 characters with a 500 character overlap.
                new SplitSkill(
                    new List<InputFieldMappingEntry>
                    {
                        new InputFieldMappingEntry("text") { Source = "/document/content" }
                    },
                    new List<OutputFieldMappingEntry>
                    {
                        new OutputFieldMappingEntry("textItems") { TargetName = "pages" }
                    })
                {
                    Context = "/document",
                    TextSplitMode = TextSplitMode.Pages,
                    MaximumPageLength = 2000,
                    PageOverlapLength = 500,
                },
                // Produce an embedding for each page; the output is stored under the name "vector".
                new AzureOpenAIEmbeddingSkill(
                    new List<InputFieldMappingEntry>
                    {
                        new InputFieldMappingEntry("text") { Source = "/document/pages/*" }
                    },
                    new List<OutputFieldMappingEntry>
                    {
                        new OutputFieldMappingEntry("embedding") { TargetName = "vector" }
                    })
                {
                    Context = "/document/pages/*",
                    ResourceUri = new Uri(configuration.AzureOpenAIEndpoint),
                    ApiKey = configuration.AzureOpenAIApiKey,
                    DeploymentId = configuration.AzureOpenAIEmbeddingDeployedModel,
                }
            })
            {
                // Project each page into the index as its own document, keyed back to its source via parent_id.
                IndexProjections = new SearchIndexerIndexProjections(new[]
                {
                    new SearchIndexerIndexProjectionSelector(
                        configuration.IndexName,
                        parentKeyFieldName: "parent_id",
                        sourceContext: "/document/pages/*",
                        mappings: new[]
                        {
                            new InputFieldMappingEntry("chunk") { Source = "/document/pages/*" },
                            new InputFieldMappingEntry("vector") { Source = "/document/pages/*/vector" },
                            new InputFieldMappingEntry("title") { Source = "/document/metadata_storage_name" }
                        })
                })
                {
                    Parameters = new SearchIndexerIndexProjectionsParameters
                    {
                        ProjectionMode = IndexProjectionMode.SkipIndexingParentDocuments
                    }
                }
            };
            await indexerClient.CreateOrUpdateSkillsetAsync(skillset).ConfigureAwait(false);
            Console.WriteLine("Skillset Created/Updated!");

            // Create an Indexer
            Console.WriteLine("Creating/Updating the indexer...");
            var indexer = new SearchIndexer($"{configuration.IndexName}-indexer", dataSource.Name, configuration.IndexName)
            {
                Description = "Indexer to chunk documents, generate embeddings, and add to the index",
                Schedule = new IndexingSchedule(TimeSpan.FromDays(1))
                {
                    StartTime = DateTimeOffset.Now
                },
                Parameters = new IndexingParameters()
                {
                    BatchSize = 1,
                    MaxFailedItems = 0,
                    MaxFailedItemsPerBatch = 0,
                },
                SkillsetName = skillset.Name,
            };
            await indexerClient.CreateOrUpdateIndexerAsync(indexer).ConfigureAwait(false);
            Console.WriteLine("Indexer Created/Updated!");

            // Run Indexer
            Console.WriteLine("Running the indexer...");
            await indexerClient.RunIndexerAsync(indexer.Name).ConfigureAwait(false);
            Console.WriteLine("Indexer is Running!");
        }

        /// <summary>
        /// Builds an <see cref="OpenAIClient"/> for the configured Azure OpenAI endpoint.
        /// </summary>
        /// <param name="configuration">Settings providing the endpoint and, optionally, an API key.</param>
        /// <param name="defaultCredential">Credential used when no API key is configured.</param>
        /// <returns>A client authenticated with the API key when one is set, otherwise with <paramref name="defaultCredential"/>.</returns>
        internal static OpenAIClient InitializeOpenAIClient(Configuration configuration, DefaultAzureCredential defaultCredential)
        {
            if (!string.IsNullOrEmpty(configuration.AzureOpenAIApiKey))
            {
                return new OpenAIClient(new Uri(configuration.AzureOpenAIEndpoint), new AzureKeyCredential(configuration.AzureOpenAIApiKey));
            }

            return new OpenAIClient(new Uri(configuration.AzureOpenAIEndpoint), defaultCredential);
        }

        /// <summary>
        /// Builds a <see cref="SearchIndexClient"/> for the configured search service endpoint.
        /// </summary>
        /// <param name="configuration">Settings providing the service endpoint and, optionally, an admin key.</param>
        /// <param name="defaultCredential">Credential used when no admin key is configured.</param>
        /// <returns>A client authenticated with the admin key when one is set, otherwise with <paramref name="defaultCredential"/>.</returns>
        internal static SearchIndexClient InitializeSearchIndexClient(Configuration configuration, DefaultAzureCredential defaultCredential)
        {
            if (!string.IsNullOrEmpty(configuration.AdminKey))
            {
                return new SearchIndexClient(new Uri(configuration.ServiceEndpoint), new AzureKeyCredential(configuration.AdminKey));
            }

            return new SearchIndexClient(new Uri(configuration.ServiceEndpoint), defaultCredential);
        }

        /// <summary>
        /// Builds a <see cref="SearchIndexerClient"/> for the configured search service endpoint.
        /// </summary>
        /// <param name="configuration">Settings providing the service endpoint and, optionally, an admin key.</param>
        /// <param name="defaultCredential">Credential used when no admin key is configured.</param>
        /// <returns>A client authenticated with the admin key when one is set, otherwise with <paramref name="defaultCredential"/>.</returns>
        internal static SearchIndexerClient InitializeSearchIndexerClient(Configuration configuration, DefaultAzureCredential defaultCredential)
        {
            if (!string.IsNullOrEmpty(configuration.AdminKey))
            {
                return new SearchIndexerClient(new Uri(configuration.ServiceEndpoint), new AzureKeyCredential(configuration.AdminKey));
            }

            return new SearchIndexerClient(new Uri(configuration.ServiceEndpoint), defaultCredential);
        }

        /// <summary>
        /// Builds the index definition used by the demo: two vector search profiles (HNSW with an
        /// Azure OpenAI vectorizer, and exhaustive KNN), one semantic configuration, and the fields
        /// parent_id, chunk_id (key), title, chunk, vector and category.
        /// </summary>
        /// <param name="configuration">Settings providing the index name and the Azure OpenAI vectorizer parameters.</param>
        /// <returns>The index definition; nothing is sent to the service by this method.</returns>
        internal static SearchIndex GetSampleIndex(Configuration configuration)
        {
            const string vectorSearchHnswProfile = "my-vector-profile";
            const string vectorSearchExhaustiveKnnProfile = "myExhaustiveKnnProfile";
            const string vectorSearchHnswConfig = "myHnsw";
            const string vectorSearchExhaustiveKnnConfig = "myExhaustiveKnn";
            const string vectorSearchVectorizer = "myOpenAIVectorizer";
            const int modelDimensions = 1536;

            SearchIndex searchIndex = new(configuration.IndexName)
            {
                VectorSearch = new()
                {
                    Profiles =
                    {
                        // Only the HNSW profile has a vectorizer attached.
                        new VectorSearchProfile(vectorSearchHnswProfile, vectorSearchHnswConfig)
                        {
                            Vectorizer = vectorSearchVectorizer
                        },
                        new VectorSearchProfile(vectorSearchExhaustiveKnnProfile, vectorSearchExhaustiveKnnConfig)
                    },
                    Algorithms =
                    {
                        new HnswAlgorithmConfiguration(vectorSearchHnswConfig),
                        new ExhaustiveKnnAlgorithmConfiguration(vectorSearchExhaustiveKnnConfig)
                    },
                    Vectorizers =
                    {
                        new AzureOpenAIVectorizer(vectorSearchVectorizer)
                        {
                            AzureOpenAIParameters = new AzureOpenAIParameters()
                            {
                                ResourceUri = new Uri(configuration.AzureOpenAIEndpoint),
                                ApiKey = configuration.AzureOpenAIApiKey,
                                DeploymentId = configuration.AzureOpenAIEmbeddingDeployedModel,
                            }
                        }
                    }
                },
                SemanticSearch = new()
                {
                    Configurations =
                    {
                        new SemanticConfiguration(SemanticConfigurationName, new()
                        {
                            TitleField = new SemanticField(fieldName: "title"),
                            ContentFields =
                            {
                                new SemanticField(fieldName: "chunk")
                            },
                        })
                    },
                },
                Fields =
                {
                    new SearchableField("parent_id") { IsFilterable = true, IsSortable = true, IsFacetable = true },
                    new SearchableField("chunk_id") { IsKey = true, IsFilterable = true, IsSortable = true, IsFacetable = true, AnalyzerName = LexicalAnalyzerName.Keyword },
                    new SearchableField("title"),
                    new SearchableField("chunk"),
                    new SearchField("vector", SearchFieldDataType.Collection(SearchFieldDataType.Single))
                    {
                        IsSearchable = true,
                        VectorSearchDimensions = modelDimensions,
                        VectorSearchProfileName = vectorSearchHnswProfile
                    },
                    new SearchableField("category") { IsFilterable = true, IsSortable = true, IsFacetable = true },
                },
            };

            return searchIndex;
        }

        /// <summary>
        /// Runs a query against the index and writes the answers, results and total count to the console.
        /// </summary>
        /// <param name="searchClient">Client bound to the index to query.</param>
        /// <param name="query">Query text. Used for the vector query unless <paramref name="textOnly"/> is set, and as the search text when <paramref name="textOnly"/>, <paramref name="hybrid"/> or <paramref name="semantic"/> is set.</param>
        /// <param name="k">Used both as the result size and as the nearest-neighbor count of the vector query.</param>
        /// <param name="filter">Optional filter expression applied to the query.</param>
        /// <param name="textOnly">When true, no vector query is attached.</param>
        /// <param name="exhaustive">Value assigned to the vector query's Exhaustive option; ignored when <paramref name="textOnly"/> is true.</param>
        /// <param name="hybrid">When true, the query text is sent alongside the vector query.</param>
        /// <param name="semantic">When true, the semantic query type is used with extractive captions and answers.</param>
        internal static async Task Search(SearchClient searchClient, string query, int k = 3, string filter = null, bool textOnly = false, bool exhaustive = false, bool hybrid = false, bool semantic = false)
        {
            // Perform the vector similarity search
            var searchOptions = new SearchOptions
            {
                Filter = filter,
                Size = k,
                Select = { "title", "chunk_id", "chunk", },
                IncludeTotalCount = true
            };

            if (!textOnly)
            {
                searchOptions.VectorSearch = new()
                {
                    Queries =
                    {
                        new VectorizableTextQuery(text: query)
                        {
                            KNearestNeighborsCount = k,
                            Fields = { "vector" },
                            Exhaustive = exhaustive
                        }
                    },
                };
            }

            if (semantic)
            {
                searchOptions.QueryType = SearchQueryType.Semantic;
                searchOptions.SemanticSearch = new SemanticSearchOptions
                {
                    SemanticConfigurationName = SemanticConfigurationName,
                    QueryCaption = new QueryCaption(QueryCaptionType.Extractive),
                    QueryAnswer = new QueryAnswer(QueryAnswerType.Extractive)
                };
            }

            // A pure vector query sends no search text; the text goes only through the vector query.
            string queryText = (textOnly || hybrid || semantic) ? query : null;
            SearchResults<SearchDocument> response = await searchClient.SearchAsync<SearchDocument>(queryText, searchOptions);

            if (response.SemanticSearch?.Answers?.Count > 0)
            {
                Console.WriteLine("Query Answers:");
                foreach (QueryAnswerResult answer in response.SemanticSearch.Answers)
                {
                    Console.WriteLine($"Answer Highlights: {answer.Highlights}");
                    Console.WriteLine($"Answer Text: {answer.Text}");
                }
            }

            await foreach (SearchResult<SearchDocument> result in response.GetResultsAsync())
            {
                Console.WriteLine($"Title: {result.Document["title"]}");
                Console.WriteLine($"Score: {result.Score}\n");
                Console.WriteLine($"Content: {result.Document["chunk"]}");
                if (result.SemanticSearch?.Captions?.Count > 0)
                {
                    QueryCaptionResult firstCaption = result.SemanticSearch.Captions[0];
                    Console.WriteLine($"First Caption Highlights: {firstCaption.Highlights}");
                    Console.WriteLine($"First Caption Text: {firstCaption.Text}");
                }
            }

            Console.WriteLine($"Total Results: {response.TotalCount}");
        }
    }
}