demo-dotnet/DotNetVectorDemo/Program.cs (298 lines of code) (raw):

using System;
using System.CommandLine;
using System.Text.Json;
using Azure;
using Azure.AI.OpenAI;
using Azure.Identity;
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Indexes.Models;
using Azure.Search.Documents.Models;
using DotNetIntegratedVectorizationDemo;
using Microsoft.Extensions.Configuration;
using OpenAI;
using OpenAI.Embeddings;
using System.Reflection;

namespace DotNetVectorDemo
{
    /// <summary>
    /// Console entry point for the vector search demo: optionally creates the index and uploads
    /// pre-vectorized sample documents, then optionally runs a text, vector, hybrid or semantic query.
    /// </summary>
    class Program
    {
        public const string SampleVectorDocumentsPath = "vector-sample.json";

        // Name of the semantic configuration. It is used both when the index is defined and when a
        // semantic query is issued, so it lives in one place to keep the two from drifting apart.
        private const string SemanticSearchConfigName = "my-semantic-config";

        /// <summary>
        /// .NET Vector demo
        /// </summary>
        /// <param name="setupIndex">Indexes sample documents. text-embedding-3-small embeddings with a dimension of 1024 are used</param>
        /// <param name="query">Optional text of the search query. By default no query is run. Unless --textOnly is specified, this query is automatically vectorized.</param>
        /// <param name="filter">Optional filter of the search query. By default no filter is applied</param>
        /// <param name="k">How many nearest neighbors to use for vector search. Defaults to 50</param>
        /// <param name="top">How many results to return. Defaults to 3</param>
        /// <param name="exhaustive">Optional, specifies if the query skips using the index and computes the true nearest neighbors. Can only be used with vector or hybrid queries.</param>
        /// <param name="textOnly">Optional, specifies if the query is vectorized before searching. If true, only the text indexed is used for search.</param>
        /// <param name="hybrid">Optional, specifies if the query combines text and vector results.</param>
        /// <param name="semantic">Optional, specifies if the semantic reranker is used to rerank results from the query.</param>
        /// <param name="debug">Optional, specifies if debug output is included from the query. Only valid values are disabled (default), semantic, or vector</param>
        static async Task Main(bool setupIndex, string query = null, string filter = null, int k = 50, int top = 3, bool exhaustive = false, bool textOnly = false, bool hybrid = false, bool semantic = false, string debug = "disabled")
        {
            // Bind settings from environment variables and local.settings.json onto the Configuration object.
            var configuration = new Configuration();
            new ConfigurationBuilder()
                .SetBasePath(Directory.GetCurrentDirectory())
                .AddEnvironmentVariables()
                .AddJsonFile("local.settings.json")
                .Build()
                .Bind(configuration);

            // Reject flag combinations that contradict each other before touching any service.
            if (textOnly && hybrid)
            {
                throw new ArgumentException("Cannot specify textOnly with hybrid", nameof(textOnly));
            }

            if (exhaustive && textOnly)
            {
                throw new ArgumentException("Cannot specify exhaustive with textOnly", nameof(exhaustive));
            }

            if (debug != "disabled" && debug != "semantic" && debug != "vector")
            {
                throw new ArgumentException("Debug must be disabled (default), semantic, or vector", nameof(debug));
            }

            configuration.Validate();

            var defaultCredential = new DefaultAzureCredential();

            // The OpenAI client is not used by the index/query flow below; it is only needed by
            // GenerateAndSaveSampleDocumentsAsync when regenerating the sample embeddings.
            var azureOpenAIClient = InitializeOpenAIClient(configuration, defaultCredential);
            var indexClient = InitializeSearchIndexClient(configuration, defaultCredential);
            var searchClient = indexClient.GetSearchClient(configuration.IndexName);

            if (setupIndex)
            {
                await SetupIndexAsync(configuration, indexClient);
                await UploadSampleDocumentsAsync(configuration, searchClient, SampleVectorDocumentsPath);
            }

            if (!string.IsNullOrEmpty(query))
            {
                // Search declares textOnly BEFORE exhaustive, whereas Main declares them the other
                // way round. The flags are passed by name so they cannot be silently swapped.
                await Search(
                    searchClient,
                    query,
                    k,
                    top,
                    filter,
                    textOnly: textOnly,
                    exhaustive: exhaustive,
                    hybrid: hybrid,
                    semantic: semantic,
                    debug: debug);
            }
        }

        /// <summary>
        /// Creates the Azure OpenAI client, using the API key from configuration when one is set
        /// and falling back to the supplied token credential otherwise.
        /// </summary>
        /// <param name="configuration">Supplies <c>AzureOpenAIEndpoint</c> and the optional <c>AzureOpenAIApiKey</c>.</param>
        /// <param name="defaultCredential">Credential used when no API key is configured.</param>
        /// <returns>A client bound to the configured Azure OpenAI endpoint.</returns>
        internal static AzureOpenAIClient InitializeOpenAIClient(Configuration configuration, DefaultAzureCredential defaultCredential)
        {
            var endpoint = new Uri(configuration.AzureOpenAIEndpoint);

            if (!string.IsNullOrEmpty(configuration.AzureOpenAIApiKey))
            {
                return new AzureOpenAIClient(endpoint, new AzureKeyCredential(configuration.AzureOpenAIApiKey));
            }

            return new AzureOpenAIClient(endpoint, defaultCredential);
        }

        /// <summary>
        /// Creates the search index client, using the admin key from configuration when one is set
        /// and falling back to the supplied token credential otherwise.
        /// </summary>
        /// <param name="configuration">Supplies <c>ServiceEndpoint</c> and the optional <c>AdminKey</c>.</param>
        /// <param name="defaultCredential">Credential used when no admin key is configured.</param>
        /// <returns>A client bound to the configured search service endpoint.</returns>
        internal static SearchIndexClient InitializeSearchIndexClient(Configuration configuration, DefaultAzureCredential defaultCredential)
        {
            var endpoint = new Uri(configuration.ServiceEndpoint);

            if (!string.IsNullOrEmpty(configuration.AdminKey))
            {
                return new SearchIndexClient(endpoint, new AzureKeyCredential(configuration.AdminKey));
            }

            return new SearchIndexClient(endpoint, defaultCredential);
        }

        /// <summary>
        /// Runs a query against the index and writes answers, results, captions and any debug
        /// information to the console.
        /// </summary>
        /// <param name="searchClient">Client for the index to query.</param>
        /// <param name="query">Query text; vectorized by the service unless <paramref name="textOnly"/> is set.</param>
        /// <param name="k">Nearest-neighbor count for each vector query.</param>
        /// <param name="top">Maximum number of results to return.</param>
        /// <param name="filter">Optional filter expression applied to the query.</param>
        /// <param name="textOnly">When true, no vector queries are issued and only the text is searched.</param>
        /// <param name="exhaustive">Passed to each vector query's <c>Exhaustive</c> setting.</param>
        /// <param name="hybrid">When true, the query text is sent alongside the vector queries.</param>
        /// <param name="semantic">When true, the semantic query type with extractive captions and answers is requested.</param>
        /// <param name="debug">Debug mode; anything other than null, empty or "disabled" is forwarded to the service.</param>
        internal static async Task Search(SearchClient searchClient, string query, int k = 50, int top = 3, string filter = null, bool textOnly = false, bool exhaustive = false, bool hybrid = false, bool semantic = false, string debug = "disabled")
        {
            SearchOptions searchOptions = BuildSearchOptions(query, k, top, filter, textOnly, exhaustive, semantic, debug);

            // Pure vector queries send no search text; text, hybrid and semantic queries do.
            string queryText = (textOnly || hybrid || semantic) ? query : null;

            SearchResults<SearchDocument> response = await searchClient.SearchAsync<SearchDocument>(queryText, searchOptions);

            if (response.SemanticSearch?.Answers?.Count > 0)
            {
                Console.WriteLine("Query Answers:");
                foreach (QueryAnswerResult answer in response.SemanticSearch.Answers)
                {
                    Console.WriteLine($"Answer Highlights: {answer.Highlights}");
                    Console.WriteLine($"Answer Text: {answer.Text}");
                }
            }

            await foreach (SearchResult<SearchDocument> result in response.GetResultsAsync())
            {
                Console.WriteLine($"Title: {result.Document["title"]}");
                Console.WriteLine($"Score: {result.Score}\n");
                Console.WriteLine($"Content: {result.Document["content"]}");

                if (result.SemanticSearch?.Captions?.Count > 0)
                {
                    QueryCaptionResult firstCaption = result.SemanticSearch.Captions[0];
                    Console.WriteLine($"First Caption Highlights: {firstCaption.Highlights}");
                    Console.WriteLine($"First Caption Text: {firstCaption.Text}");
                }

                DocumentDebugInfo debugInfo = result.DocumentDebugInfo?.FirstOrDefault();
                if (debugInfo != null)
                {
                    WriteDebugInfo(debugInfo);
                }
            }

            Console.WriteLine($"Total Results: {response.TotalCount}");
        }

        // Translates the command-line flags into the SearchOptions sent with the query.
        private static SearchOptions BuildSearchOptions(string query, int k, int top, string filter, bool textOnly, bool exhaustive, bool semantic, string debug)
        {
            var searchOptions = new SearchOptions
            {
                Filter = filter,
                Size = top,
                Select = { "title", "id", "content", },
                IncludeTotalCount = true
            };

            if (!textOnly)
            {
                // One vectorizable text query per vector field; the text is vectorized service-side.
                searchOptions.VectorSearch = new()
                {
                    Queries =
                    {
                        new VectorizableTextQuery(text: query)
                        {
                            KNearestNeighborsCount = k,
                            Fields = { "titleVector" },
                            Exhaustive = exhaustive
                        },
                        new VectorizableTextQuery(text: query)
                        {
                            KNearestNeighborsCount = k,
                            Fields = { "contentVector" },
                            Exhaustive = exhaustive
                        },
                    },
                };
            }

            if (semantic)
            {
                searchOptions.QueryType = SearchQueryType.Semantic;
                searchOptions.SemanticSearch = new SemanticSearchOptions
                {
                    SemanticConfigurationName = SemanticSearchConfigName,
                    QueryCaption = new QueryCaption(QueryCaptionType.Extractive),
                    QueryAnswer = new QueryAnswer(QueryAnswerType.Extractive),
                };
            }

            if (!string.IsNullOrEmpty(debug) && debug != "disabled")
            {
                // The debug mode is set on SemanticSearchOptions, so an empty options object is
                // created when the query itself is not semantic.
                if (!semantic)
                {
                    searchOptions.SemanticSearch = new SemanticSearchOptions();
                }

                searchOptions.SemanticSearch.Debug = new QueryDebugMode(debug);
            }

            return searchOptions;
        }

        // Prints the semantic field states and vector subscores attached to a single result.
        private static void WriteDebugInfo(DocumentDebugInfo debugInfo)
        {
            static string DescribeField(QueryResultDocumentSemanticField field) => $"Field {field.Name}, State {field.State}";

            if (debugInfo.Semantic != null)
            {
                if (debugInfo.Semantic.TitleField != null)
                {
                    Console.WriteLine($"Title {DescribeField(debugInfo.Semantic.TitleField)}");
                }

                if (debugInfo.Semantic.ContentFields != null)
                {
                    foreach (var contentField in debugInfo.Semantic.ContentFields)
                    {
                        Console.WriteLine($"Content {DescribeField(contentField)}");
                    }
                }

                if (debugInfo.Semantic.KeywordFields != null)
                {
                    foreach (var keywordField in debugInfo.Semantic.KeywordFields)
                    {
                        Console.WriteLine($"Keyword {DescribeField(keywordField)}");
                    }
                }
            }

            if (debugInfo.Vectors?.Subscores != null)
            {
                if (debugInfo.Vectors.Subscores.DocumentBoost != null)
                {
                    Console.WriteLine($"Document Boost: {debugInfo.Vectors.Subscores.DocumentBoost}");
                }

                if (debugInfo.Vectors.Subscores.Text != null)
                {
                    Console.WriteLine($"Document Text Score: {debugInfo.Vectors.Subscores.Text.SearchScore}");
                }

                // Entries are numbered from 1 in the order the service returned them.
                int index = 1;
                foreach (IDictionary<string, SingleVectorFieldResult> querySubscore in debugInfo.Vectors.Subscores.Vectors)
                {
                    Console.WriteLine($"Vector Query {index} Debug Info:");
                    foreach (KeyValuePair<string, SingleVectorFieldResult> fieldSubscore in querySubscore)
                    {
                        Console.WriteLine($"Vector Field: {fieldSubscore.Key}");
                        Console.WriteLine($"Vector Field @search.score: {fieldSubscore.Value.SearchScore}");
                        Console.WriteLine($"Vector Field similarity: {fieldSubscore.Value.VectorSimilarity}");
                    }

                    index++;
                }
            }
        }

        /// <summary>
        /// Creates or updates the search index with text fields, two vector fields sharing one
        /// HNSW profile with an Azure OpenAI vectorizer, and a semantic configuration.
        /// </summary>
        /// <param name="configuration">Supplies the index name and the Azure OpenAI endpoint, model, deployment and embedding dimensions.</param>
        /// <param name="indexClient">Client used to create or update the index.</param>
        internal static async Task SetupIndexAsync(Configuration configuration, SearchIndexClient indexClient)
        {
            const string vectorSearchHnswProfile = "my-vector-profile";
            const string vectorSearchHnswConfig = "myHnsw";
            const string vectorSearchVectorizer = "myOpenAIVectorizer";

            // Both vector fields use the same dimensionality, so the setting is parsed once.
            int embeddingDimensions = int.Parse(configuration.AzureOpenAIEmbeddingDimensions);

            SearchIndex searchIndex = new(configuration.IndexName)
            {
                VectorSearch = new()
                {
                    Profiles =
                    {
                        new VectorSearchProfile(vectorSearchHnswProfile, vectorSearchHnswConfig)
                        {
                            VectorizerName = vectorSearchVectorizer
                        }
                    },
                    Algorithms =
                    {
                        new HnswAlgorithmConfiguration(vectorSearchHnswConfig)
                    },
                    Vectorizers =
                    {
                        new AzureOpenAIVectorizer(vectorSearchVectorizer)
                        {
                            Parameters = new AzureOpenAIVectorizerParameters
                            {
                                ResourceUri = new Uri(configuration.AzureOpenAIEndpoint),
                                ModelName = configuration.AzureOpenAIEmbeddingModel,
                                DeploymentName = configuration.AzureOpenAIEmbeddingDeployment
                            }
                        }
                    }
                },
                SemanticSearch = new()
                {
                    Configurations =
                    {
                        new SemanticConfiguration(SemanticSearchConfigName, new()
                        {
                            TitleField = new SemanticField("title"),
                            ContentFields =
                            {
                                new SemanticField("content")
                            },
                            KeywordsFields =
                            {
                                new SemanticField("category")
                            }
                        })
                    },
                },
                Fields =
                {
                    new SimpleField("id", SearchFieldDataType.String) { IsKey = true, IsFilterable = true, IsSortable = true, IsFacetable = true },
                    new SearchableField("title") { IsFilterable = true, IsSortable = true },
                    new SearchableField("content") { IsFilterable = true },
                    new SearchField("titleVector", SearchFieldDataType.Collection(SearchFieldDataType.Single))
                    {
                        IsSearchable = true,
                        VectorSearchDimensions = embeddingDimensions,
                        VectorSearchProfileName = vectorSearchHnswProfile
                    },
                    new SearchField("contentVector", SearchFieldDataType.Collection(SearchFieldDataType.Single))
                    {
                        IsSearchable = true,
                        VectorSearchDimensions = embeddingDimensions,
                        VectorSearchProfileName = vectorSearchHnswProfile
                    },
                    new SearchableField("category") { IsFilterable = true, IsSortable = true, IsFacetable = true }
                }
            };

            await indexClient.CreateOrUpdateIndexAsync(searchIndex);
        }

        /// <summary>
        /// Reads a JSON array of documents from disk and uploads it to the index through a buffered sender.
        /// </summary>
        /// <param name="configuration">Not read by this method; kept so the signature matches the other setup steps.</param>
        /// <param name="searchClient">Client for the index that receives the documents.</param>
        /// <param name="sampleDocumentsPath">Path of the JSON file containing the documents; each must have an <c>id</c> entry.</param>
        /// <exception cref="InvalidOperationException">The file deserializes to null instead of a document list.</exception>
        internal static async Task UploadSampleDocumentsAsync(Configuration configuration, SearchClient searchClient, string sampleDocumentsPath)
        {
            string sampleDocumentContent = await File.ReadAllTextAsync(sampleDocumentsPath);
            var sampleDocuments = JsonSerializer.Deserialize<List<Dictionary<string, object>>>(sampleDocumentContent)
                ?? throw new InvalidOperationException($"'{sampleDocumentsPath}' does not contain a JSON array of documents.");

            // Documents are untyped dictionaries, so the sender must be told how to find each key.
            var options = new SearchIndexingBufferedSenderOptions<Dictionary<string, object>>
            {
                KeyFieldAccessor = (o) => o["id"].ToString()
            };

            await using SearchIndexingBufferedSender<Dictionary<string, object>> bufferedSender = new(searchClient, options);
            await bufferedSender.UploadDocumentsAsync(sampleDocuments);
            await bufferedSender.FlushAsync();
        }

        /// <summary>
        /// Generates embeddings for sample documents and saves the output to a specified path.
        /// </summary>
        /// <param name="configuration">The configuration settings for the Azure OpenAI service.</param>
        /// <param name="azureOpenAIClient">The AzureOpenAIClient instance for embedding generation.</param>
        /// <param name="inputSampleDocumentPath">The file path of the input sample document containing JSON content.</param>
        /// <param name="outputSampleDocumentPath">The file path where the output with embeddings will be saved.</param>
        /// <exception cref="InvalidOperationException">The input file deserializes to null instead of a document list.</exception>
        internal static async Task GenerateAndSaveSampleDocumentsAsync(Configuration configuration, AzureOpenAIClient azureOpenAIClient, string inputSampleDocumentPath, string outputSampleDocumentPath)
        {
            string sampleDocumentContent = await File.ReadAllTextAsync(inputSampleDocumentPath);
            var sampleDocuments = JsonSerializer.Deserialize<List<Dictionary<string, object>>>(sampleDocumentContent)
                ?? throw new InvalidOperationException($"'{inputSampleDocumentPath}' does not contain a JSON array of documents.");

            EmbeddingClient embeddingClient = azureOpenAIClient.GetEmbeddingClient(configuration.AzureOpenAIEmbeddingDeployment);
            var embeddingOptions = new EmbeddingGenerationOptions
            {
                Dimensions = int.Parse(configuration.AzureOpenAIEmbeddingDimensions)
            };

            // Each document gets two embeddings (title and content), stored back on the same dictionary.
            foreach (Dictionary<string, object> sampleDocument in sampleDocuments)
            {
                string title = sampleDocument["title"]?.ToString() ?? string.Empty;
                string content = sampleDocument["content"]?.ToString() ?? string.Empty;

                OpenAIEmbedding titleEmbedding = await embeddingClient.GenerateEmbeddingAsync(title, embeddingOptions);
                OpenAIEmbedding contentEmbedding = await embeddingClient.GenerateEmbeddingAsync(content, embeddingOptions);

                sampleDocument["titleVector"] = titleEmbedding.ToFloats();
                sampleDocument["contentVector"] = contentEmbedding.ToFloats();
            }

            string serializedSampleDocuments = JsonSerializer.Serialize(sampleDocuments);
            await File.WriteAllTextAsync(outputSampleDocumentPath, serializedSampleDocuments);
        }
    }
}