src/WebJobs.Extensions.OpenAI/Embeddings/EmbeddingsBaseAttribute.cs (24 lines of code) (raw):

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using Microsoft.Azure.WebJobs.Description;
using Microsoft.Azure.WebJobs.Extensions.OpenAI.Models;

namespace Microsoft.Azure.WebJobs.Extensions.OpenAI.Embeddings;

/// <summary>
/// Input binding attribute for converting function trigger input into OpenAI embeddings.
/// </summary>
/// <remarks>
/// More information on OpenAI embeddings can be found at
/// https://platform.openai.com/docs/guides/embeddings/what-are-embeddings.
/// </remarks>
[Binding]
[AttributeUsage(AttributeTargets.Parameter)]
public class EmbeddingsBaseAttribute : Attribute
{
    /// <summary>
    /// Initializes a new instance of the <see cref="EmbeddingsBaseAttribute"/> class with the specified input.
    /// </summary>
    /// <param name="input">The input source containing the data to generate embeddings for.</param>
    /// <param name="inputType">The type of the input.</param>
    /// <exception cref="ArgumentNullException">Thrown if <paramref name="input"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">Thrown if <paramref name="input"/> is an empty string.</exception>
    public EmbeddingsBaseAttribute(string input, InputType inputType)
    {
        // ArgumentNullException derives from ArgumentException, so callers that catch the
        // base type keep working while null input now matches the documented contract.
        if (input is null)
        {
            throw new ArgumentNullException(nameof(input), "Input cannot be null or empty.");
        }

        if (input.Length == 0)
        {
            throw new ArgumentException("Input cannot be null or empty.", nameof(input));
        }

        this.Input = input;
        this.InputType = inputType;
    }

    /// <summary>
    /// Gets or sets the name of the configuration section for AI service connectivity settings.
    /// </summary>
    /// <remarks>
    /// This property specifies the name of the configuration section that contains connection details for the AI service.
    ///
    /// For Azure OpenAI:
    /// - If specified, looks for "Endpoint" and "Key" values in this configuration section
    /// - If not specified or the section doesn't exist, falls back to environment variables:
    ///   AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_KEY
    /// - For user-assigned managed identity authentication, configuration section is required
    ///
    /// For OpenAI:
    /// - For OpenAI service (non-Azure), set the OPENAI_API_KEY environment variable.
    /// </remarks>
    public string AIConnectionName { get; set; } = "";

    /// <summary>
    /// Gets or sets the ID of the model to use.
    /// </summary>
    /// <remarks>
    /// Changing the default embeddings model is a breaking change, since any changes will be stored in a vector
    /// database for lookup. Changing the default model can cause the lookups to start misbehaving if they don't
    /// match the data that was previously ingested into the vector database.
    /// </remarks>
    [AutoResolve]
    public string EmbeddingsModel { get; set; } = OpenAIModels.DefaultEmbeddingsModel;

    /// <summary>
    /// Gets or sets the maximum number of characters to chunk the input into.
    /// </summary>
    /// <remarks>
    /// <para>
    /// At the time of writing, the maximum input tokens allowed for second-generation input embedding models
    /// like <c>text-embedding-ada-002</c> is 8191. 1 token is ~4 chars in English, which translates to roughly 32K
    /// characters of English input that can fit into a single chunk.
    /// </para>
    /// </remarks>
    public int MaxChunkLength { get; set; } = 8 * 1024; // REVIEW: Is 8K a good default?

    /// <summary>
    /// Gets or sets the maximum number of characters to overlap between chunks.
    /// </summary>
    public int MaxOverlap { get; set; } = 128;

    /// <summary>
    /// Gets the input to generate embeddings for.
    /// </summary>
    [AutoResolve]
    public string Input { get; }

    /// <summary>
    /// Gets the type of the input.
    /// </summary>
    public InputType InputType { get; }
}