# specification/ai/data-plane/ModelInference/openapi/2025-04-01/openapi.yaml
openapi: 3.0.0
info:
title: AI Model Inference
version: '2025-04-01'
tags: []
paths:
/chat/completions:
post:
operationId: getChatCompletions
description: |-
Gets chat completions for the provided chat messages.
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data. The method makes a REST API call to the `/chat/completions` route
on the given endpoint.
parameters:
- $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
- name: extra-parameters
in: header
required: false
description: |-
Controls what happens if extra parameters, undefined by the REST API,
are passed in the JSON request payload.
This sets the HTTP request header `extra-parameters`.
schema:
$ref: '#/components/schemas/ExtraParameters'
responses:
'200':
description: The request has succeeded.
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletions'
default:
description: An unexpected error response.
headers:
x-ms-error-code:
required: false
description: String error code indicating what went wrong.
schema:
type: string
content:
application/json:
schema:
$ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionsOptions'
description: The parameters of the chat completions request.
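# Illustrative, non-normative sketch of a minimal `/chat/completions` request, built from the
# ChatCompletionsOptions schema defined below. The deployment name and message text are
# placeholders, not values defined by this specification.
#   POST /chat/completions?api-version=2025-04-01
#   {
#     "model": "my-model-deployment",
#     "messages": [
#       { "role": "system", "content": "You are a helpful assistant." },
#       { "role": "user", "content": "How many feet are in a mile?" }
#     ],
#     "temperature": 0.7,
#     "max_tokens": 256
#   }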
/embeddings:
post:
operationId: getEmbeddings
description: |-
Return the embedding vectors for given text prompts.
The method makes a REST API call to the `/embeddings` route on the given endpoint.
parameters:
- $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
- name: extra-parameters
in: header
required: false
description: |-
Controls what happens if extra parameters, undefined by the REST API,
are passed in the JSON request payload.
This sets the HTTP request header `extra-parameters`.
schema:
$ref: '#/components/schemas/ExtraParameters'
responses:
'200':
description: The request has succeeded.
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsResult'
default:
description: An unexpected error response.
headers:
x-ms-error-code:
required: false
description: String error code indicating what went wrong.
schema:
type: string
content:
application/json:
schema:
$ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsOptions'
description: The parameters of the embeddings request.
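# Illustrative, non-normative sketch of an `/embeddings` request body based on the
# EmbeddingsOptions schema; the deployment name and input strings are placeholders. The
# optional fields may return a 422 error if the model doesn't support them (see the schema
# descriptions).
#   POST /embeddings?api-version=2025-04-01
#   {
#     "model": "my-embedding-deployment",
#     "input": ["first phrase", "second phrase"],
#     "dimensions": 1024,
#     "encoding_format": "float",
#     "input_type": "document"
#   }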
/images/embeddings:
post:
operationId: getImageEmbeddings
description: |-
Return the embedding vectors for given images.
The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
parameters:
- $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
- name: extra-parameters
in: header
required: false
description: |-
Controls what happens if extra parameters, undefined by the REST API,
are passed in the JSON request payload.
This sets the HTTP request header `extra-parameters`.
schema:
$ref: '#/components/schemas/ExtraParameters'
responses:
'200':
description: The request has succeeded.
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsResult'
default:
description: An unexpected error response.
headers:
x-ms-error-code:
required: false
description: String error code indicating what went wrong.
schema:
type: string
content:
application/json:
schema:
$ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/ImageEmbeddingsOptions'
description: The parameters of the image embeddings request.
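# Illustrative, non-normative sketch of an `/images/embeddings` request body based on the
# ImageEmbeddingsOptions schema; the deployment name, base64 payload, and text are placeholders.
#   POST /images/embeddings?api-version=2025-04-01
#   {
#     "model": "my-image-embedding-deployment",
#     "input": [
#       { "image": "data:image/png;base64,iVBORw0KGgo...", "text": "a photo of a cat" }
#     ]
#   }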
/info:
get:
operationId: getModelInfo
description: |-
Returns information about the AI model deployed.
The method makes a REST API call to the `/info` route on the given endpoint.
This method will only work when using a Serverless API, Managed Compute, or Model
Inference endpoint. Azure OpenAI endpoints don't support it.
parameters:
- $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter'
- name: model
in: query
required: false
description: The model deployment name you want information from.
schema:
type: string
explode: false
responses:
'200':
description: The request has succeeded.
content:
application/json:
schema:
$ref: '#/components/schemas/ModelInfo'
default:
description: An unexpected error response.
headers:
x-ms-error-code:
required: false
description: String error code indicating what went wrong.
schema:
type: string
content:
application/json:
schema:
$ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse'
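# Illustrative, non-normative sketch of a `GET /info?api-version=2025-04-01` response body
# based on the ModelInfo schema; the values are placeholders taken from the schema's own
# examples.
#   {
#     "model_name": "Phi21",
#     "model_type": "chat-completion",
#     "model_provider_name": "Microsoft"
#   }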
security:
- ApiKeyAuth: []
- BearerAuth: []
- OAuth2Auth:
- https://cognitiveservices.azure.com/.default
components:
parameters:
Azure.Core.Foundations.ApiVersionParameter:
name: api-version
in: query
required: true
description: The API version to use for this operation.
schema:
type: string
minLength: 1
explode: false
schemas:
AudioContentFormat:
anyOf:
- type: string
- type: string
enum:
- wav
- mp3
description: A representation of the possible formats for audio content.
Azure.Core.Foundations.Error:
type: object
required:
- code
- message
properties:
code:
type: string
description: One of a server-defined set of error codes.
message:
type: string
description: A human-readable representation of the error.
target:
type: string
description: The target of the error.
details:
type: array
items:
$ref: '#/components/schemas/Azure.Core.Foundations.Error'
description: An array of details about specific errors that led to this reported error.
innererror:
allOf:
- $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
description: An object containing more specific information than the current object about the error.
description: The error object.
Azure.Core.Foundations.ErrorResponse:
type: object
required:
- error
properties:
error:
allOf:
- $ref: '#/components/schemas/Azure.Core.Foundations.Error'
description: The error object.
description: A response containing error details.
Azure.Core.Foundations.InnerError:
type: object
properties:
code:
type: string
description: One of a server-defined set of error codes.
innererror:
allOf:
- $ref: '#/components/schemas/Azure.Core.Foundations.InnerError'
description: Inner error.
description: An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/microsoft/api-guidelines/blob/vNext/azure/Guidelines.md#handling-errors.
ChatChoice:
type: object
required:
- index
- finish_reason
- message
properties:
index:
type: integer
format: int32
description: The ordered index associated with this chat completions choice.
finish_reason:
allOf:
- $ref: '#/components/schemas/CompletionsFinishReason'
nullable: true
description: The reason that this chat completions choice completed its generation.
readOnly: true
message:
allOf:
- $ref: '#/components/schemas/ChatResponseMessage'
description: The chat message for a given chat completions prompt.
readOnly: true
description: |-
The representation of a single prompt completion as part of an overall chat completions request.
Generally, `n` choices are generated per provided prompt with a default value of 1.
Token limits and other settings may limit the number of choices generated.
ChatCompletions:
type: object
required:
- id
- object
- created
- model
- choices
- usage
properties:
id:
type: string
description: A unique identifier associated with this chat completions response.
object:
type: string
enum:
- chat.completion
description: The response object type, which is always `chat.completion`.
created:
type: integer
format: unixtime
description: |-
The first timestamp associated with generation activity for this completions response,
represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
readOnly: true
model:
type: string
description: The model used for the chat completion.
readOnly: true
choices:
type: array
items:
$ref: '#/components/schemas/ChatChoice'
minItems: 1
description: |-
The collection of completions choices associated with this completions response.
Generally, `n` choices are generated per provided prompt with a default value of 1.
Token limits and other settings may limit the number of choices generated.
readOnly: true
usage:
allOf:
- $ref: '#/components/schemas/CompletionsUsage'
description: Usage information for tokens processed and generated as part of this completions operation.
readOnly: true
description: |-
Representation of the response data from a chat completions request.
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data.
ChatCompletionsAudio:
type: object
required:
- id
- expires_at
- data
- transcript
properties:
id:
type: string
description: |-
Unique identifier for the audio response. This value can be used in chat history messages instead of passing
the full audio object.
readOnly: true
expires_at:
type: integer
format: unixtime
description: |-
The Unix timestamp (in seconds) at which the audio piece expires and can no longer be referenced by its ID in
multi-turn conversations.
readOnly: true
data:
type: string
description: Base64-encoded audio data.
readOnly: true
format:
allOf:
- $ref: '#/components/schemas/AudioContentFormat'
description: |-
The format of the audio content. If format is not provided, it will match the format used in the
input audio request.
readOnly: true
transcript:
type: string
description: The transcript of the audio file.
readOnly: true
description: A representation of the audio generated by the model.
ChatCompletionsModality:
anyOf:
- type: string
- type: string
enum:
- text
- audio
description: The modalities that the model is allowed to use for the chat completions response.
ChatCompletionsNamedToolChoice:
type: object
required:
- type
- function
properties:
type:
type: string
enum:
- function
description: The type of the tool. Currently, only `function` is supported.
function:
allOf:
- $ref: '#/components/schemas/ChatCompletionsNamedToolChoiceFunction'
description: The function that should be called.
description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
ChatCompletionsNamedToolChoiceFunction:
type: object
required:
- name
properties:
name:
type: string
description: The name of the function that should be called.
description: A tool selection of a specific, named function tool that will limit chat completions to using the named function.
ChatCompletionsOptions:
type: object
required:
- messages
properties:
messages:
type: array
items:
$ref: '#/components/schemas/ChatRequestMessage'
minItems: 1
description: |-
The collection of context messages associated with this chat completions request.
Typical usage begins with a chat message for the System role that provides instructions for
the behavior of the assistant, followed by alternating messages between the User and
Assistant roles.
frequency_penalty:
type: number
format: float
minimum: -2
maximum: 2
description: |-
A value that influences the probability of generated tokens appearing based on their cumulative
frequency in generated text.
Positive values will make tokens less likely to appear as their frequency increases and
decrease the likelihood of the model repeating the same statements verbatim.
Supported range is [-2, 2].
default: 0
stream:
type: boolean
description: A value indicating whether chat completions should be streamed for this request.
presence_penalty:
type: number
format: float
minimum: -2
maximum: 2
description: |-
A value that influences the probability of generated tokens appearing based on their existing
presence in generated text.
Positive values will make tokens less likely to appear when they already exist and increase the
model's likelihood to output new topics.
Supported range is [-2, 2].
default: 0
temperature:
type: number
format: float
minimum: 0
maximum: 1
description: |-
The sampling temperature to use that controls the apparent creativity of generated completions.
Higher values will make output more random while lower values will make results more focused
and deterministic.
It is not recommended to modify temperature and top_p for the same completions request as the
interaction of these two settings is difficult to predict.
Supported range is [0, 1].
default: 0.7
top_p:
type: number
format: float
minimum: 0
maximum: 1
description: |-
An alternative to sampling with temperature called nucleus sampling. This value causes the
model to consider the results of tokens with the provided probability mass. As an example, a
value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
considered.
It is not recommended to modify temperature and top_p for the same completions request as the
interaction of these two settings is difficult to predict.
Supported range is [0, 1].
default: 1
max_tokens:
type: integer
format: int32
minimum: 0
description: The maximum number of tokens to generate.
response_format:
allOf:
- $ref: '#/components/schemas/ChatCompletionsResponseFormat'
description: |-
An object specifying the format that the model must output.
Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
stop:
type: array
items:
type: string
minItems: 1
description: A collection of textual sequences that will end completions generation.
tools:
type: array
items:
$ref: '#/components/schemas/ChatCompletionsToolDefinition'
minItems: 1
description: |-
A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
may respond with a function call request and provide the input arguments in JSON format for that function.
tool_choice:
anyOf:
- $ref: '#/components/schemas/ChatCompletionsToolChoicePreset'
- $ref: '#/components/schemas/ChatCompletionsNamedToolChoice'
description: If specified, configures which of the provided tools the model can use for the chat completions response.
seed:
type: integer
format: int64
description: |-
If specified, the system will make a best effort to sample deterministically such that repeated requests with the
same seed and parameters should return the same result. Determinism is not guaranteed.
model:
type: string
description: ID of the specific AI model to use, if more than one model is available on the endpoint.
modalities:
type: array
items:
$ref: '#/components/schemas/ChatCompletionsModality'
description: |-
The modalities that the model is allowed to use for the chat completions response. The default modality
is `text`. Indicating an unsupported modality combination results in a 422 error.
additionalProperties: {}
description: |-
The configuration information for a chat completions request.
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data.
ChatCompletionsResponseFormat:
type: object
required:
- type
properties:
type:
type: string
description: The response format type to use for chat completions.
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/ChatCompletionsResponseFormatText'
json_object: '#/components/schemas/ChatCompletionsResponseFormatJsonObject'
json_schema: '#/components/schemas/ChatCompletionsResponseFormatJsonSchema'
description: |-
Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.
Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
via a system or user message.
ChatCompletionsResponseFormatJsonObject:
type: object
required:
- type
properties:
type:
type: string
enum:
- json_object
description: "Response format type: always 'json_object' for this object."
allOf:
- $ref: '#/components/schemas/ChatCompletionsResponseFormat'
description: |-
A response format for Chat Completions that restricts responses to emitting valid JSON objects.
Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
via a system or user message.
ChatCompletionsResponseFormatJsonSchema:
type: object
required:
- type
- json_schema
properties:
type:
type: string
enum:
- json_schema
description: 'The type of response format being defined: `json_schema`'
json_schema:
allOf:
- $ref: '#/components/schemas/ChatCompletionsResponseFormatJsonSchemaDefinition'
description: The definition of the required JSON schema in the response, and associated metadata.
allOf:
- $ref: '#/components/schemas/ChatCompletionsResponseFormat'
description: |-
A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a
JSON schema specified by the caller.
ChatCompletionsResponseFormatJsonSchemaDefinition:
type: object
required:
- name
- schema
properties:
name:
type: string
description: The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
schema:
type: object
additionalProperties: {}
description: The definition of the JSON schema.
description:
type: string
description: A description of the response format, used by the AI model to determine how to generate responses in this format.
strict:
type: boolean
description: |-
Whether to enable strict schema adherence when generating the output.
If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of
JSON Schema is supported when `strict` is `true`.
default: false
description: The definition of the required JSON schema in the response, and associated metadata.
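# Illustrative, non-normative sketch of a `response_format` value using the json_schema
# definition above; the schema name, description, and properties are placeholders.
#   "response_format": {
#     "type": "json_schema",
#     "json_schema": {
#       "name": "weather_report",
#       "description": "A structured weather report.",
#       "strict": true,
#       "schema": {
#         "type": "object",
#         "properties": {
#           "city": { "type": "string" },
#           "temperature_celsius": { "type": "number" }
#         },
#         "required": ["city", "temperature_celsius"]
#       }
#     }
#   }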
ChatCompletionsResponseFormatText:
type: object
required:
- type
properties:
type:
type: string
enum:
- text
description: "Response format type: always 'text' for this object."
allOf:
- $ref: '#/components/schemas/ChatCompletionsResponseFormat'
description: A response format for Chat Completions that emits text responses. This is the default response format.
ChatCompletionsToolCall:
type: object
required:
- id
- type
- function
properties:
id:
type: string
description: The ID of the tool call.
type:
type: string
enum:
- function
description: The type of tool call. Currently, only `function` is supported.
function:
allOf:
- $ref: '#/components/schemas/FunctionCall'
description: The details of the function call requested by the AI model.
description: A function tool call requested by the AI model.
ChatCompletionsToolChoicePreset:
anyOf:
- type: string
- type: string
enum:
- auto
- none
- required
description: Represents a generic policy for how a chat completions tool may be selected.
ChatCompletionsToolDefinition:
type: object
required:
- type
- function
properties:
type:
type: string
enum:
- function
description: The type of the tool. Currently, only `function` is supported.
function:
allOf:
- $ref: '#/components/schemas/FunctionDefinition'
description: The function definition details for the function tool.
description: The definition of a chat completions tool that can call a function.
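# Illustrative, non-normative sketch of the `tools` and `tool_choice` request fields using the
# tool definition above; the function name, description, and parameters are placeholders.
#   "tools": [
#     {
#       "type": "function",
#       "function": {
#         "name": "get_current_weather",
#         "description": "Gets the current weather for a city.",
#         "parameters": {
#           "type": "object",
#           "properties": { "city": { "type": "string" } },
#           "required": ["city"]
#         }
#       }
#     }
#   ],
#   "tool_choice": "auto"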
ChatMessageAudioContentItem:
type: object
required:
- type
- audio_url
properties:
type:
type: string
enum:
- audio_url
description: "The discriminated object type: always 'image_url' for this type."
audio_url:
allOf:
- $ref: '#/components/schemas/ChatMessageAudioUrl'
description: An internet location, which must be accessible to the model, from which the audio may be retrieved.
allOf:
- $ref: '#/components/schemas/ChatMessageContentItem'
description: A structured chat content item containing an audio reference.
ChatMessageAudioUrl:
type: object
required:
- url
properties:
url:
type: string
description: The URL of the audio.
description: An internet location from which the model may retrieve audio.
ChatMessageContentItem:
type: object
required:
- type
properties:
type:
type: string
description: The discriminated object type.
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/ChatMessageTextContentItem'
image_url: '#/components/schemas/ChatMessageImageContentItem'
audio_url: '#/components/schemas/ChatMessageAudioContentItem'
input_audio: '#/components/schemas/ChatMessageInputAudioContentItem'
description: An abstract representation of a structured content item within a chat message.
ChatMessageImageContentItem:
type: object
required:
- type
- image_url
properties:
type:
type: string
enum:
- image_url
description: "The discriminated object type: always 'image_url' for this type."
image_url:
allOf:
- $ref: '#/components/schemas/ChatMessageImageUrl'
description: An internet location, which must be accessible to the model, from which the image may be retrieved.
allOf:
- $ref: '#/components/schemas/ChatMessageContentItem'
description: A structured chat content item containing an image reference.
ChatMessageImageDetailLevel:
anyOf:
- type: string
- type: string
enum:
- auto
- low
- high
description: A representation of the possible image detail levels for image-based chat completions message content.
ChatMessageImageUrl:
type: object
required:
- url
properties:
url:
type: string
description: The URL of the image.
detail:
allOf:
- $ref: '#/components/schemas/ChatMessageImageDetailLevel'
description: |-
The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and
accuracy.
description: An internet location from which the model may retrieve an image.
ChatMessageInputAudio:
type: object
required:
- data
- format
properties:
data:
type: string
description: Base64-encoded audio data.
format:
allOf:
- $ref: '#/components/schemas/AudioContentFormat'
description: The audio format of the audio content.
description: The details of an audio chat message content part.
ChatMessageInputAudioContentItem:
type: object
required:
- type
- format
properties:
type:
type: string
enum:
- input_audio
description: "The discriminated object type: always 'input_audio' for this type."
format:
allOf:
- $ref: '#/components/schemas/AudioContentFormat'
description: The audio format of the audio reference.
allOf:
- $ref: '#/components/schemas/ChatMessageContentItem'
description: A structured chat content item containing an audio content.
ChatMessageTextContentItem:
type: object
required:
- type
- text
properties:
type:
type: string
enum:
- text
description: "The discriminated object type: always 'text' for this type."
text:
type: string
description: The content of the message.
allOf:
- $ref: '#/components/schemas/ChatMessageContentItem'
description: A structured chat content item containing plain text.
ChatRequestAssistantMessage:
type: object
required:
- role
properties:
role:
type: string
enum:
- assistant
description: The chat role associated with this message, which is always 'assistant' for assistant messages.
content:
type: string
description: The content of the message.
tool_calls:
type: array
items:
$ref: '#/components/schemas/ChatCompletionsToolCall'
description: |-
The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
completions request to resolve as configured.
audio:
allOf:
- $ref: '#/components/schemas/ChatRequestAudioReference'
description: The audio generated by a previous response in a multi-turn conversation.
allOf:
- $ref: '#/components/schemas/ChatRequestMessage'
description: A request chat message representing response or action from the assistant.
ChatRequestAudioReference:
type: object
required:
- id
properties:
id:
type: string
description: Unique identifier for the audio response. This value corresponds to the id of a previous audio completion.
description: A reference to an audio response generated by the model.
ChatRequestMessage:
type: object
required:
- role
properties:
role:
allOf:
- $ref: '#/components/schemas/ChatRole'
description: The chat role associated with this message.
discriminator:
propertyName: role
mapping:
system: '#/components/schemas/ChatRequestSystemMessage'
user: '#/components/schemas/ChatRequestUserMessage'
assistant: '#/components/schemas/ChatRequestAssistantMessage'
tool: '#/components/schemas/ChatRequestToolMessage'
description: An abstract representation of a chat message as provided in a request.
ChatRequestSystemMessage:
type: object
required:
- role
- content
properties:
role:
type: string
enum:
- system
description: The chat role associated with this message, which is always 'system' for system messages.
content:
type: string
description: The contents of the system message.
allOf:
- $ref: '#/components/schemas/ChatRequestMessage'
description: |-
A request chat message containing system instructions that influence how the model will generate a chat completions
response.
ChatRequestToolMessage:
type: object
required:
- role
- tool_call_id
properties:
role:
type: string
enum:
- tool
description: The chat role associated with this message, which is always 'tool' for tool messages.
content:
type: string
description: The content of the message.
tool_call_id:
type: string
description: The ID of the tool call resolved by the provided content.
allOf:
- $ref: '#/components/schemas/ChatRequestMessage'
description: A request chat message representing requested output from a configured tool.
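# Illustrative, non-normative sketch of a tool-call round trip inside the `messages` array:
# an assistant message carrying a tool call, followed by the tool message that resolves it.
# The call ID, function name, arguments, and result are placeholders.
#   {
#     "role": "assistant",
#     "tool_calls": [
#       {
#         "id": "call_abc123",
#         "type": "function",
#         "function": { "name": "get_current_weather", "arguments": "{\"city\": \"Seattle\"}" }
#       }
#     ]
#   },
#   { "role": "tool", "tool_call_id": "call_abc123", "content": "{\"temperature_celsius\": 17}" }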
ChatRequestUserMessage:
type: object
required:
- role
- content
properties:
role:
type: string
enum:
- user
description: The chat role associated with this message, which is always 'user' for user messages.
content:
anyOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/ChatMessageContentItem'
description: The contents of the user message, with available input types varying by selected model.
allOf:
- $ref: '#/components/schemas/ChatRequestMessage'
description: A request chat message representing user input to the assistant.
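# Illustrative, non-normative sketch of a user message whose content is an array of structured
# content items (text plus an image reference) rather than a plain string; the URL is a
# placeholder.
#   {
#     "role": "user",
#     "content": [
#       { "type": "text", "text": "What is shown in this image?" },
#       { "type": "image_url", "image_url": { "url": "https://example.com/photo.jpg", "detail": "auto" } }
#     ]
#   }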
ChatResponseMessage:
type: object
required:
- role
- content
properties:
role:
allOf:
- $ref: '#/components/schemas/ChatRole'
description: The chat role associated with the message.
readOnly: true
content:
type: string
nullable: true
description: The content of the message.
readOnly: true
reasoning_content:
type: string
description: The reasoning content the model used for generating the response.
readOnly: true
tool_calls:
type: array
items:
$ref: '#/components/schemas/ChatCompletionsToolCall'
description: |-
The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
completions request to resolve as configured.
readOnly: true
audio:
allOf:
- $ref: '#/components/schemas/ChatCompletionsAudio'
description: The audio generated by the model as a response to the messages, if the model is configured to generate audio.
readOnly: true
description: A representation of a chat message as received in a response.
ChatRole:
anyOf:
- type: string
- type: string
enum:
- system
- developer
- user
- assistant
- tool
description: A description of the intended purpose of a message within a chat completions interaction.
CompletionsFinishReason:
anyOf:
- type: string
- type: string
enum:
- stop
- length
- content_filter
- tool_calls
description: Representation of the manner in which a completions response concluded.
CompletionsUsage:
type: object
required:
- completion_tokens
- prompt_tokens
- total_tokens
properties:
completion_tokens:
type: integer
format: int32
description: The number of tokens generated across all completions emissions.
readOnly: true
prompt_tokens:
type: integer
format: int32
description: The number of tokens in the provided prompts for the completions request.
readOnly: true
total_tokens:
type: integer
format: int32
description: The total number of tokens processed for the completions request and response.
readOnly: true
completion_tokens_details:
allOf:
- $ref: '#/components/schemas/CompletionsUsageDetails'
description: Breakdown of tokens used in a completion.
readOnly: true
prompt_tokens_details:
allOf:
- $ref: '#/components/schemas/PromptUsageDetails'
description: Breakdown of tokens used in the prompt/chat history.
readOnly: true
description: |-
Representation of the token counts processed for a completions request.
Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
other consumers.
CompletionsUsageDetails:
type: object
required:
- audio_tokens
- reasoning_tokens
- total_tokens
properties:
audio_tokens:
type: integer
format: int32
description: The number of tokens corresponding to audio input.
readOnly: true
reasoning_tokens:
type: integer
format: int32
description: The number of tokens corresponding to reasoning.
readOnly: true
total_tokens:
type: integer
format: int32
description: The total number of tokens processed for the completions request and response.
readOnly: true
description: A breakdown of tokens used in a completion.
EmbeddingEncodingFormat:
anyOf:
- type: string
- type: string
enum:
- base64
- binary
- float
- int8
- ubinary
- uint8
description: |-
Specifies the types of embeddings to generate. Compressed embedding types like `uint8`, `int8`, `ubinary` and
`binary` may reduce storage costs without sacrificing the integrity of the data. Returns a 422 error if the
model doesn't support the value or parameter. Read the model's documentation to know the values supported by
your model.
EmbeddingInputType:
anyOf:
- type: string
- type: string
enum:
- text
- query
- document
description: Represents the input types used for embedding search.
EmbeddingItem:
type: object
required:
- embedding
- index
- object
properties:
embedding:
type: array
items:
type: number
format: float
description: |-
List of embedding values for the input prompt. These represent a measurement of the
vector-based relatedness of the provided input, or a base64-encoded string of the embedding vector.
readOnly: true
index:
type: integer
format: int32
description: Index of the prompt to which the EmbeddingItem corresponds.
readOnly: true
object:
type: string
enum:
- embedding
description: The object type of this embeddings item. Will always be `embedding`.
description: Representation of a single embeddings relatedness comparison.
EmbeddingsOptions:
type: object
required:
- input
properties:
input:
type: array
items:
type: string
description: |-
Input text to embed, encoded as a string or array of tokens.
To embed multiple inputs in a single request, pass an array
of strings or array of token arrays.
dimensions:
type: integer
format: int32
description: |-
Optional. The number of dimensions the resulting output embeddings should have.
Passing null causes the model to use its default value.
Returns a 422 error if the model doesn't support the value or parameter.
encoding_format:
allOf:
- $ref: '#/components/schemas/EmbeddingEncodingFormat'
description: Optional. The desired format for the returned embeddings.
input_type:
allOf:
- $ref: '#/components/schemas/EmbeddingInputType'
description: |-
Optional. The type of the input.
Returns a 422 error if the model doesn't support the value or parameter.
model:
type: string
description: ID of the specific AI model to use, if more than one model is available on the endpoint.
additionalProperties: {}
description: The configuration information for an embeddings request.
EmbeddingsResult:
type: object
required:
- id
- data
- usage
- object
- model
properties:
id:
type: string
description: Unique identifier for the embeddings result.
readOnly: true
data:
type: array
items:
$ref: '#/components/schemas/EmbeddingItem'
description: Embedding values for the prompts submitted in the request.
readOnly: true
usage:
allOf:
- $ref: '#/components/schemas/EmbeddingsUsage'
description: Usage counts for tokens input using the embeddings API.
readOnly: true
object:
type: string
enum:
- list
description: The object type of the embeddings result. Will always be `list`.
model:
type: string
description: The model ID used to generate this result.
readOnly: true
description: |-
Representation of the response data from an embeddings request.
Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
recommendations, and other similar scenarios.
EmbeddingsUsage:
type: object
required:
- prompt_tokens
- total_tokens
properties:
prompt_tokens:
type: integer
format: int32
description: Number of tokens in the request.
readOnly: true
total_tokens:
type: integer
format: int32
description: |-
Total number of tokens transacted in this request/response. Should equal the
number of tokens in the request.
readOnly: true
description: Measurement of the amount of tokens used in this request and response.
ExtraParameters:
anyOf:
- type: string
- type: string
enum:
- error
- drop
- pass-through
description: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.
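# Illustrative, non-normative sketch of how the `extra-parameters` request header is used:
# with `pass-through`, fields unknown to this API (here, a hypothetical `top_k`) are passed
# along in the payload rather than rejected. The message text and value are placeholders.
#   POST /chat/completions?api-version=2025-04-01
#   extra-parameters: pass-through
#   {
#     "messages": [ { "role": "user", "content": "Hello" } ],
#     "top_k": 40
#   }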
FunctionCall:
type: object
required:
- name
- arguments
properties:
name:
type: string
description: The name of the function to call.
readOnly: true
arguments:
type: string
description: |-
The arguments to call the function with, as generated by the model in JSON format.
Note that the model does not always generate valid JSON, and may hallucinate parameters
not defined by your function schema. Validate the arguments in your code before calling
your function.
readOnly: true
description: The name and arguments of a function that should be called, as generated by the model.
FunctionDefinition:
type: object
required:
- name
properties:
name:
type: string
description: The name of the function to be called.
description:
type: string
description: |-
A description of what the function does. The model will use this description when selecting the function and
interpreting its parameters.
parameters:
type: object
additionalProperties: {}
description: The parameters the function accepts, described as a JSON Schema object.
description: The definition of a caller-specified function that chat completions may invoke in response to matching user input.
ImageEmbeddingInput:
type: object
required:
- image
properties:
image:
type: string
description: 'The input image, encoded as a base64 string in a data URL. Example: `data:image/{format};base64,{data}`.'
text:
type: string
description: |-
Optional. The text input to feed into the model (like DINO, CLIP).
Returns a 422 error if the model doesn't support the value or parameter.
description: Represents an image with optional text.
ImageEmbeddingsOptions:
type: object
required:
- input
properties:
input:
type: array
items:
$ref: '#/components/schemas/ImageEmbeddingInput'
description: |-
Input image to embed. To embed multiple inputs in a single request, pass an array.
The input must not exceed the max input tokens for the model.
dimensions:
type: integer
format: int32
description: |-
Optional. The number of dimensions the resulting output embeddings should have.
Passing null causes the model to use its default value.
Returns a 422 error if the model doesn't support the value or parameter.
encoding_format:
allOf:
- $ref: '#/components/schemas/EmbeddingEncodingFormat'
description: Optional. The desired format for the returned embeddings.
input_type:
allOf:
- $ref: '#/components/schemas/EmbeddingInputType'
description: |-
Optional. The type of the input.
Returns a 422 error if the model doesn't support the value or parameter.
model:
type: string
description: ID of the specific AI model to use, if more than one model is available on the endpoint.
additionalProperties: {}
description: The configuration information for an image embeddings request.
ModelInfo:
type: object
required:
- model_name
- model_type
- model_provider_name
properties:
model_name:
type: string
description: 'The name of the AI model. For example: `Phi21`'
readOnly: true
model_type:
allOf:
- $ref: '#/components/schemas/ModelType'
description: The type of the AI model. A unique identifier for the profile.
readOnly: true
model_provider_name:
type: string
description: 'The model provider name. For example: `Microsoft`'
readOnly: true
description: Represents some basic information about the AI model.
ModelType:
anyOf:
- type: string
- type: string
enum:
- embeddings
- chat-completion
description: The type of AI model.
PromptUsageDetails:
type: object
required:
- audio_tokens
- cached_tokens
properties:
audio_tokens:
type: integer
format: int32
description: The number of tokens corresponding to audio input.
readOnly: true
cached_tokens:
type: integer
format: int32
description: The total number of tokens cached.
readOnly: true
description: A breakdown of tokens used in the prompt/chat history.
StreamingChatChoiceUpdate:
type: object
required:
- index
- finish_reason
- delta
properties:
index:
type: integer
format: int32
description: The ordered index associated with this chat completions choice.
finish_reason:
allOf:
- $ref: '#/components/schemas/CompletionsFinishReason'
nullable: true
description: The reason that this chat completions choice completed its generation.
readOnly: true
delta:
allOf:
- $ref: '#/components/schemas/StreamingChatResponseMessageUpdate'
description: An update to the chat message for a given chat completions prompt.
readOnly: true
description: |-
Represents an update to a single prompt completion when the service is streaming updates
using Server Sent Events (SSE).
Generally, `n` choices are generated per provided prompt with a default value of 1.
Token limits and other settings may limit the number of choices generated.
StreamingChatCompletionsUpdate:
type: object
required:
- id
- object
- created
- model
- choices
properties:
id:
type: string
description: A unique identifier associated with this chat completions response.
object:
type: string
enum:
- chat.completion
description: The response object type, which is always `chat.completion`.
created:
type: integer
format: unixtime
description: |-
The first timestamp associated with generation activity for this completions response,
represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
readOnly: true
model:
type: string
description: The model used for the chat completion.
readOnly: true
choices:
type: array
items:
$ref: '#/components/schemas/StreamingChatChoiceUpdate'
minItems: 1
description: |-
An update to the collection of completion choices associated with this completions response.
Generally, `n` choices are generated per provided prompt with a default value of 1.
Token limits and other settings may limit the number of choices generated.
readOnly: true
usage:
allOf:
- $ref: '#/components/schemas/CompletionsUsage'
description: Usage information for tokens processed and generated as part of this completions operation.
readOnly: true
description: |-
Represents a response update to a chat completions request, when the service is streaming updates
using Server Sent Events (SSE).
Completions support a wide variety of tasks and generate text that continues from or "completes"
provided prompt data.
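# Illustrative, non-normative sketch of one Server Sent Events chunk carrying a
# StreamingChatCompletionsUpdate; the identifier, timestamp, deployment name, and text are
# placeholders.
#   data: {"id":"chatcmpl-123","object":"chat.completion","created":1717600000,
#          "model":"my-model-deployment",
#          "choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"Hel"}}]}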
StreamingChatResponseMessageUpdate:
type: object
properties:
role:
allOf:
- $ref: '#/components/schemas/ChatRole'
description: The chat role associated with the message. If present, it is always 'assistant'.
readOnly: true
content:
type: string
description: The content of the message.
readOnly: true
reasoning_content:
type: string
description: The reasoning content the model used for generating the response.
readOnly: true
tool_calls:
type: array
items:
$ref: '#/components/schemas/StreamingChatResponseToolCallUpdate'
description: |-
The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
completions request to resolve as configured.
readOnly: true
description: A representation of a chat message update as received in a streaming response.
StreamingChatResponseToolCallUpdate:
type: object
required:
- id
- function
properties:
id:
type: string
description: The ID of the tool call.
readOnly: true
function:
allOf:
- $ref: '#/components/schemas/FunctionCall'
description: Updates to the function call requested by the AI model.
readOnly: true
description: An update to the function tool call information requested by the AI model.
Versions:
type: string
enum:
- 2024-05-01-preview
- '2025-04-01'
- '2025-05-01'
- 2025-05-15-preview
description: The AI.Model service versions.
securitySchemes:
ApiKeyAuth:
type: apiKey
in: header
name: api-key
BearerAuth:
type: http
scheme: Bearer
OAuth2Auth:
type: oauth2
flows:
implicit:
authorizationUrl: https://login.microsoftonline.com/common/oauth2/v2.0/authorize
scopes:
https://cognitiveservices.azure.com/.default: ''
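# Illustrative, non-normative sketch of the two header-based authentication options defined
# above; the key and token values are placeholders.
#   api-key: <your-api-key>                        (ApiKeyAuth)
#   Authorization: Bearer <entra-id-access-token>  (BearerAuth / OAuth2Auth)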
servers:
- url: https://{resource}.services.ai.azure.com/api/models
description: AI Model Inference
variables:
resource:
default: ''
description: The Azure AI Services resource name, for example 'my-resource'.