From 5a8a4e3e78a2b0b2ff73cdc8088ccc16807488b8 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:52:27 -0400 Subject: [PATCH 01/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 28 ++++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 19ded68aaec1..e5b3ee918c76 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -39,12 +39,12 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.ChatRole, Access.public); @@access(AI.Model.CompletionsFinishReason, Access.public); @@access(AI.Model.CompletionsUsage, Access.public); -@@access(AI.Model.EmbeddingEncodingFormat, Access.public, "python"); -@@access(AI.Model.EmbeddingInput, Access.public, "python"); -@@access(AI.Model.EmbeddingInputType, Access.public, "python"); -@@access(AI.Model.EmbeddingItem, Access.public, "python"); -@@access(AI.Model.EmbeddingsResult, Access.public, "python"); -@@access(AI.Model.EmbeddingsUsage, Access.public, "python"); +@@access(AI.Model.EmbeddingEncodingFormat, Access.public); +@@access(AI.Model.EmbeddingInput, Access.public); +@@access(AI.Model.EmbeddingInputType, Access.public); +@@access(AI.Model.EmbeddingItem, Access.public); +@@access(AI.Model.EmbeddingsResult, Access.public); +@@access(AI.Model.EmbeddingsUsage, Access.public); @@access(AI.Model.FunctionCall, Access.public); @@access(AI.Model.FunctionDefinition, Access.public); @@access(AI.Model.ModelInfo, Access.public); @@ -54,10 +54,10 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.ChatMessageImageContentItem, Access.public); @@access(AI.Model.ChatMessageImageUrl, Access.public); @@access(AI.Model.ChatMessageImageDetailLevel, Access.public); 
-@@access(AI.Model.StreamingChatCompletionsUpdate, Access.public, "python"); -@@access(AI.Model.StreamingChatChoiceUpdate, Access.public, "python"); -@@access(AI.Model.StreamingChatResponseMessageUpdate, Access.public, "python"); -@@access(AI.Model.StreamingChatResponseToolCallUpdate, Access.public, "python"); +@@access(AI.Model.StreamingChatCompletionsUpdate, Access.public); +@@access(AI.Model.StreamingChatChoiceUpdate, Access.public); +@@access(AI.Model.StreamingChatResponseMessageUpdate, Access.public); +@@access(AI.Model.StreamingChatResponseToolCallUpdate, Access.public); // The operators need to be hidden, since we hand-write the public versions of those to // 1. Add chat completions streaming (to getChatCompletions operator) @@ -66,7 +66,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.getChatCompletions, Access.internal); @@access(AI.Model.getEmbeddings, Access.internal); @@access(AI.Model.getImageEmbeddings, Access.internal); -@@access(AI.Model.getModelInfo, Access.internal, "python"); +@@access(AI.Model.getModelInfo, Access.internal); // We use shorter names in the Python client library @@clientName(AI.Model.ChatRequestSystemMessage, "SystemMessage", "python"); @@ -101,8 +101,7 @@ interface Client1 { { name: "EmbeddingsClient", service: AI.Model, - }, - "python" + } ) interface Client2 { embed is AI.Model.getEmbeddings; @@ -113,8 +112,7 @@ interface Client2 { { name: "ImageEmbeddingsClient", service: AI.Model, - }, - "python" + } ) interface Client3 { embed is AI.Model.getImageEmbeddings; From bfe3365b98c9e9dca7abe9a971882741c7708a57 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:54:01 -0400 Subject: [PATCH 02/38] Update chat_completions.tsp --- specification/ai/ModelClient/models/chat_completions.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 7d12fd9f52b8..2055b43cb3cf 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -9,7 +9,7 @@ using Azure.ClientGenerator.Core; namespace AI.Model; -alias ChatCompletionsOptions = { +model ChatCompletionsOptions = { @doc(""" The collection of context messages associated with this chat completions request. Typical usage begins with a chat message for the System role that provides instructions for From c81997242251c842ef5ed3110ad6388dde49cd72 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:55:27 -0400 Subject: [PATCH 03/38] Update tspconfig.yaml --- specification/ai/ModelClient/tspconfig.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index a43f47381cc8..5a2f03c3615a 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -29,3 +29,7 @@ options: clear-output-folder: true model-namespace: false flavor: azure + "@azure-tools/typespec-java": + package-dir: "azure-ai-inference" + namespace: com.azure.ai.inference + flavor: azure From 4bc47ae220d30299af3c56a062e917ea6a6db2ac Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:02:10 -0400 Subject: [PATCH 04/38] Update chat_completions.tsp --- specification/ai/ModelClient/models/chat_completions.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 2055b43cb3cf..d7c91b605008 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ 
b/specification/ai/ModelClient/models/chat_completions.tsp @@ -9,7 +9,7 @@ using Azure.ClientGenerator.Core; namespace AI.Model; -model ChatCompletionsOptions = { +model ChatCompletionsOptions { @doc(""" The collection of context messages associated with this chat completions request. Typical usage begins with a chat message for the System role that provides instructions for From 0491ed5c44ecde29ff289cf961daaa46a964eb77 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:47:30 -0400 Subject: [PATCH 05/38] Update chat_completions.tsp --- specification/ai/ModelClient/models/chat_completions.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index d7c91b605008..134ab8b73775 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -457,7 +457,7 @@ model FunctionDefinition { #suppress "@azure-tools/typespec-azure-core/bad-record-type" @doc("The parameters the function accepts, described as a JSON Schema object.") - parameters?: Record; + parameters?: unknown; } @doc(""" From 5ef43a1baf96f201b1c1751ecbfb5f772404fe33 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 27 Aug 2024 20:53:47 -0400 Subject: [PATCH 06/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index e5b3ee918c76..64764c3d28a8 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -66,7 +66,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.getChatCompletions, Access.internal); 
@@access(AI.Model.getEmbeddings, Access.internal); @@access(AI.Model.getImageEmbeddings, Access.internal); -@@access(AI.Model.getModelInfo, Access.internal); +@@access(AI.Model.getModelInfo, Access.public, "java"); // We use shorter names in the Python client library @@clientName(AI.Model.ChatRequestSystemMessage, "SystemMessage", "python"); From 01059a3a0949c7be396636f36d0f2266910fa3eb Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:49:29 -0400 Subject: [PATCH 07/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 64764c3d28a8..c277c39ef524 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -67,6 +67,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.getEmbeddings, Access.internal); @@access(AI.Model.getImageEmbeddings, Access.internal); @@access(AI.Model.getModelInfo, Access.public, "java"); +@@clientName(AI.Model.ChatCompletionsResponseFormatJSON, "ChatCompletionsResponseFormatJson", "java"); // We use shorter names in the Python client library @@clientName(AI.Model.ChatRequestSystemMessage, "SystemMessage", "python"); From 65a2758c831f4619fbd44588723cd96bb498eade Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:28:17 -0400 Subject: [PATCH 08/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index c277c39ef524..19b08cf68000 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -66,7 +66,7 @@ namespace Customizations; // The actual name here 
doesn't matter and is here for @@access(AI.Model.getChatCompletions, Access.internal); @@access(AI.Model.getEmbeddings, Access.internal); @@access(AI.Model.getImageEmbeddings, Access.internal); -@@access(AI.Model.getModelInfo, Access.public, "java"); +@@access(AI.Model.getModelInfo, Access.internal); @@clientName(AI.Model.ChatCompletionsResponseFormatJSON, "ChatCompletionsResponseFormatJson", "java"); // We use shorter names in the Python client library From 14a23cfe23884571e6971feebfa56fb4f64dd4a7 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:29:26 -0400 Subject: [PATCH 09/38] Update tspconfig.yaml --- specification/ai/ModelClient/tspconfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 5a2f03c3615a..f42d41848134 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -32,4 +32,5 @@ options: "@azure-tools/typespec-java": package-dir: "azure-ai-inference" namespace: com.azure.ai.inference + partial-update: true flavor: azure From 7d64c392924c28cd125576903cc5121a1f444dcc Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:48:51 -0400 Subject: [PATCH 10/38] Update tspconfig.yaml --- specification/ai/ModelClient/tspconfig.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index f42d41848134..c75653928b6a 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -33,4 +33,6 @@ options: package-dir: "azure-ai-inference" namespace: com.azure.ai.inference partial-update: true + enable-sync-stack: true + generate-tests: false flavor: azure From 4dc99b86409163e24341215f4bbb6418c51ac186 Mon Sep 17 00:00:00 2001 From: 
Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:09:48 -0400 Subject: [PATCH 11/38] Update tspconfig.yaml --- specification/ai/ModelClient/tspconfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index c75653928b6a..273e58898c83 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -35,4 +35,5 @@ options: partial-update: true enable-sync-stack: true generate-tests: false + customization-class: customization/src/main/java/InferenceCustomization.java flavor: azure From 69098f115bd7d8b0a27bc7f57b2fca2906982ae1 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:10:33 -0400 Subject: [PATCH 12/38] Update tspconfig.yaml fix typo --- specification/ai/ModelClient/tspconfig.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 273e58898c83..81dcecdcda6f 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -35,5 +35,5 @@ options: partial-update: true enable-sync-stack: true generate-tests: false - customization-class: customization/src/main/java/InferenceCustomization.java + customization-class: customization/src/main/java/InferenceCustomizations.java flavor: azure From 016b80ae90f5fabc1e572efabd3ecb1030d444c0 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:29:59 -0400 Subject: [PATCH 13/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 19b08cf68000..ed962108e7cb 100644 --- 
a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -45,6 +45,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.EmbeddingItem, Access.public); @@access(AI.Model.EmbeddingsResult, Access.public); @@access(AI.Model.EmbeddingsUsage, Access.public); +@@access(AI.Model.ExtraParameters, Access.public, "java"); @@access(AI.Model.FunctionCall, Access.public); @@access(AI.Model.FunctionDefinition, Access.public); @@access(AI.Model.ModelInfo, Access.public); From 50eac41d6593a47f25b8f70b615fb1b7c22b5a73 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:21:20 -0800 Subject: [PATCH 14/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 - 1 file changed, 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index ed962108e7cb..06155570f899 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -68,7 +68,6 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.getEmbeddings, Access.internal); @@access(AI.Model.getImageEmbeddings, Access.internal); @@access(AI.Model.getModelInfo, Access.internal); -@@clientName(AI.Model.ChatCompletionsResponseFormatJSON, "ChatCompletionsResponseFormatJson", "java"); // We use shorter names in the Python client library @@clientName(AI.Model.ChatRequestSystemMessage, "SystemMessage", "python"); From 4d0913a8ef22c57ae1bc8b3d9896d4c8efb1860c Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:23:16 -0800 Subject: [PATCH 15/38] Update routes.tsp --- specification/ai/ModelClient/routes.tsp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/specification/ai/ModelClient/routes.tsp 
b/specification/ai/ModelClient/routes.tsp index 7b79bbfad9f8..e8f49b9b4380 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -25,8 +25,8 @@ namespace AI.Model; @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< { - ...ChatCompletionsOptions; - ...AdditionalRequestHeaders; + { @body body: ChatCompletionsOptions } & + AdditionalRequestHeaders; }, ChatCompletions >; @@ -39,8 +39,8 @@ op getChatCompletions is Azure.Core.RpcOperation< @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< { - ...EmbeddingsOptions; - ...AdditionalRequestHeaders; + { @body body: EmbeddingsOptions } & + AdditionalRequestHeaders; }, EmbeddingsResult >; @@ -53,8 +53,8 @@ op getEmbeddings is Azure.Core.RpcOperation< @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< { - ...ImageEmbeddingsOptions; - ...AdditionalRequestHeaders; + { @body body: ImageEmbeddingsOptions } & + AdditionalRequestHeaders; }, EmbeddingsResult >; From 5beef1d52c4c867bf0cdfd188b5642c6d994cc86 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:25:23 -0800 Subject: [PATCH 16/38] Update routes.tsp --- specification/ai/ModelClient/routes.tsp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp index e8f49b9b4380..ab157375722f 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -24,10 +24,9 @@ namespace AI.Model; @actionSeparator("/") @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< - { { @body body: ChatCompletionsOptions } & AdditionalRequestHeaders; - }, + , ChatCompletions >; @@ -38,10 +37,10 @@ op getChatCompletions is Azure.Core.RpcOperation< @actionSeparator("/") @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< - { + { @body body: 
EmbeddingsOptions } & AdditionalRequestHeaders; - }, + , EmbeddingsResult >; @@ -52,10 +51,10 @@ op getEmbeddings is Azure.Core.RpcOperation< @actionSeparator("/") @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< - { + { @body body: ImageEmbeddingsOptions } & AdditionalRequestHeaders; - }, + , EmbeddingsResult >; From ac4196acd80e6f4ae9d97484c097395ed37e2902 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:29:56 -0800 Subject: [PATCH 17/38] Update routes.tsp --- specification/ai/ModelClient/routes.tsp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp index ab157375722f..d35445590eb3 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -24,9 +24,10 @@ namespace AI.Model; @actionSeparator("/") @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< - { @body body: ChatCompletionsOptions } & - AdditionalRequestHeaders; - , + { + @body body: ChatCompletionsOptions; + ...AdditionalRequestHeaders; + }, ChatCompletions >; @@ -37,10 +38,10 @@ op getChatCompletions is Azure.Core.RpcOperation< @actionSeparator("/") @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< - - { @body body: EmbeddingsOptions } & - AdditionalRequestHeaders; - , + { + @body body: EmbeddingsOptions; + ...AdditionalRequestHeaders; + }, EmbeddingsResult >; @@ -51,10 +52,10 @@ op getEmbeddings is Azure.Core.RpcOperation< @actionSeparator("/") @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< - - { @body body: ImageEmbeddingsOptions } & - AdditionalRequestHeaders; - , + { + @body body: ImageEmbeddingsOptions; + ...AdditionalRequestHeaders; + }, EmbeddingsResult >; From 1307c7d622b7e31cf44ded2744c897629427eb0e Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Fri, 24 
Jan 2025 11:19:22 -0800 Subject: [PATCH 18/38] updated data-plane for ModelClient --- .../preview/2024-05-01-preview/openapi.json | 191 +++++++++--------- 1 file changed, 96 insertions(+), 95 deletions(-) diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index 25f1c340969a..9676fda8ab33 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -92,98 +92,7 @@ "in": "body", "required": true, "schema": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatRequestMessage" - }, - "x-ms-identifiers": [] - }, - "frequency_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "stream": { - "type": "boolean", - "description": "A value indicating whether chat completions should be streamed for this request." 
- }, - "presence_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "temperature": { - "type": "number", - "format": "float", - "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 0.7, - "minimum": 0, - "maximum": 1 - }, - "top_p": { - "type": "number", - "format": "float", - "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 1, - "minimum": 0, - "maximum": 1 - }, - "max_tokens": { - "type": "integer", - "format": "int32", - "description": "The maximum number of tokens to generate.", - "minimum": 0 - }, - "response_format": { - "$ref": "#/definitions/ChatCompletionsResponseFormat", - "description": "The format that the model must output. 
Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message." - }, - "stop": { - "type": "array", - "description": "A collection of textual sequences that will end completions generation.", - "minItems": 1, - "items": { - "type": "string" - } - }, - "tools": { - "type": "array", - "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatCompletionsToolDefinition" - }, - "x-ms-identifiers": [] - }, - "tool_choice": { - "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", - "x-ms-client-name": "toolChoice" - }, - "seed": { - "type": "integer", - "format": "int64", - "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." 
- } - }, - "required": [ - "messages" - ], - "additionalProperties": {} + "$ref": "#/definitions/ChatCompletionsOptions" } } ], @@ -643,6 +552,100 @@ "function" ] }, + "ChatCompletionsOptions": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatRequestMessage" + }, + "x-ms-identifiers": [] + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "stream": { + "type": "boolean", + "description": "A value indicating whether chat completions should be streamed for this request." 
+ }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "temperature": { + "type": "number", + "format": "float", + "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 0.7, + "minimum": 0, + "maximum": 1 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 1, + "minimum": 0, + "maximum": 1 + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens to generate.", + "minimum": 0 + }, + "response_format": { + "$ref": "#/definitions/ChatCompletionsResponseFormat", + "description": "The format that the model must output. 
Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message." + }, + "stop": { + "type": "array", + "description": "A collection of textual sequences that will end completions generation.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "tools": { + "type": "array", + "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatCompletionsToolDefinition" + }, + "x-ms-identifiers": [] + }, + "tool_choice": { + "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", + "x-ms-client-name": "toolChoice" + }, + "seed": { + "type": "integer", + "format": "int64", + "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "messages" + ], + "additionalProperties": {} + }, "ChatCompletionsResponseFormat": { "type": "object", "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", @@ -1308,9 +1311,7 @@ "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters." 
}, "parameters": { - "type": "object", - "description": "The parameters the function accepts, described as a JSON Schema object.", - "additionalProperties": {} + "description": "The parameters the function accepts, described as a JSON Schema object." } }, "required": [ From c6069eb3ce69d6bff37e64c19d58e5e7ab632a8b Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Fri, 24 Jan 2025 11:47:11 -0800 Subject: [PATCH 19/38] add warning suppressions to allow compilation --- specification/ai/ModelClient/models/chat_completions.tsp | 5 ++++- specification/ai/ModelClient/models/embeddings.tsp | 2 ++ specification/ai/ModelClient/models/image_embeddings.tsp | 2 ++ specification/ai/ModelClient/routes.tsp | 6 ++++++ specification/ai/ModelClient/tspconfig.yaml | 2 +- .../AI.Model/preview/2024-05-01-preview/openapi.json | 4 ++++ 6 files changed, 19 insertions(+), 2 deletions(-) diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 134ab8b73775..b0ed4650a7c0 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -9,6 +9,8 @@ using Azure.ClientGenerator.Core; namespace AI.Model; +@doc("The ChatCompletionsOptions model") +#suppress "@azure-tools/typespec-azure-core/bad-record-type" model ChatCompletionsOptions { @doc(""" The collection of context messages associated with this chat completions request. 
@@ -115,6 +117,7 @@ model ChatCompletionsOptions { """) `model`?: string; + #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; }; @@ -455,7 +458,7 @@ model FunctionDefinition { """) description?: string; - #suppress "@azure-tools/typespec-azure-core/bad-record-type" + #suppress "@azure-tools/typespec-azure-core/no-unknown" "External API shape takes an arbitrary json" @doc("The parameters the function accepts, described as a JSON Schema object.") parameters?: unknown; } diff --git a/specification/ai/ModelClient/models/embeddings.tsp b/specification/ai/ModelClient/models/embeddings.tsp index 8e2aa8e1751f..3b3fbf54fb91 100644 --- a/specification/ai/ModelClient/models/embeddings.tsp +++ b/specification/ai/ModelClient/models/embeddings.tsp @@ -6,6 +6,7 @@ using TypeSpec.Http; namespace AI.Model; +#suppress "@azure-tools/typespec-azure-core/bad-record-type" alias EmbeddingsOptions = { @doc(""" Input text to embed, encoded as a string or array of tokens. @@ -37,5 +38,6 @@ alias EmbeddingsOptions = { """) `model`?: string; + #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; }; diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp index 6e2f407d5dbc..dcea3f1b8358 100644 --- a/specification/ai/ModelClient/models/image_embeddings.tsp +++ b/specification/ai/ModelClient/models/image_embeddings.tsp @@ -6,6 +6,7 @@ using TypeSpec.Http; namespace AI.Model; +#suppress "@azure-tools/typespec-azure-core/bad-record-type" alias ImageEmbeddingsOptions = { @doc(""" Input image to embed. To embed multiple inputs in a single request, pass an array. 
@@ -38,6 +39,7 @@ alias ImageEmbeddingsOptions = { """) `model`?: string; + #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; }; diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp index d35445590eb3..206d2cf130b9 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -25,6 +25,8 @@ namespace AI.Model; @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< { + @doc("request options to pass to the endpoint using complete path") + #suppress "@azure-tools/typespec-azure-core/bad-record-type" @body body: ChatCompletionsOptions; ...AdditionalRequestHeaders; }, @@ -39,6 +41,8 @@ op getChatCompletions is Azure.Core.RpcOperation< @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< { + @doc("request options to pass to the endpoint using embeddings path") + #suppress "@azure-tools/typespec-azure-core/bad-record-type" @body body: EmbeddingsOptions; ...AdditionalRequestHeaders; }, @@ -53,6 +57,8 @@ op getEmbeddings is Azure.Core.RpcOperation< @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< { + @doc("request options to pass to the endpoint using images embeddings path") + #suppress "@azure-tools/typespec-azure-core/bad-record-type" @body body: ImageEmbeddingsOptions; ...AdditionalRequestHeaders; }, diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 81dcecdcda6f..01af21e11251 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -16,7 +16,7 @@ options: "@azure-tools/typespec-autorest": azure-resource-provider-folder: "data-plane" emitter-output-dir: "{project-root}/.." 
- examples-directory: "{project-root}/examples" + examples-dir: "{project-root}/examples" output-file: "{azure-resource-provider-folder}/{service-name}/{version-status}/{version}/openapi.json" "@azure-tools/typespec-python": package-mode: dataplane diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index 9676fda8ab33..ad6f6d401407 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -90,6 +90,7 @@ { "name": "body", "in": "body", + "description": "request options to pass to the endpoint using complete path", "required": true, "schema": { "$ref": "#/definitions/ChatCompletionsOptions" @@ -171,6 +172,7 @@ { "name": "body", "in": "body", + "description": "request options to pass to the endpoint using embeddings path", "required": true, "schema": { "type": "object", @@ -282,6 +284,7 @@ { "name": "body", "in": "body", + "description": "request options to pass to the endpoint using images embeddings path", "required": true, "schema": { "type": "object", @@ -554,6 +557,7 @@ }, "ChatCompletionsOptions": { "type": "object", + "description": "The ChatCompletionsOptions model", "properties": { "messages": { "type": "array", From 587744e82101375c32ece34c5c80150db67297cd Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Fri, 24 Jan 2025 13:53:32 -0800 Subject: [PATCH 20/38] formatting changes --- specification/ai/ModelClient/client.tsp | 20 ++++++++----------- .../ModelClient/models/chat_completions.tsp | 4 ++-- specification/ai/ModelClient/routes.tsp | 18 +++++++++++------ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 06155570f899..56c11ad0a218 100644 --- a/specification/ai/ModelClient/client.tsp +++ 
b/specification/ai/ModelClient/client.tsp @@ -98,23 +98,19 @@ interface Client1 { getModelInfo is AI.Model.getModelInfo; } -@client( - { - name: "EmbeddingsClient", - service: AI.Model, - } -) +@client({ + name: "EmbeddingsClient", + service: AI.Model, +}) interface Client2 { embed is AI.Model.getEmbeddings; getModelInfo is AI.Model.getModelInfo; } -@client( - { - name: "ImageEmbeddingsClient", - service: AI.Model, - } -) +@client({ + name: "ImageEmbeddingsClient", + service: AI.Model, +}) interface Client3 { embed is AI.Model.getImageEmbeddings; getModelInfo is AI.Model.getModelInfo; diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index b0ed4650a7c0..5783e86bf62b 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -9,8 +9,8 @@ using Azure.ClientGenerator.Core; namespace AI.Model; -@doc("The ChatCompletionsOptions model") #suppress "@azure-tools/typespec-azure-core/bad-record-type" +@doc("The ChatCompletionsOptions model") model ChatCompletionsOptions { @doc(""" The collection of context messages associated with this chat completions request. 
@@ -119,7 +119,7 @@ model ChatCompletionsOptions { #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; -}; +} alias ChatCompletionsCommon = { @doc("A unique identifier associated with this chat completions response.") diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp index 206d2cf130b9..30eb9dadd893 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -25,9 +25,11 @@ namespace AI.Model; @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< { - @doc("request options to pass to the endpoint using complete path") #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @body body: ChatCompletionsOptions; + @doc("request options to pass to the endpoint using complete path") + @body + body: ChatCompletionsOptions; + ...AdditionalRequestHeaders; }, ChatCompletions @@ -41,9 +43,11 @@ op getChatCompletions is Azure.Core.RpcOperation< @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< { - @doc("request options to pass to the endpoint using embeddings path") #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @body body: EmbeddingsOptions; + @doc("request options to pass to the endpoint using embeddings path") + @body + body: EmbeddingsOptions; + ...AdditionalRequestHeaders; }, EmbeddingsResult @@ -57,9 +61,11 @@ op getEmbeddings is Azure.Core.RpcOperation< @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< { - @doc("request options to pass to the endpoint using images embeddings path") #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @body body: ImageEmbeddingsOptions; + @doc("request options to pass to the endpoint using images embeddings path") + @body + body: ImageEmbeddingsOptions; + ...AdditionalRequestHeaders; }, EmbeddingsResult From 287f734522cd0b947e1289bb03655d1d3923bd6a Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Mon, 27 Jan 2025 09:07:35 -0800 
Subject: [PATCH 21/38] sync with current data-plane examples --- .../GetChatCompletions_MaximumSet_Gen.json | 37 +++++++++---------- .../GetChatCompletions_MinimumSet_Gen.json | 16 ++++---- .../GetEmbeddings_MaximumSet_Gen.json | 15 ++++---- .../GetImageEmbeddings_MaximumSet_Gen.json | 17 +++++---- .../examples/GetModelInfo_MaximumSet_Gen.json | 4 +- .../examples/GetModelInfo_MinimumSet_Gen.json | 4 +- 6 files changed, 47 insertions(+), 46 deletions(-) diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json index 8897ad8be3a2..a097e1a38d0d 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json @@ -10,9 +10,9 @@ "role": "ChatRequestMessage" } ], - "frequency_penalty": 1, + "frequency_penalty": -2, "stream": true, - "presence_penalty": -2, + "presence_penalty": -1, "temperature": 0, "top_p": 0, "max_tokens": 0, @@ -20,47 +20,46 @@ "type": "ChatCompletionsResponseFormat" }, "stop": [ - "lwlqenszpaurxntgit" + "dcfnxrdeumnoytdaooqkbl" ], "tools": [ { "type": "function", "function": { - "name": "velupowkmhiyypklqmgzzlmcjokcs", - "description": "dngfctkfjoypnjfikiowvpuvisga", - "parameters": {} + "name": "ikvkykzp", + "description": "gofxoftbpdi" } } ], - "seed": 10, - "model": "bcxpoxjhymqvjo" + "seed": 21, + "model": "askiizcjob" } }, "responses": { "200": { "body": { - "id": "qdjwxvfotmorpdevwdmhbqnsgi", - "created": 16, - "model": "wr", + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", "usage": { - "completion_tokens": 9, - "prompt_tokens": 15, - "total_tokens": 1 + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 }, "choices": [ { - "index": 29, + 
"index": 7, "finish_reason": "stop", "message": { "role": "system", - "content": "wynvtftvlenfzzzrfmg", + "content": "jnsnrwblpuokzbkrzdcwubpfz", "tool_calls": [ { - "id": "zlmfpvg", + "id": "yrobmilsrugmbwukmzo", "type": "function", "function": { - "name": "velupowkmhiyypklqmgzzlmcjokcs", - "arguments": "zocluceldmcazefk" + "name": "ikvkykzp", + "arguments": "oqxvktuduomvckic" } } ] diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json index 81098e271ad8..d2bc60ef2f8a 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json @@ -14,21 +14,21 @@ "responses": { "200": { "body": { - "id": "qdjwxvfotmorpdevwdmhbqnsgi", - "created": 16, - "model": "wr", + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", "usage": { - "completion_tokens": 9, - "prompt_tokens": 15, - "total_tokens": 1 + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 }, "choices": [ { - "index": 29, + "index": 7, "finish_reason": "stop", "message": { "role": "system", - "content": "wynvtftvlenfzzzrfmg" + "content": "jnsnrwblpuokzbkrzdcwubpfz" } } ] diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json index 2e0f4eb70e80..413c4531a81b 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json @@ -6,27 +6,28 @@ "extra-parameters": "error", "body": 
{ "input": [ - "wditkfkcn" + "p" ], - "dimensions": 14, + "dimensions": 11, "encoding_format": "base64", "input_type": "text", - "model": "esgcnvlwfzgrstu" + "model": "kwkpluujwiabfquhkaugttxut" } }, "responses": { "200": { "body": { + "id": "cknxthfa", "data": [ { - "index": 4 + "index": 21 } ], "usage": { - "prompt_tokens": 30, - "total_tokens": 29 + "prompt_tokens": 4, + "total_tokens": 22 }, - "model": "rbnjxkkdrp" + "model": "uvrmctbnze" } } } diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json index 7a2162d40771..5a4ef17ad877 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json @@ -7,29 +7,30 @@ "body": { "input": [ { - "image": "hxkwvrx", - "text": "qpoyljvucirvkzjhhnhpdeqkl" + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" } ], - "dimensions": 17, + "dimensions": 26, "encoding_format": "base64", "input_type": "text", - "model": "wssoguntnhwg" + "model": "jyb" } }, "responses": { "200": { "body": { + "id": "cknxthfa", "data": [ { - "index": 4 + "index": 21 } ], "usage": { - "prompt_tokens": 30, - "total_tokens": 29 + "prompt_tokens": 4, + "total_tokens": 22 }, - "model": "rbnjxkkdrp" + "model": "uvrmctbnze" } } } diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json index aa45ac99562b..3e40dd40effa 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json +++ 
b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json @@ -7,9 +7,9 @@ "responses": { "200": { "body": { - "model_name": "wzkm", + "model_name": "jno", "model_type": "embeddings", - "model_provider_name": "jkxwuyloxsmuhsevvytzp" + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" } } } diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json index 8468a6d5e85c..a819c57488ff 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json @@ -7,9 +7,9 @@ "responses": { "200": { "body": { - "model_name": "wzkm", + "model_name": "jno", "model_type": "embeddings", - "model_provider_name": "jkxwuyloxsmuhsevvytzp" + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" } } } From 27bd96776e2957a1a8bb366f26ae36057e0018d4 Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Mon, 27 Jan 2025 09:21:54 -0800 Subject: [PATCH 22/38] sync examples json with dargilco/ai-model-inference --- .../GetChatCompletions_MaximumSet_Gen.json | 37 +++++++++---------- .../GetChatCompletions_MinimumSet_Gen.json | 16 ++++---- .../GetEmbeddings_MaximumSet_Gen.json | 15 ++++---- .../GetImageEmbeddings_MaximumSet_Gen.json | 17 +++++---- .../GetModelInfo_MaximumSet_Gen.json | 4 +- .../GetModelInfo_MinimumSet_Gen.json | 4 +- 6 files changed, 47 insertions(+), 46 deletions(-) diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json index 8897ad8be3a2..a097e1a38d0d 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json 
+++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json @@ -10,9 +10,9 @@ "role": "ChatRequestMessage" } ], - "frequency_penalty": 1, + "frequency_penalty": -2, "stream": true, - "presence_penalty": -2, + "presence_penalty": -1, "temperature": 0, "top_p": 0, "max_tokens": 0, @@ -20,47 +20,46 @@ "type": "ChatCompletionsResponseFormat" }, "stop": [ - "lwlqenszpaurxntgit" + "dcfnxrdeumnoytdaooqkbl" ], "tools": [ { "type": "function", "function": { - "name": "velupowkmhiyypklqmgzzlmcjokcs", - "description": "dngfctkfjoypnjfikiowvpuvisga", - "parameters": {} + "name": "ikvkykzp", + "description": "gofxoftbpdi" } } ], - "seed": 10, - "model": "bcxpoxjhymqvjo" + "seed": 21, + "model": "askiizcjob" } }, "responses": { "200": { "body": { - "id": "qdjwxvfotmorpdevwdmhbqnsgi", - "created": 16, - "model": "wr", + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", "usage": { - "completion_tokens": 9, - "prompt_tokens": 15, - "total_tokens": 1 + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 }, "choices": [ { - "index": 29, + "index": 7, "finish_reason": "stop", "message": { "role": "system", - "content": "wynvtftvlenfzzzrfmg", + "content": "jnsnrwblpuokzbkrzdcwubpfz", "tool_calls": [ { - "id": "zlmfpvg", + "id": "yrobmilsrugmbwukmzo", "type": "function", "function": { - "name": "velupowkmhiyypklqmgzzlmcjokcs", - "arguments": "zocluceldmcazefk" + "name": "ikvkykzp", + "arguments": "oqxvktuduomvckic" } } ] diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json index 81098e271ad8..d2bc60ef2f8a 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json @@ -14,21 +14,21 @@ "responses": 
{ "200": { "body": { - "id": "qdjwxvfotmorpdevwdmhbqnsgi", - "created": 16, - "model": "wr", + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", "usage": { - "completion_tokens": 9, - "prompt_tokens": 15, - "total_tokens": 1 + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 }, "choices": [ { - "index": 29, + "index": 7, "finish_reason": "stop", "message": { "role": "system", - "content": "wynvtftvlenfzzzrfmg" + "content": "jnsnrwblpuokzbkrzdcwubpfz" } } ] diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json index 2e0f4eb70e80..413c4531a81b 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json @@ -6,27 +6,28 @@ "extra-parameters": "error", "body": { "input": [ - "wditkfkcn" + "p" ], - "dimensions": 14, + "dimensions": 11, "encoding_format": "base64", "input_type": "text", - "model": "esgcnvlwfzgrstu" + "model": "kwkpluujwiabfquhkaugttxut" } }, "responses": { "200": { "body": { + "id": "cknxthfa", "data": [ { - "index": 4 + "index": 21 } ], "usage": { - "prompt_tokens": 30, - "total_tokens": 29 + "prompt_tokens": 4, + "total_tokens": 22 }, - "model": "rbnjxkkdrp" + "model": "uvrmctbnze" } } } diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json index 7a2162d40771..5a4ef17ad877 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json @@ -7,29 +7,30 @@ "body": { "input": [ { - "image": "hxkwvrx", - "text": 
"qpoyljvucirvkzjhhnhpdeqkl" + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" } ], - "dimensions": 17, + "dimensions": 26, "encoding_format": "base64", "input_type": "text", - "model": "wssoguntnhwg" + "model": "jyb" } }, "responses": { "200": { "body": { + "id": "cknxthfa", "data": [ { - "index": 4 + "index": 21 } ], "usage": { - "prompt_tokens": 30, - "total_tokens": 29 + "prompt_tokens": 4, + "total_tokens": 22 }, - "model": "rbnjxkkdrp" + "model": "uvrmctbnze" } } } diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json index aa45ac99562b..3e40dd40effa 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json @@ -7,9 +7,9 @@ "responses": { "200": { "body": { - "model_name": "wzkm", + "model_name": "jno", "model_type": "embeddings", - "model_provider_name": "jkxwuyloxsmuhsevvytzp" + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" } } } diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json index 8468a6d5e85c..a819c57488ff 100644 --- a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json @@ -7,9 +7,9 @@ "responses": { "200": { "body": { - "model_name": "wzkm", + "model_name": "jno", "model_type": "embeddings", - "model_provider_name": "jkxwuyloxsmuhsevvytzp" + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" } } } From 23514a5b3fc94593803f4358a060fea20ee171ee Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 29 Jan 
2025 11:30:32 -0800 Subject: [PATCH 23/38] Remove config class to see if that fixes java gen --- specification/ai/ModelClient/tspconfig.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 01af21e11251..40c0c0b4814f 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -35,5 +35,4 @@ options: partial-update: true enable-sync-stack: true generate-tests: false - customization-class: customization/src/main/java/InferenceCustomizations.java flavor: azure From 2d46d8d9e544e55ec7653cb14c4dada785581491 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:52:57 -0800 Subject: [PATCH 24/38] Update tspconfig.yaml --- specification/ai/ModelClient/tspconfig.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 40c0c0b4814f..305068936484 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -33,6 +33,7 @@ options: package-dir: "azure-ai-inference" namespace: com.azure.ai.inference partial-update: true + customization-class: customization/src/main/java/InferenceCustomizations.java enable-sync-stack: true generate-tests: false flavor: azure From f0aa20254d9d258bb2e4761ce083243a030f08ed Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:54:34 -0800 Subject: [PATCH 25/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 56c11ad0a218..6c5707bac496 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -88,6 +88,7 @@ namespace 
Customizations; // The actual name here doesn't matter and is here for "ImageDetailLevel", "python" ); +@@clientName(AI.Model.ChatCompletionsResponseFormatJSON, "ChatCompletionsResponseFormatJson", "java"); @client({ name: "ChatCompletionsClient", From a0e0b91f149b89388e72195506c360b443b00568 Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Fri, 31 Jan 2025 08:40:00 -0800 Subject: [PATCH 26/38] merge model changes from trangevi/structured-output-java --- specification/ai/ModelClient/client.tsp | 20 +- .../ModelClient/models/chat_completions.tsp | 79 +++++-- .../ai/ModelClient/models/common.tsp | 23 +- .../ai/ModelClient/models/embeddings.tsp | 2 - .../ModelClient/models/image_embeddings.tsp | 8 +- specification/ai/ModelClient/tspconfig.yaml | 1 + .../preview/2024-05-01-preview/openapi.json | 205 +++++++++++------- 7 files changed, 215 insertions(+), 123 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 56c11ad0a218..f0dea205585b 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -8,28 +8,22 @@ using TypeSpec.Versioning; @useDependency(AI.Model.Versions.v2024_05_01_Preview) namespace Customizations; // The actual name here doesn't matter and is here for organization purposes only -// Are these needed? -@@usage(AI.Model.ChatCompletions, Usage.output); -@@usage(AI.Model.ModelInfo, Usage.output); -@@usage(AI.Model.StreamingChatCompletionsUpdate, Usage.output); -@@usage(AI.Model.StreamingChatChoiceUpdate, Usage.output); -@@usage(AI.Model.StreamingChatResponseMessageUpdate, Usage.output); -@@usage(AI.Model.StreamingChatResponseToolCallUpdate, Usage.output); - // Since we made all operator methods internal, we need to expliclty // say we still want the models they use to be public. 
@@access(AI.Model.ChatChoice, Access.public); @@access(AI.Model.ChatCompletions, Access.public); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); -@@access(AI.Model.ChatCompletionsNamedToolSelection, Access.public); -@@access(AI.Model.ChatCompletionsFunctionToolSelection, Access.public); +@@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); +@@access(AI.Model.ChatCompletionsNamedToolChoiceFunction, Access.public); @@access(AI.Model.ChatCompletionsResponseFormat, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormatJSON, Access.public); +@@access(AI.Model.ChatCompletionsResponseFormatJsonObject, Access.public); +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchema, Access.public); +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, Access.public); @@access(AI.Model.ChatCompletionsResponseFormatText, Access.public); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); -@@access(AI.Model.ChatCompletionsToolSelectionPreset, Access.public); +@@access(AI.Model.ChatCompletionsToolChoicePreset, Access.public); @@access(AI.Model.ChatRequestAssistantMessage, Access.public); @@access(AI.Model.ChatRequestMessage, Access.public); @@access(AI.Model.ChatRequestSystemMessage, Access.public); @@ -40,12 +34,12 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.CompletionsFinishReason, Access.public); @@access(AI.Model.CompletionsUsage, Access.public); @@access(AI.Model.EmbeddingEncodingFormat, Access.public); -@@access(AI.Model.EmbeddingInput, Access.public); @@access(AI.Model.EmbeddingInputType, Access.public); @@access(AI.Model.EmbeddingItem, Access.public); @@access(AI.Model.EmbeddingsResult, Access.public); @@access(AI.Model.EmbeddingsUsage, Access.public); @@access(AI.Model.ExtraParameters, Access.public, "java"); 
+@@access(AI.Model.ImageEmbeddingInput, Access.public); @@access(AI.Model.FunctionCall, Access.public); @@access(AI.Model.FunctionDefinition, Access.public); @@access(AI.Model.ModelInfo, Access.public); diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 5783e86bf62b..886938346885 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -80,9 +80,13 @@ model ChatCompletionsOptions { max_tokens?: int32; @doc(""" - The format that the model must output. Use this to enable JSON mode instead of the default text mode. - Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON - via a system or user message. + An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length. """) response_format?: ChatCompletionsResponseFormat; @@ -104,7 +108,7 @@ model ChatCompletionsOptions { @doc(""" If specified, the model will configure which of the provided tools it can use for the chat completions response. 
""") - toolChoice?: ChatCompletionsToolSelection; + toolChoice?: ChatCompletionsToolChoice; @doc(""" If specified, the system will make a best effort to sample deterministically such that repeated requests with the @@ -119,7 +123,7 @@ model ChatCompletionsOptions { #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; -} +}; alias ChatCompletionsCommon = { @doc("A unique identifier associated with this chat completions response.") @@ -258,11 +262,58 @@ model ChatCompletionsResponseFormatText extends ChatCompletionsResponseFormat { Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON via a system or user message. """) -model ChatCompletionsResponseFormatJSON extends ChatCompletionsResponseFormat { +model ChatCompletionsResponseFormatJsonObject + extends ChatCompletionsResponseFormat { @doc("Response format type: always 'json_object' for this object.") type: "json_object"; } +@doc(""" + Defines the response format for chat completions as JSON with a given schema. The AI model + will need to adhere to this schema when generating completions. + """) +model ChatCompletionsResponseFormatJsonSchemaDefinition { + @doc(""" + A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + """) + name: string; + + @doc(""" + The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. + Note that AI models usually only support a subset of the keywords defined by JSON schema. Consult your AI model documentation + to determine what is supported. + """) + schema: Record; + + @doc(""" + A description of the response format, used by the AI model to determine how to generate responses in this format. + """) + description?: string; + + @doc(""" + If set to true, the service will error out if the provided JSON schema contains keywords + not supported by the AI model. 
An example of such keyword may be `maxLength` for JSON type `string`. + If false, and the provided JSON schema contains keywords not supported + by the AI model, the AI model will not error out. Instead it will ignore the unsupported keywords. + """) + strict?: boolean = false; +} + +@doc(""" + A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a + JSON schema specified by the caller. + """) +model ChatCompletionsResponseFormatJsonSchema + extends ChatCompletionsResponseFormat { + @doc(""" + The type of response format being defined: `json_schema` + """) + type: "json_schema"; + + /** The definition of the required JSON schema in the response, and associated metadata. */ + json_schema: ChatCompletionsResponseFormatJsonSchemaDefinition; +} + alias ChatChoiceCommon = { @doc("The ordered index associated with this chat completions choice.") index: int32; @@ -334,7 +385,7 @@ model ChatRequestAssistantMessage extends ChatRequestMessage { #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" @doc("The content of the message.") - content?: string | null; + content?: string; @encodedName("application/json", "tool_calls") @doc(""" @@ -351,7 +402,7 @@ model ChatRequestToolMessage extends ChatRequestMessage { #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" @doc("The content of the message.") - content: string | null; + content?: string; @encodedName("application/json", "tool_call_id") @doc("The ID of the tool call resolved by the provided content.") @@ -392,13 +443,11 @@ model ChatResponseMessage { toolCalls?: ChatCompletionsToolCall[]; } -// tool_choice: "auto" | "none" | "required" | {"type": "function", "function": {"name": string}} - #suppress "@azure-tools/typespec-autorest/union-unsupported" "External API shape is defined in OpenAPI 3.0 as oneOf." 
-alias ChatCompletionsToolSelection = ChatCompletionsToolSelectionPreset | ChatCompletionsNamedToolSelection; +alias ChatCompletionsToolChoice = ChatCompletionsToolChoicePreset | ChatCompletionsNamedToolChoice; @doc("Represents a generic policy for how a chat completions tool may be selected.") -union ChatCompletionsToolSelectionPreset { +union ChatCompletionsToolChoicePreset { string, @doc(""" @@ -420,16 +469,16 @@ union ChatCompletionsToolSelectionPreset { } @doc("A tool selection of a specific, named function tool that will limit chat completions to using the named function.") -model ChatCompletionsNamedToolSelection { +model ChatCompletionsNamedToolChoice { @doc("The type of the tool. Currently, only `function` is supported.") type: "function"; @doc("The function that should be called.") - function: ChatCompletionsFunctionToolSelection; + function: ChatCompletionsNamedToolChoiceFunction; } @doc("A tool selection of a specific, named function tool that will limit chat completions to using the named function.") -model ChatCompletionsFunctionToolSelection { +model ChatCompletionsNamedToolChoiceFunction { @doc("The name of the function that should be called.") name: string; } diff --git a/specification/ai/ModelClient/models/common.tsp b/specification/ai/ModelClient/models/common.tsp index 5c7610ae95cb..e21a83db031a 100644 --- a/specification/ai/ModelClient/models/common.tsp +++ b/specification/ai/ModelClient/models/common.tsp @@ -10,13 +10,13 @@ namespace AI.Model; union EmbeddingInputType { string, - @doc("to do") + @doc("Indicates the input is a general text input.") text: "text", - @doc("to do") + @doc("Indicates the input represents a search query to find the most relevant documents in your vector database.") query: "query", - @doc("to do") + @doc("Indicates the input represents a document that is stored in a vector database.") document: "document", } @@ -52,6 +52,9 @@ union EmbeddingEncodingFormat { recommendations, and other similar scenarios. 
""") model EmbeddingsResult { + @doc("Unique identifier for the embeddings result.") + id: string; + @doc("Embedding values for the prompts submitted in the request.") data: EmbeddingItem[]; @@ -109,23 +112,23 @@ model ModelInfo { union ModelType { string, - @doc("Embeddings.") + @doc("A model capable of generating embeddings from a text") embeddings: "embeddings", - @doc("Image generation") + @doc("A model capable of generating images from an image and text description") image_generation: "image_generation", - @doc("Text generation") + @doc("A text generation model") text_generation: "text_generation", - @doc("Image embeddings") + @doc("A model capable of generating embeddings from an image") image_embeddings: "image_embeddings", - @doc("Audio generation") + @doc("A text-to-audio generative model") audio_generation: "audio_generation", - @doc("Chat completions") - chat: "chat", + @doc("A model capable of taking chat-formatted messages and generate responses") + chat_completion: "chat_completion", } alias AdditionalRequestHeaders = { diff --git a/specification/ai/ModelClient/models/embeddings.tsp b/specification/ai/ModelClient/models/embeddings.tsp index 3b3fbf54fb91..8e2aa8e1751f 100644 --- a/specification/ai/ModelClient/models/embeddings.tsp +++ b/specification/ai/ModelClient/models/embeddings.tsp @@ -6,7 +6,6 @@ using TypeSpec.Http; namespace AI.Model; -#suppress "@azure-tools/typespec-azure-core/bad-record-type" alias EmbeddingsOptions = { @doc(""" Input text to embed, encoded as a string or array of tokens. 
@@ -38,6 +37,5 @@ alias EmbeddingsOptions = { """) `model`?: string; - #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; }; diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp index dcea3f1b8358..712eaa8a9985 100644 --- a/specification/ai/ModelClient/models/image_embeddings.tsp +++ b/specification/ai/ModelClient/models/image_embeddings.tsp @@ -6,13 +6,12 @@ using TypeSpec.Http; namespace AI.Model; -#suppress "@azure-tools/typespec-azure-core/bad-record-type" alias ImageEmbeddingsOptions = { @doc(""" Input image to embed. To embed multiple inputs in a single request, pass an array. The input must not exceed the max input tokens for the model. """) - input: EmbeddingInput[]; + input: ImageEmbeddingInput[]; @doc(""" Optional. The number of dimensions the resulting output embeddings should have. @@ -39,14 +38,13 @@ alias ImageEmbeddingsOptions = { """) `model`?: string; - #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; }; @doc("Represents an image with optional text.") -model EmbeddingInput { +model ImageEmbeddingInput { @doc(""" - The input image, in PNG format. + The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`. 
""") image: string; diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 01af21e11251..627e12438822 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -12,6 +12,7 @@ linter: disable: "@azure-tools/typespec-azure-core/casing-style": "Disabled since JSON payload in REST API does not conform to Azure guidelines with regards to casing" "@azure-tools/typespec-azure-core/no-string-discriminator": "Use an extensible union instead of a plain string" + "@azure-tools/typespec-azure-core/bad-record-type": "REST API does not conform to Azure guidelines with regards to record types" options: "@azure-tools/typespec-autorest": azure-resource-provider-folder: "data-plane" diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index ad6f6d401407..87c669a94828 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -293,7 +293,7 @@ "type": "array", "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", "items": { - "$ref": "#/definitions/EmbeddingInput" + "$ref": "#/definitions/ImageEmbeddingInput" }, "x-ms-identifiers": [] }, @@ -518,20 +518,7 @@ "choices" ] }, - "ChatCompletionsFunctionToolSelection": { - "type": "object", - "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", - "properties": { - "name": { - "type": "string", - "description": "The name of the function that should be called." 
- } - }, - "required": [ - "name" - ] - }, - "ChatCompletionsNamedToolSelection": { + "ChatCompletionsNamedToolChoice": { "type": "object", "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", "properties": { @@ -546,7 +533,7 @@ } }, "function": { - "$ref": "#/definitions/ChatCompletionsFunctionToolSelection", + "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction", "description": "The function that should be called." } }, @@ -555,6 +542,19 @@ "function" ] }, + "ChatCompletionsNamedToolChoiceFunction": { + "type": "object", + "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function that should be called." + } + }, + "required": [ + "name" + ] + }, "ChatCompletionsOptions": { "type": "object", "description": "The ChatCompletionsOptions model", @@ -612,7 +612,7 @@ }, "response_format": { "$ref": "#/definitions/ChatCompletionsResponseFormat", - "description": "The format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message." + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. 
Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." }, "stop": { "type": "array", @@ -664,7 +664,7 @@ "type" ] }, - "ChatCompletionsResponseFormatJSON": { + "ChatCompletionsResponseFormatJsonObject": { "type": "object", "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", "allOf": [ @@ -674,6 +674,53 @@ ], "x-ms-discriminator-value": "json_object" }, + "ChatCompletionsResponseFormatJsonSchema": { + "type": "object", + "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.", + "properties": { + "json_schema": { + "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition", + "description": "The definition of the required JSON schema in the response, and associated metadata." + } + }, + "required": [ + "json_schema" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "json_schema" + }, + "ChatCompletionsResponseFormatJsonSchemaDefinition": { + "type": "object", + "description": "Defines the response format for chat completions as JSON with a given schema. The AI model\nwill need to adhere to this schema when generating completions.", + "properties": { + "name": { + "type": "string", + "description": "A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64." 
+ }, + "schema": { + "type": "object", + "description": "The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema. Consult your AI model documentation\nto determine what is supported.", + "additionalProperties": {} + }, + "description": { + "type": "string", + "description": "A description of the response format, used by the AI model to determine how to generate responses in this format." + }, + "strict": { + "type": "boolean", + "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported\nby the AI model, the AI model will not error out. Instead it will ignore the unsupported keywords.", + "default": false + } + }, + "required": [ + "name", + "schema" + ] + }, "ChatCompletionsResponseFormatText": { "type": "object", "description": "A response format for Chat Completions that emits text responses. This is the default response format.", @@ -713,31 +760,7 @@ "function" ] }, - "ChatCompletionsToolDefinition": { - "type": "object", - "description": "The definition of a chat completions tool that can call a function.", - "properties": { - "type": { - "type": "string", - "description": "The type of the tool. Currently, only `function` is supported.", - "enum": [ - "function" - ], - "x-ms-enum": { - "modelAsString": false - } - }, - "function": { - "$ref": "#/definitions/FunctionDefinition", - "description": "The function definition details for the function tool." 
- } - }, - "required": [ - "type", - "function" - ] - }, - "ChatCompletionsToolSelectionPreset": { + "ChatCompletionsToolChoicePreset": { "type": "string", "description": "Represents a generic policy for how a chat completions tool may be selected.", "enum": [ @@ -746,7 +769,7 @@ "required" ], "x-ms-enum": { - "name": "ChatCompletionsToolSelectionPreset", + "name": "ChatCompletionsToolChoicePreset", "modelAsString": true, "values": [ { @@ -767,6 +790,30 @@ ] } }, + "ChatCompletionsToolDefinition": { + "type": "object", + "description": "The definition of a chat completions tool that can call a function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the tool. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/FunctionDefinition", + "description": "The function definition details for the function tool." + } + }, + "required": [ + "type", + "function" + ] + }, "ChatMessageContentItem": { "type": "object", "description": "An abstract representation of a structured content item within a chat message.", @@ -873,8 +920,7 @@ "properties": { "content": { "type": "string", - "description": "The content of the message.", - "x-nullable": true + "description": "The content of the message." }, "tool_calls": { "type": "array", @@ -931,8 +977,7 @@ "properties": { "content": { "type": "string", - "description": "The content of the message.", - "x-nullable": true + "description": "The content of the message." }, "tool_call_id": { "type": "string", @@ -941,7 +986,6 @@ } }, "required": [ - "content", "tool_call_id" ], "allOf": [ @@ -1142,23 +1186,6 @@ ] } }, - "EmbeddingInput": { - "type": "object", - "description": "Represents an image with optional text.", - "properties": { - "image": { - "type": "string", - "description": "The input image, in PNG format." - }, - "text": { - "type": "string", - "description": "Optional. 
The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." - } - }, - "required": [ - "image" - ] - }, "EmbeddingInputType": { "type": "string", "description": "Represents the input types used for embedding search.", @@ -1174,17 +1201,17 @@ { "name": "text", "value": "text", - "description": "to do" + "description": "Indicates the input is a general text input." }, { "name": "query", "value": "query", - "description": "to do" + "description": "Indicates the input represents a search query to find the most relevant documents in your vector database." }, { "name": "document", "value": "document", - "description": "to do" + "description": "Indicates the input represents a document that is stored in a vector database." } ] } @@ -1211,6 +1238,10 @@ "type": "object", "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.", "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the embeddings result." + }, "data": { "type": "array", "description": "Embedding values for the prompts submitted in the request.", @@ -1229,6 +1260,7 @@ } }, "required": [ + "id", "data", "usage", "model" @@ -1322,6 +1354,23 @@ "name" ] }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." + }, + "text": { + "type": "string", + "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." 
+ } + }, + "required": [ + "image" + ] + }, "ModelInfo": { "type": "object", "description": "Represents some basic information about the AI model.", @@ -1354,7 +1403,7 @@ "text_generation", "image_embeddings", "audio_generation", - "chat" + "chat_completion" ], "x-ms-enum": { "name": "ModelType", @@ -1363,32 +1412,32 @@ { "name": "embeddings", "value": "embeddings", - "description": "Embeddings." + "description": "A model capable of generating embeddings from a text" }, { "name": "image_generation", "value": "image_generation", - "description": "Image generation" + "description": "A model capable of generating images from an image and text description" }, { "name": "text_generation", "value": "text_generation", - "description": "Text generation" + "description": "A text generation model" }, { "name": "image_embeddings", "value": "image_embeddings", - "description": "Image embeddings" + "description": "A model capable of generating embeddings from an image" }, { "name": "audio_generation", "value": "audio_generation", - "description": "Audio generation" + "description": "A text-to-audio generative model" }, { - "name": "chat", - "value": "chat", - "description": "Chat completions" + "name": "chat_completion", + "value": "chat_completion", + "description": "A model capable of taking chat-formatted messages and generate responses" } ] } From d4c101df2f391139092af081c701e2ceabb083a7 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Fri, 31 Jan 2025 08:42:38 -0800 Subject: [PATCH 27/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 - 1 file changed, 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 7c195f92202b..f0dea205585b 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -82,7 +82,6 @@ namespace Customizations; // The actual name here doesn't matter and is here for 
"ImageDetailLevel", "python" ); -@@clientName(AI.Model.ChatCompletionsResponseFormatJSON, "ChatCompletionsResponseFormatJson", "java"); @client({ name: "ChatCompletionsClient", From dd22af85cd1c08b47e9f5828fd60eab4b1bc8b38 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 18 Feb 2025 09:20:35 -0800 Subject: [PATCH 28/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index f0dea205585b..ab6f417d859d 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -12,6 +12,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for // say we still want the models they use to be public. @@access(AI.Model.ChatChoice, Access.public); @@access(AI.Model.ChatCompletions, Access.public); +@@access(AI.Model.ChatCompletionsOptions, Access.public, "java"); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); @@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); From 07da174a2394404154d37b3f065fb2903ee279e3 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 18 Feb 2025 12:59:41 -0800 Subject: [PATCH 29/38] Update main.tsp --- specification/ai/ModelClient/main.tsp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/specification/ai/ModelClient/main.tsp b/specification/ai/ModelClient/main.tsp index d3958a8697c1..47a59cea0724 100644 --- a/specification/ai/ModelClient/main.tsp +++ b/specification/ai/ModelClient/main.tsp @@ -29,4 +29,7 @@ enum Versions { @useDependency(Azure.Core.Versions.v1_0_Preview_2) @doc("The 2024-05-01-preview version of the AI.Model service.") v2024_05_01_Preview: "2024-05-01-preview", + @useDependency(Azure.Core.Versions.v1_0_Preview_2) + 
@doc("The 2024-08-01-preview version of the AI.Model service.") + v2024_08_01_Preview: "2024-08-01-preview", } From 4277cf43ebedd8f4f72074ad357e2024d8d55f79 Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Tue, 18 Feb 2025 13:00:08 -0800 Subject: [PATCH 30/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index ab6f417d859d..7b7162659c3c 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -5,7 +5,7 @@ import "./main.tsp"; using Azure.ClientGenerator.Core; using TypeSpec.Versioning; -@useDependency(AI.Model.Versions.v2024_05_01_Preview) +@useDependency(AI.Model.Versions.v2024_08_01_Preview) namespace Customizations; // The actual name here doesn't matter and is here for organization purposes only // Since we made all operator methods internal, we need to expliclty From c8db7c028659e151e6e9a0edf36ebeea182e3bac Mon Sep 17 00:00:00 2001 From: Glenn Harper Date: Wed, 19 Feb 2025 08:29:16 -0800 Subject: [PATCH 31/38] Add ChatRequestDeveloperMessage --- specification/ai/ModelClient/client.tsp | 4 + .../ModelClient/models/chat_completions.tsp | 15 + .../preview/2024-05-01-preview/openapi.json | 309 +++++++++++------- 3 files changed, 213 insertions(+), 115 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 7b7162659c3c..ea0e9fc0e65b 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -54,6 +54,10 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.StreamingChatChoiceUpdate, Access.public); @@access(AI.Model.StreamingChatResponseMessageUpdate, Access.public); @@access(AI.Model.StreamingChatResponseToolCallUpdate, Access.public); 
+@@access(AI.Model.ChatRequestDeveloperMessage, + Access.public, + "csharp,java,javascript" +); // The operators need to be hidden, since we hand-write the public versions of those to // 1. Add chat completions streaming (to getChatCompletions operator) diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 886938346885..8de12e7d9094 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -236,6 +236,9 @@ union ChatRole { @doc("The role that represents extension tool activity within a chat completions operation.") tool: "tool", + + @doc("The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role.") + developer: "developer", } @doc(""" @@ -368,6 +371,18 @@ model ChatRequestSystemMessage extends ChatRequestMessage { content: string; } +@doc(""" + A request chat message containing developer instructions that influence how the model will generate a chat completions + response. Some AI models support a developer message instead of a system message. 
+ """) +model ChatRequestDeveloperMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'developer' for developer messages.") + role: ChatRole.developer; + + @doc("The contents of the developer message.") + content: string; +} + @doc("A request chat message representing user input to the assistant.") model ChatRequestUserMessage extends ChatRequestMessage { @doc("The chat role associated with this message, which is always 'user' for user messages.") diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index 87c669a94828..88076c03ce5e 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -90,10 +90,100 @@ { "name": "body", "in": "body", - "description": "request options to pass to the endpoint using complete path", "required": true, "schema": { - "$ref": "#/definitions/ChatCompletionsOptions" + "type": "object", + "properties": { + "messages": { + "type": "array", + "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatRequestMessage" + }, + "x-ms-identifiers": [] + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", + "default": 0, + 
"minimum": -2, + "maximum": 2 + }, + "stream": { + "type": "boolean", + "description": "A value indicating whether chat completions should be streamed for this request." + }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "temperature": { + "type": "number", + "format": "float", + "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 0.7, + "minimum": 0, + "maximum": 1 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 1, + "minimum": 0, + "maximum": 1 + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens to generate.", + "minimum": 0 + }, + "response_format": { + "$ref": "#/definitions/ChatCompletionsResponseFormat", + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." + }, + "stop": { + "type": "array", + "description": "A collection of textual sequences that will end completions generation.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "tools": { + "type": "array", + "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatCompletionsToolDefinition" + }, + "x-ms-identifiers": [] + }, + "tool_choice": { + "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", + "x-ms-client-name": "toolChoice" + }, + "seed": { + "type": "integer", + "format": "int64", + "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "messages" + ], + "additionalProperties": {} } } ], @@ -172,7 +262,6 @@ { "name": "body", "in": "body", - "description": "request options to pass to the endpoint using embeddings path", "required": true, "schema": { "type": "object", @@ -284,7 +373,6 @@ { "name": "body", "in": "body", - "description": "request options to pass to the endpoint using images embeddings path", "required": true, "schema": { "type": "object", @@ -355,7 +443,7 @@ "/info": { "get": { "operationId": "GetModelInfo", - "description": "Returns information about the AI model.\nThe method makes a REST API call to the `/info` route on the given endpoint.", + "description": "Returns information about the AI model.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API or Managed Compute endpoint.\nIt will not work for GitHub Models endpoint or Azure OpenAI endpoint.", "parameters": [ { "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" @@ -393,6 +481,30 @@ } }, "definitions": { + "AudioContentFormat": { + "type": "string", + "description": "A 
representation of the possible audio formats for audio.", + "enum": [ + "wav", + "mp3" + ], + "x-ms-enum": { + "name": "AudioContentFormat", + "modelAsString": true, + "values": [ + { + "name": "wav", + "value": "wav", + "description": "Specifies audio in WAV format." + }, + { + "name": "mp3", + "value": "mp3", + "description": "Specifies audio in MP3 format." + } + ] + } + }, "Azure.Core.Foundations.Error": { "type": "object", "description": "The error object.", @@ -496,10 +608,6 @@ "type": "string", "description": "The model used for the chat completion." }, - "usage": { - "$ref": "#/definitions/CompletionsUsage", - "description": "Usage information for tokens processed and generated as part of this completions operation." - }, "choices": { "type": "array", "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", @@ -508,14 +616,18 @@ "$ref": "#/definitions/ChatChoice" }, "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/CompletionsUsage", + "description": "Usage information for tokens processed and generated as part of this completions operation." 
} }, "required": [ "id", "created", "model", - "usage", - "choices" + "choices", + "usage" ] }, "ChatCompletionsNamedToolChoice": { @@ -555,101 +667,6 @@ "name" ] }, - "ChatCompletionsOptions": { - "type": "object", - "description": "The ChatCompletionsOptions model", - "properties": { - "messages": { - "type": "array", - "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatRequestMessage" - }, - "x-ms-identifiers": [] - }, - "frequency_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "stream": { - "type": "boolean", - "description": "A value indicating whether chat completions should be streamed for this request." 
- }, - "presence_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "temperature": { - "type": "number", - "format": "float", - "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 0.7, - "minimum": 0, - "maximum": 1 - }, - "top_p": { - "type": "number", - "format": "float", - "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 1, - "minimum": 0, - "maximum": 1 - }, - "max_tokens": { - "type": "integer", - "format": "int32", - "description": "The maximum number of tokens to generate.", - "minimum": 0 - }, - "response_format": { - "$ref": "#/definitions/ChatCompletionsResponseFormat", - "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." - }, - "stop": { - "type": "array", - "description": "A collection of textual sequences that will end completions generation.", - "minItems": 1, - "items": { - "type": "string" - } - }, - "tools": { - "type": "array", - "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatCompletionsToolDefinition" - }, - "x-ms-identifiers": [] - }, - "tool_choice": { - "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", - "x-ms-client-name": "toolChoice" - }, - "seed": { - "type": "integer", - "format": "int64", - "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "messages" - ], - "additionalProperties": {} - }, "ChatCompletionsResponseFormat": { "type": "object", "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", @@ -695,7 +712,7 @@ }, "ChatCompletionsResponseFormatJsonSchemaDefinition": { "type": "object", - "description": "Defines the response format for chat completions as JSON with a given schema. The AI model\nwill need to adhere to this schema when generating completions.", + "description": "Defines the response format for chat completions as JSON with a given schema.\nThe AI model will need to adhere to this schema when generating completions.", "properties": { "name": { "type": "string", @@ -703,7 +720,7 @@ }, "schema": { "type": "object", - "description": "The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema. 
Consult your AI model documentation\nto determine what is supported.", + "description": "The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema.\nConsult your AI model documentation to determine what is supported.", "additionalProperties": {} }, "description": { @@ -712,7 +729,7 @@ }, "strict": { "type": "boolean", - "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported\nby the AI model, the AI model will not error out. Instead it will ignore the unsupported keywords.", + "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported by the AI model,\nthe AI model will not error out. 
Instead it will ignore the unsupported keywords.", "default": false } }, @@ -814,6 +831,26 @@ "function" ] }, + "ChatMessageAudioContentItem": { + "type": "object", + "description": "A structured chat content item containing an audio content.", + "properties": { + "input_audio": { + "$ref": "#/definitions/ChatMessageInputAudio", + "description": "The details of the input audio.", + "x-ms-client-name": "inputAudio" + } + }, + "required": [ + "input_audio" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "input_audio" + }, "ChatMessageContentItem": { "type": "object", "description": "An abstract representation of a structured content item within a chat message.", @@ -895,6 +932,24 @@ "url" ] }, + "ChatMessageInputAudio": { + "type": "object", + "description": "The details of an audio chat message content part.", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data" + }, + "format": { + "$ref": "#/definitions/AudioContentFormat", + "description": "The audio format of the audio content." + } + }, + "required": [ + "data", + "format" + ] + }, "ChatMessageTextContentItem": { "type": "object", "description": "A structured chat content item containing plain text.", @@ -938,6 +993,25 @@ ], "x-ms-discriminator-value": "assistant" }, + "ChatRequestDeveloperMessage": { + "type": "object", + "description": "A request chat message containing developer instructions that influence how the model will generate a chat completions\nresponse. Some AI models support a developer message instead of a system message.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the developer message." 
+ } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "developer" + }, "ChatRequestMessage": { "type": "object", "description": "An abstract representation of a chat message as provided in a request.", @@ -1047,7 +1121,8 @@ "system", "user", "assistant", - "tool" + "tool", + "developer" ], "x-ms-enum": { "name": "ChatRole", @@ -1072,6 +1147,11 @@ "name": "tool", "value": "tool", "description": "The role that represents extension tool activity within a chat completions operation." + }, + { + "name": "developer", + "value": "developer", + "description": "The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role." } ] } @@ -1484,10 +1564,6 @@ "type": "string", "description": "The model used for the chat completion." }, - "usage": { - "$ref": "#/definitions/CompletionsUsage", - "description": "Usage information for tokens processed and generated as part of this completions operation." - }, "choices": { "type": "array", "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", @@ -1496,13 +1572,16 @@ "$ref": "#/definitions/StreamingChatChoiceUpdate" }, "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/CompletionsUsage", + "description": "Usage information for tokens processed and generated as part of this completions operation." 
} }, "required": [ "id", "created", "model", - "usage", "choices" ] }, From e312e05d644feff5723a68e565c0c8f10c4b87cd Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:38:53 -0800 Subject: [PATCH 32/38] Update client.tsp --- specification/ai/ModelClient/client.tsp | 1 - 1 file changed, 1 deletion(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index ea0e9fc0e65b..4052f532cf0c 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -12,7 +12,6 @@ namespace Customizations; // The actual name here doesn't matter and is here for // say we still want the models they use to be public. @@access(AI.Model.ChatChoice, Access.public); @@access(AI.Model.ChatCompletions, Access.public); -@@access(AI.Model.ChatCompletionsOptions, Access.public, "java"); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); @@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); From 8f5ad43ec297181e26d5c10f2ebbdc9ce636fd0c Mon Sep 17 00:00:00 2001 From: Glenn Harper <64209257+glharper@users.noreply.github.com> Date: Wed, 19 Feb 2025 18:05:27 -0800 Subject: [PATCH 33/38] readd chatcompletionsoptions as public class --- specification/ai/ModelClient/client.tsp | 1 + 1 file changed, 1 insertion(+) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 4052f532cf0c..ea0e9fc0e65b 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -12,6 +12,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for // say we still want the models they use to be public. 
@@access(AI.Model.ChatChoice, Access.public); @@access(AI.Model.ChatCompletions, Access.public); +@@access(AI.Model.ChatCompletionsOptions, Access.public, "java"); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); @@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); From 70d146a18939fcfd36661adc0bb123046cd3d19b Mon Sep 17 00:00:00 2001 From: Srikanta <51379715+srnagar@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:57:12 -0700 Subject: [PATCH 34/38] Update @service tag to work with latest TypeSpec version 0.67 --- specification/ai/ModelClient/main.tsp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ai/ModelClient/main.tsp b/specification/ai/ModelClient/main.tsp index 47a59cea0724..d19a8b7ad0cc 100644 --- a/specification/ai/ModelClient/main.tsp +++ b/specification/ai/ModelClient/main.tsp @@ -18,7 +18,7 @@ using Azure.Core; } ]> ) -@service({ +@service(#{ title: "AI Model Inference", }) @versioned(AI.Model.Versions) From 57c059585510b8963761b1622a5f2febdc7119bb Mon Sep 17 00:00:00 2001 From: Travis Angevine Date: Mon, 7 Apr 2025 09:17:41 -0700 Subject: [PATCH 35/38] Merge from feature branch (#33660) * Go back to `parameters?: unknown` in FunctionDefinition model * Add missing CompletionsFinishReason comments. Update tool choice related models to use the work `choice` instead of selection, similar to OAI TypeSpec * Update JSON examples. Fix client.tsp after previous commit to rename models * Add ID back to EmbeddingsResult model. 
Add comment to getModelInfo that it only works for MaaS/MaaP * Update comments about hand-written operator methods * Update teh ModelInfo enum * Rename EmbeddingInput to ImageEmbeddingInput (#31953) * Add Structured Outputs (#31962) * Add support for structured outputs Signed-off-by: trangevi * Naming consistency (#31966) Signed-off-by: trangevi * Fixes following `npx tsp format **\*.tsp` * Fix a couple of errors from `npx tsv .` * Add missing class description. Add updated swagger file * Update description * Remove the in-line definition * Make all response format models internal (#32005) * Make 'usage' optional in streaming response update (#32078) * Make message classes internal (#32103) * Add new input audio content item (#32287) * Add new audio content item Signed-off-by: trangevi * Update swagger Signed-off-by: trangevi --------- Signed-off-by: trangevi * Rename Audio Classes to Match Convention (#32304) * Add new audio content item Signed-off-by: trangevi * Update swagger Signed-off-by: trangevi * Rename to match convention Signed-off-by: trangevi --------- Signed-off-by: trangevi * Apply new local cspell * Fix wrongly deleted new cspell in the root * Add developer role to support new GPT models (#32474) * Fix tcgc error due to Record (#32719) * Fix tcgc error due to Record Signed-off-by: trangevi * Update other options objects to model Signed-off-by: trangevi * Fix compile errors. 
Update swagger Signed-off-by: trangevi --------- Signed-off-by: trangevi * Merge C# feature branch into primary Azure.AI.Inference branch (#31869) * Update ImageEmbeddingsClient to change embed to embedImage Signed-off-by: Travis Angevine * Add typespec-csharp to the tspconfig Signed-off-by: Travis Angevine * Move csharp client property renames Signed-off-by: Travis Angevine * Missing semicolons Signed-off-by: Travis Angevine * Wrong namespace Signed-off-by: Travis Angevine * Incorrect namespaces Signed-off-by: Travis Angevine * Aliases need to have things referenced differently, apparently Signed-off-by: Travis Angevine * Need to specify parameters? Signed-off-by: Travis Angevine * Move alias renames to the individual file for now Signed-off-by: Travis Angevine * Change ChatCompletionsOptions to a model Signed-off-by: Travis Angevine * syntax Signed-off-by: Travis Angevine * Leave the model property Signed-off-by: Travis Angevine * Add additional properties for ChatCompletionsOptions Signed-off-by: Travis Angevine * 3 dots, not 2 Signed-off-by: Travis Angevine * Can't add docstring to generic spread property Signed-off-by: Travis Angevine * Change unknown parameters to extra parameters Signed-off-by: Travis Angevine * Remove unnecessary rename Signed-off-by: Travis Angevine * Removing api-key change for python, for now Signed-off-by: Travis Angevine * Make operator method internal Signed-off-by: Travis Angevine * Fix pass-through header Signed-off-by: Travis Angevine * Re-enable protocol methods for csharp Signed-off-by: Travis Angevine * Remove "spread" for ChatCompletionsOptions Signed-off-by: Travis Angevine * Remove spread for additionalRequestHeaders Signed-off-by: Travis Angevine * needed commas Signed-off-by: Travis Angevine * another formatting attempt Signed-off-by: Travis Angevine * okay, figured out how the formatting works Signed-off-by: Travis Angevine * Alias breaks the generator Signed-off-by: Travis Angevine * A model won't give us the interaction 
pattern that we want. Trying this. Signed-off-by: Travis Angevine * Forgot semicolon Signed-off-by: Travis Angevine * Does swapping the order of parameters matter? Signed-off-by: Travis Angevine * Make extraParams optional? Signed-off-by: Travis Angevine * Try and get rid of anonymous model Signed-off-by: Travis Angevine * Try adding RequestHeadersTrait Signed-off-by: Travis Angevine * Maybe needs to be an object? Signed-off-by: Travis Angevine * Add using Signed-off-by: Travis Angevine * Revert to "good" state Signed-off-by: Travis Angevine * Try moving extra parameters. Change Parameters back to unknown Signed-off-by: Travis Angevine * Skip the alias Signed-off-by: Travis Angevine * Is it the header which is causing issues? Signed-off-by: Travis Angevine * Headers in the options object seems to not work Signed-off-by: Travis Angevine * Remove commented operation Signed-off-by: Travis Angevine * Unnecessary using Signed-off-by: Travis Angevine * Add embeddings for csharp Signed-off-by: Travis Angevine * Client changes for C# Signed-off-by: Travis Angevine * Need the options objects to be models Signed-off-by: Travis Angevine * Make the embedding methods public for now, for intial testing Signed-off-by: Travis Angevine * Remove image embedding for now until it can be tested Signed-off-by: Travis Angevine * Add new streaming classes for C# Signed-off-by: Travis Angevine * Revert to Darren's branch to test Signed-off-by: trangevi * Explicitly set the completions options object to be an input Signed-off-by: trangevi * Other clients with spread parameters Signed-off-by: trangevi * No image embedding client yet, so leave this out for now Signed-off-by: trangevi * Operations need to be internal for all languages Signed-off-by: trangevi * Comment Signed-off-by: trangevi * Add support for structured outputs Signed-off-by: trangevi * We already had the property, just need to add one more option Signed-off-by: trangevi * Remove unnecessary inner class Signed-off-by: trangevi 
* Rename Signed-off-by: trangevi * extra semicolon Signed-off-by: trangevi * Fix naming Signed-off-by: trangevi * What if it's just unknown? Signed-off-by: trangevi * unknown to record unknown Signed-off-by: trangevi * Leave as unknown for now Signed-off-by: trangevi * Add image embeddings client back to csharp Signed-off-by: trangevi * Generation is fixed, switch back to record Signed-off-by: trangevi * Make ImageEmbeddingsOptions explicitly an input Signed-off-by: trangevi * First attempt. Make the main content item a parent of the two relevant ones, will require a custom factory Signed-off-by: trangevi * Do I need this explicitly public? Signed-off-by: trangevi * What about usage? Signed-off-by: trangevi * More inputs Signed-off-by: trangevi * Remove middle layer Signed-off-by: trangevi * Missed some Signed-off-by: trangevi * TSP formatting and swagger changes Signed-off-by: trangevi * Another format change Signed-off-by: trangevi * Update with some changes to better reflect the desired merge Signed-off-by: trangevi * Mark options object as internal to address transitive access issue Signed-off-by: trangevi * review comments Signed-off-by: trangevi * Add renaming for python classes Signed-off-by: trangevi * Update after npx command Signed-off-by: trangevi --------- Signed-off-by: Travis Angevine Signed-off-by: trangevi * Inference JS/TS merge with main Inference Branch (#33553) * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update tspconfig.yaml * Update client.tsp * Update client.tsp - remove embeddingsClient and ImageEmbeddingsClient * Update tspconfig.yaml * Update main.tsp * Update main.tsp * change namespace to Model * use namespace AI.Model * Use AI.Model instead of ModelClient * Update client.tsp * Delete specification/ai/ModelClient/models/image_embeddings.tsp * Delete specification/ai/ModelClient/models/embeddings.tsp * Update 
common.tsp * Update routes.tsp * Update client.tsp * Update chat_completions.tsp * Update chat_completions.tsp * access fix * use model keyword * trying to fix ts gen * more fixes * more fixes * revert chatcompletionsoptions to alias * revert to alias * Update tspconfig.yaml * Update client.tsp * Attempt to merge Darren's branch (#32066) * Go back to `parameters?: unknown` in FunctionDefinition model * Add missing CompletionsFinishReason comments. Update tool choice related models to use the work `choice` instead of selection, similar to OAI TypeSpec * Update JSON examples. Fix client.tsp after previous commit to rename models * Add ID back to EmbeddingsResult model. Add comment to getModelInfo that it only works for MaaS/MaaP * Update comments about hand-written operator methods * Update teh ModelInfo enum * Rename EmbeddingInput to ImageEmbeddingInput (#31953) * Add Structured Outputs (#31962) * Add support for structured outputs Signed-off-by: trangevi * Naming consistency (#31966) Signed-off-by: trangevi * Fixes following `npx tsp format **\*.tsp` * Fix a couple of errors from `npx tsv .` * Add missing class description. 
Add updated swagger file * Update description * Remove the in-line definition * Make all response format models internal (#32005) * Update client.tsp - alias ChatCompletionsResponseFormatJsonObject in JS to avoid breaking change * Update chat_completions.tsp --------- Signed-off-by: trangevi Co-authored-by: Darren Cohen <39422044+dargilco@users.noreply.github.com> Co-authored-by: Glenn Harper <64209257+glharper@users.noreply.github.com> * Make ChatCompletionsOptions public in Java * add alias for ChatCompletionsResponseFormatJsonObject * add developer chat role * remove java specific augment * use correct names for audio content classes * fix tsp error * fix anonymous object error (#33514) Signed-off-by: trangevi * Merge main inference branch into js feature branch (#33451) * Apply new local cspell * Fix wrongly deleted new cspell in the root * Add developer role to support new GPT models (#32474) * Fix tcgc error due to Record (#32719) * Fix tcgc error due to Record Signed-off-by: trangevi * Update other options objects to model Signed-off-by: trangevi * Fix compile errors. Update swagger Signed-off-by: trangevi --------- Signed-off-by: trangevi * Merge C# feature branch into primary Azure.AI.Inference branch (#31869) * Update ImageEmbeddingsClient to change embed to embedImage Signed-off-by: Travis Angevine * Add typespec-csharp to the tspconfig Signed-off-by: Travis Angevine * Move csharp client property renames Signed-off-by: Travis Angevine * Missing semicolons Signed-off-by: Travis Angevine * Wrong namespace Signed-off-by: Travis Angevine * Incorrect namespaces Signed-off-by: Travis Angevine * Aliases need to have things referenced differently, apparently Signed-off-by: Travis Angevine * Need to specify parameters? 
Signed-off-by: Travis Angevine * Move alias renames to the individual file for now Signed-off-by: Travis Angevine * Change ChatCompletionsOptions to a model Signed-off-by: Travis Angevine * syntax Signed-off-by: Travis Angevine * Leave the model property Signed-off-by: Travis Angevine * Add additional properties for ChatCompletionsOptions Signed-off-by: Travis Angevine * 3 dots, not 2 Signed-off-by: Travis Angevine * Can't add docstring to generic spread property Signed-off-by: Travis Angevine * Change unknown parameters to extra parameters Signed-off-by: Travis Angevine * Remove unnecessary rename Signed-off-by: Travis Angevine * Removing api-key change for python, for now Signed-off-by: Travis Angevine * Make operator method internal Signed-off-by: Travis Angevine * Fix pass-through header Signed-off-by: Travis Angevine * Re-enable protocol methods for csharp Signed-off-by: Travis Angevine * Remove "spread" for ChatCompletionsOptions Signed-off-by: Travis Angevine * Remove spread for additionalRequestHeaders Signed-off-by: Travis Angevine * needed commas Signed-off-by: Travis Angevine * another formatting attempt Signed-off-by: Travis Angevine * okay, figured out how the formatting works Signed-off-by: Travis Angevine * Alias breaks the generator Signed-off-by: Travis Angevine * A model won't give us the interaction pattern that we want. Trying this. Signed-off-by: Travis Angevine * Forgot semicolon Signed-off-by: Travis Angevine * Does swapping the order of parameters matter? Signed-off-by: Travis Angevine * Make extraParams optional? Signed-off-by: Travis Angevine * Try and get rid of anonymous model Signed-off-by: Travis Angevine * Try adding RequestHeadersTrait Signed-off-by: Travis Angevine * Maybe needs to be an object? Signed-off-by: Travis Angevine * Add using Signed-off-by: Travis Angevine * Revert to "good" state Signed-off-by: Travis Angevine * Try moving extra parameters. 
Change Parameters back to unknown Signed-off-by: Travis Angevine * Skip the alias Signed-off-by: Travis Angevine * Is it the header which is causing issues? Signed-off-by: Travis Angevine * Headers in the options object seems to not work Signed-off-by: Travis Angevine * Remove commented operation Signed-off-by: Travis Angevine * Unnecessary using Signed-off-by: Travis Angevine * Add embeddings for csharp Signed-off-by: Travis Angevine * Client changes for C# Signed-off-by: Travis Angevine * Need the options objects to be models Signed-off-by: Travis Angevine * Make the embedding methods public for now, for intial testing Signed-off-by: Travis Angevine * Remove image embedding for now until it can be tested Signed-off-by: Travis Angevine * Add new streaming classes for C# Signed-off-by: Travis Angevine * Revert to Darren's branch to test Signed-off-by: trangevi * Explicitly set the completions options object to be an input Signed-off-by: trangevi * Other clients with spread parameters Signed-off-by: trangevi * No image embedding client yet, so leave this out for now Signed-off-by: trangevi * Operations need to be internal for all languages Signed-off-by: trangevi * Comment Signed-off-by: trangevi * Add support for structured outputs Signed-off-by: trangevi * We already had the property, just need to add one more option Signed-off-by: trangevi * Remove unnecessary inner class Signed-off-by: trangevi * Rename Signed-off-by: trangevi * extra semicolon Signed-off-by: trangevi * Fix naming Signed-off-by: trangevi * What if it's just unknown? Signed-off-by: trangevi * unknown to record unknown Signed-off-by: trangevi * Leave as unknown for now Signed-off-by: trangevi * Add image embeddings client back to csharp Signed-off-by: trangevi * Generation is fixed, switch back to record Signed-off-by: trangevi * Make ImageEmbeddingsOptions explicitly an input Signed-off-by: trangevi * First attempt. 
Make the main content item a parent of the two relevant ones, will require a custom factory Signed-off-by: trangevi * Do I need this explicitly public? Signed-off-by: trangevi * What about usage? Signed-off-by: trangevi * More inputs Signed-off-by: trangevi * Remove middle layer Signed-off-by: trangevi * Missed some Signed-off-by: trangevi * TSP formatting and swagger changes Signed-off-by: trangevi * Another format change Signed-off-by: trangevi * Update with some changes to better reflect the desired merge Signed-off-by: trangevi * Mark options object as internal to address transitive access issue Signed-off-by: trangevi * review comments Signed-off-by: trangevi * Add renaming for python classes Signed-off-by: trangevi * Update after npx command Signed-off-by: trangevi --------- Signed-off-by: Travis Angevine Signed-off-by: trangevi --------- Signed-off-by: trangevi Signed-off-by: Travis Angevine Co-authored-by: Darren Cohen <39422044+dargilco@users.noreply.github.com> * PR fixes Signed-off-by: trangevi * Add package flavor for tsp compiler Signed-off-by: trangevi * Replace removed cspell file Signed-off-by: trangevi --------- Signed-off-by: trangevi Signed-off-by: Travis Angevine Co-authored-by: Glenn Harper <64209257+glharper@users.noreply.github.com> Co-authored-by: Glenn Harper Co-authored-by: Darren Cohen <39422044+dargilco@users.noreply.github.com> * Missing access modifier Signed-off-by: trangevi --------- Signed-off-by: trangevi Signed-off-by: Travis Angevine Co-authored-by: Darren Cohen <39422044+dargilco@users.noreply.github.com> Co-authored-by: Glenn Harper <64209257+glharper@users.noreply.github.com> Co-authored-by: Glenn Harper --- specification/ai/ModelClient/client.tsp | 119 ++++-- specification/ai/ModelClient/main.tsp | 6 +- .../ModelClient/models/chat_completions.tsp | 85 +++- .../ai/ModelClient/models/embeddings.tsp | 7 +- .../ModelClient/models/image_embeddings.tsp | 7 +- 
specification/ai/ModelClient/routes.tsp | 25 +- specification/ai/ModelClient/tspconfig.yaml | 10 +- specification/ai/cspell.yaml | 5 +- .../preview/2024-05-01-preview/openapi.json | 393 ++++++++++-------- 9 files changed, 422 insertions(+), 235 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index ea0e9fc0e65b..a3d0438d525b 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -8,8 +8,31 @@ using TypeSpec.Versioning; @useDependency(AI.Model.Versions.v2024_08_01_Preview) namespace Customizations; // The actual name here doesn't matter and is here for organization purposes only -// Since we made all operator methods internal, we need to expliclty -// say we still want the models they use to be public. +// Are these needed? +@@usage(AI.Model.ChatCompletions, Usage.output); +@@usage(AI.Model.ModelInfo, Usage.output); +@@usage(AI.Model.StreamingChatCompletionsUpdate, Usage.output); +@@usage(AI.Model.StreamingChatChoiceUpdate, Usage.output); +@@usage(AI.Model.StreamingChatResponseMessageUpdate, Usage.output); +@@usage(AI.Model.StreamingChatResponseToolCallUpdate, Usage.output); + +// Necessary for autogenerating options objects while the clients specify spread parameters. Necessary for the languages that want the options class. 
+@@usage(AI.Model.ChatCompletionsOptions, Usage.input, "csharp"); +@@usage(AI.Model.EmbeddingsOptions, Usage.input, "csharp"); +@@usage(AI.Model.ImageEmbeddingsOptions, Usage.input, "csharp"); + +// Necessary due to how we're forcing this abstraction +@@usage(AI.Model.ChatMessageAudioDataContentItem, Usage.input); +@@usage(AI.Model.ChatMessageAudioUrlContentItem, Usage.input); + +// The operators need to be hidden, since we hand-write the public versions of those +@@access(AI.Model.getChatCompletions, Access.internal); +@@access(AI.Model.getEmbeddings, Access.internal); +@@access(AI.Model.getImageEmbeddings, Access.internal); +@@access(AI.Model.getModelInfo, Access.internal, "python,java"); + +// Since we made all operator methods internal, we need to explicity +// say we still want the models they use to be public, since they will be used by hand-written operator methods. @@access(AI.Model.ChatChoice, Access.public); @@access(AI.Model.ChatCompletions, Access.public); @@access(AI.Model.ChatCompletionsOptions, Access.public, "java"); @@ -17,30 +40,41 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); @@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); @@access(AI.Model.ChatCompletionsNamedToolChoiceFunction, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormat, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormatJsonObject, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormatJsonSchema, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, Access.public); -@@access(AI.Model.ChatCompletionsResponseFormatText, Access.public); @@access(AI.Model.ChatCompletionsToolCall, Access.public); @@access(AI.Model.ChatCompletionsToolDefinition, Access.public); @@access(AI.Model.ChatCompletionsToolChoicePreset, Access.public); -@@access(AI.Model.ChatRequestAssistantMessage, Access.public); 
-@@access(AI.Model.ChatRequestMessage, Access.public); -@@access(AI.Model.ChatRequestSystemMessage, Access.public); -@@access(AI.Model.ChatRequestToolMessage, Access.public); -@@access(AI.Model.ChatRequestUserMessage, Access.public); +@@access(AI.Model.ChatRequestMessage, Access.public, "csharp,java,javascript"); +@@access(AI.Model.ChatRequestAssistantMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestSystemMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestToolMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestUserMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestDeveloperMessage, + Access.public, + "csharp,java,javascript" +); @@access(AI.Model.ChatResponseMessage, Access.public); @@access(AI.Model.ChatRole, Access.public); @@access(AI.Model.CompletionsFinishReason, Access.public); @@access(AI.Model.CompletionsUsage, Access.public); @@access(AI.Model.EmbeddingEncodingFormat, Access.public); +@@access(AI.Model.ImageEmbeddingInput, Access.public); @@access(AI.Model.EmbeddingInputType, Access.public); @@access(AI.Model.EmbeddingItem, Access.public); @@access(AI.Model.EmbeddingsResult, Access.public); @@access(AI.Model.EmbeddingsUsage, Access.public); @@access(AI.Model.ExtraParameters, Access.public, "java"); -@@access(AI.Model.ImageEmbeddingInput, Access.public); @@access(AI.Model.FunctionCall, Access.public); @@access(AI.Model.FunctionDefinition, Access.public); @@access(AI.Model.ModelInfo, Access.public); @@ -54,30 +88,45 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.StreamingChatChoiceUpdate, Access.public); @@access(AI.Model.StreamingChatResponseMessageUpdate, Access.public); @@access(AI.Model.StreamingChatResponseToolCallUpdate, Access.public); -@@access(AI.Model.ChatRequestDeveloperMessage, + +// Hide the JSON schema class in favor of a factory method on the 
ChatCompletionsResponseFormat class +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchema, + Access.internal, + "csharp" +); + +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, Access.public, - "csharp,java,javascript" + "python" ); -// The operators need to be hidden, since we hand-write the public versions of those to -// 1. Add chat completions streaming (to getChatCompletions operator) -// 2. Add hyper-params (to getChatCompletions/Embeddings/ImageEmbeddings, all clients) -// 3. Cache model info (to getModelInfo, all clients, Python only) -@@access(AI.Model.getChatCompletions, Access.internal); -@@access(AI.Model.getEmbeddings, Access.internal); -@@access(AI.Model.getImageEmbeddings, Access.internal); -@@access(AI.Model.getModelInfo, Access.internal); +// In Python we hand-write the 4 input message classes, so we make them internal here. +// The base class ChatRequestMessage has to have the same access as the derived classes, +// so we make it internal as well. However the Python code will make it public again without changes. 
+@@access(AI.Model.ChatCompletionsOptions, Access.internal, "python"); +@@access(AI.Model.ChatRequestMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestAssistantMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestSystemMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestToolMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestUserMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestDeveloperMessage, Access.internal, "python"); -// We use shorter names in the Python client library -@@clientName(AI.Model.ChatRequestSystemMessage, "SystemMessage", "python"); -@@clientName(AI.Model.ChatRequestUserMessage, "UserMessage", "python"); -@@clientName(AI.Model.ChatRequestAssistantMessage, - "AssistantMessage", - "python" +// Make these internal, customize a third class for users to interact with +@@access(AI.Model.ChatMessageAudioDataContentItem, Access.internal); +@@access(AI.Model.ChatMessageAudioUrlContentItem, Access.internal); + +@@clientName(AI.Model.ChatCompletionsResponseFormatJsonObject, + "ChatCompletionsResponseFormatJSON", + "javascript" ); -@@clientName(AI.Model.ChatRequestToolMessage, "ToolMessage", "python"); + +// We use shorter names in the Python client library @@clientName(AI.Model.ChatMessageContentItem, "ContentItem", "python"); @@clientName(AI.Model.ChatMessageTextContentItem, "TextContentItem", "python"); +@@clientName(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, + "JsonSchemaFormat", + "python" +); @@clientName(AI.Model.ChatMessageImageContentItem, "ImageContentItem", "python" @@ -87,6 +136,16 @@ namespace Customizations; // The actual name here doesn't matter and is here for "ImageDetailLevel", "python" ); +@@clientName(AI.Model.ChatMessageAudioDataContentItem, + "AudioDataContentItem", + "python" +); +@@clientName(AI.Model.ChatMessageAudioUrlContentItem, + "AudioUrlContentItem", + "python" +); +@@clientName(AI.Model.ChatMessageInputAudio, "InputAudio", "python"); 
+@@clientName(AI.Model.ChatMessageInputAudioUrl, "InputAudioUrl", "python"); @client({ name: "ChatCompletionsClient", diff --git a/specification/ai/ModelClient/main.tsp b/specification/ai/ModelClient/main.tsp index d19a8b7ad0cc..4ce2daa69904 100644 --- a/specification/ai/ModelClient/main.tsp +++ b/specification/ai/ModelClient/main.tsp @@ -10,7 +10,7 @@ using Azure.Core; #suppress "@azure-tools/typespec-autorest/unsupported-http-auth-scheme" @useAuth( - BearerAuth | ApiKeyAuth | OAuth2Auth<[ + BearerAuth | OAuth2Auth<[ { type: OAuth2FlowType.implicit, authorizationUrl: "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", @@ -18,9 +18,7 @@ using Azure.Core; } ]> ) -@service(#{ - title: "AI Model Inference", -}) +@service(#{ title: "AI Model Inference" }) @versioned(AI.Model.Versions) namespace AI.Model; diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp index 8de12e7d9094..0499afdf792c 100644 --- a/specification/ai/ModelClient/models/chat_completions.tsp +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -9,8 +9,11 @@ using Azure.ClientGenerator.Core; namespace AI.Model; -#suppress "@azure-tools/typespec-azure-core/bad-record-type" -@doc("The ChatCompletionsOptions model") +@doc(""" + The configuration information for a chat completions request. + Completions support a wide variety of tasks and generate text that continues from or "completes" + provided prompt data. + """) model ChatCompletionsOptions { @doc(""" The collection of context messages associated with this chat completions request. 
@@ -121,9 +124,8 @@ model ChatCompletionsOptions { """) `model`?: string; - #suppress "@azure-tools/typespec-azure-core/bad-record-type" ...Record; -}; +} alias ChatCompletionsCommon = { @doc("A unique identifier associated with this chat completions response.") @@ -141,11 +143,6 @@ alias ChatCompletionsCommon = { @doc("The model used for the chat completion.") `model`: string; - - @doc(""" - Usage information for tokens processed and generated as part of this completions operation. - """) - usage: CompletionsUsage; }; @doc(""" @@ -163,6 +160,11 @@ model ChatCompletions { """) @minItems(1) choices: ChatChoice[]; + + @doc(""" + Usage information for tokens processed and generated as part of this completions operation. + """) + usage: CompletionsUsage; } @doc(""" @@ -181,6 +183,11 @@ model StreamingChatCompletionsUpdate { """) @minItems(1) choices: StreamingChatChoiceUpdate[]; + + @doc(""" + Usage information for tokens processed and generated as part of this completions operation. + """) + usage?: CompletionsUsage; } @doc(""" @@ -272,8 +279,8 @@ model ChatCompletionsResponseFormatJsonObject } @doc(""" - Defines the response format for chat completions as JSON with a given schema. The AI model - will need to adhere to this schema when generating completions. + Defines the response format for chat completions as JSON with a given schema. + The AI model will need to adhere to this schema when generating completions. """) model ChatCompletionsResponseFormatJsonSchemaDefinition { @doc(""" @@ -283,8 +290,8 @@ model ChatCompletionsResponseFormatJsonSchemaDefinition { @doc(""" The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. - Note that AI models usually only support a subset of the keywords defined by JSON schema. Consult your AI model documentation - to determine what is supported. + Note that AI models usually only support a subset of the keywords defined by JSON schema. 
+ Consult your AI model documentation to determine what is supported. """) schema: Record; @@ -296,8 +303,8 @@ model ChatCompletionsResponseFormatJsonSchemaDefinition { @doc(""" If set to true, the service will error out if the provided JSON schema contains keywords not supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`. - If false, and the provided JSON schema contains keywords not supported - by the AI model, the AI model will not error out. Instead it will ignore the unsupported keywords. + If false, and the provided JSON schema contains keywords not supported by the AI model, + the AI model will not error out. Instead it will ignore the unsupported keywords. """) strict?: boolean = false; } @@ -378,7 +385,7 @@ model ChatRequestSystemMessage extends ChatRequestMessage { model ChatRequestDeveloperMessage extends ChatRequestMessage { @doc("The chat role associated with this message, which is always 'developer' for developer messages.") role: ChatRole.developer; - + @doc("The contents of the developer message.") content: string; } @@ -623,3 +630,49 @@ union ChatMessageImageDetailLevel { """) high: "high", } + +@doc("A structured chat content item for audio content passed as a url.") +model ChatMessageAudioUrlContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'audio_url' for this type.") + type: "audio_url"; + + @doc("The details of the audio url.") + @encodedName("application/json", "audio_url") + audioUrl: ChatMessageInputAudioUrl; +} + +@doc("A structured chat content item for audio content passed as base64 encoded data.") +model ChatMessageAudioDataContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'input_audio' for this type.") + type: "input_audio"; + + @doc("The details of the input audio data.") + @encodedName("application/json", "input_audio") + inputAudio: ChatMessageInputAudio; +} + +@doc("The details of the audio url.") +model 
ChatMessageInputAudioUrl { + @doc("The URL of the audio content.") + url: string; +} + +@doc("The details of the input audio data.") +model ChatMessageInputAudio { + @doc("Base64 encoded audio data") + data: string; + + @doc("The audio format of the audio content.") + format: AudioContentFormat; +} + +@doc("A representation of the possible audio formats for audio.") +union AudioContentFormat { + string, + + @doc("Specifies audio in WAV format.") + wav: "wav", + + @doc("Specifies audio in MP3 format.") + mp3: "mp3", +} diff --git a/specification/ai/ModelClient/models/embeddings.tsp b/specification/ai/ModelClient/models/embeddings.tsp index 8e2aa8e1751f..da2155b52f0a 100644 --- a/specification/ai/ModelClient/models/embeddings.tsp +++ b/specification/ai/ModelClient/models/embeddings.tsp @@ -6,7 +6,10 @@ using TypeSpec.Http; namespace AI.Model; -alias EmbeddingsOptions = { +@doc(""" + The configuration information for an embeddings request. + """) +model EmbeddingsOptions { @doc(""" Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array @@ -38,4 +41,4 @@ alias EmbeddingsOptions = { `model`?: string; ...Record; -}; +} diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp index 712eaa8a9985..e03571e12f06 100644 --- a/specification/ai/ModelClient/models/image_embeddings.tsp +++ b/specification/ai/ModelClient/models/image_embeddings.tsp @@ -6,7 +6,10 @@ using TypeSpec.Http; namespace AI.Model; -alias ImageEmbeddingsOptions = { +@doc(""" + The configuration information for an image embeddings request. + """) +model ImageEmbeddingsOptions { @doc(""" Input image to embed. To embed multiple inputs in a single request, pass an array. The input must not exceed the max input tokens for the model. 
@@ -39,7 +42,7 @@ alias ImageEmbeddingsOptions = { `model`?: string; ...Record; -}; +} @doc("Represents an image with optional text.") model ImageEmbeddingInput { diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp index 30eb9dadd893..37be0f7a8814 100644 --- a/specification/ai/ModelClient/routes.tsp +++ b/specification/ai/ModelClient/routes.tsp @@ -18,16 +18,17 @@ namespace AI.Model; @doc(""" Gets chat completions for the provided chat messages. Completions support a wide variety of tasks and generate text that continues from or "completes" - provided prompt data. The method makes a REST API call to the `/chat/completions` route + provided prompt data. The method makes a REST API call to the `/chat/completions` route on the given endpoint. """) @actionSeparator("/") @route("chat/completions") op getChatCompletions is Azure.Core.RpcOperation< { - #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @doc("request options to pass to the endpoint using complete path") - @body + /** + * The options for chat completions. + */ + @bodyRoot body: ChatCompletionsOptions; ...AdditionalRequestHeaders; @@ -43,9 +44,10 @@ op getChatCompletions is Azure.Core.RpcOperation< @route("embeddings") op getEmbeddings is Azure.Core.RpcOperation< { - #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @doc("request options to pass to the endpoint using embeddings path") - @body + /** + * The body of the request containing the options for generating embeddings. + */ + @bodyRoot body: EmbeddingsOptions; ...AdditionalRequestHeaders; @@ -61,9 +63,10 @@ op getEmbeddings is Azure.Core.RpcOperation< @route("images/embeddings") op getImageEmbeddings is Azure.Core.RpcOperation< { - #suppress "@azure-tools/typespec-azure-core/bad-record-type" - @doc("request options to pass to the endpoint using images embeddings path") - @body + /** + * The body of the request containing options for image embeddings. 
+ */ + @bodyRoot body: ImageEmbeddingsOptions; ...AdditionalRequestHeaders; @@ -74,6 +77,8 @@ op getImageEmbeddings is Azure.Core.RpcOperation< @doc(""" Returns information about the AI model. The method makes a REST API call to the `/info` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. """) @actionSeparator("/") @route("info") diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index 4d2806067305..5c3d68855e78 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -12,18 +12,24 @@ linter: disable: "@azure-tools/typespec-azure-core/casing-style": "Disabled since JSON payload in REST API does not conform to Azure guidelines with regards to casing" "@azure-tools/typespec-azure-core/no-string-discriminator": "Use an extensible union instead of a plain string" - "@azure-tools/typespec-azure-core/bad-record-type": "REST API does not conform to Azure guidelines with regards to record types" + "@azure-tools/typespec-azure-core/bad-record-type": "We do want to use Record, and not Record. But this needs further investigation" options: "@azure-tools/typespec-autorest": azure-resource-provider-folder: "data-plane" emitter-output-dir: "{project-root}/.." 
- examples-dir: "{project-root}/examples" output-file: "{azure-resource-provider-folder}/{service-name}/{version-status}/{version}/openapi.json" "@azure-tools/typespec-python": package-mode: dataplane package-dir: "azure-ai-inference" package-name: "{package-dir}" flavor: azure + "@azure-tools/typespec-ts": + package-dir: "ai-inference-rest" + isModularLibrary: false + packageDetails: + name: "@azure-rest/ai-inference" + description: "Inference API for Azure-supported AI models" + flavor: azure "@azure-tools/typespec-csharp": package-dir: "Azure.AI.Inference" namespace: "Azure.AI.Inference" diff --git a/specification/ai/cspell.yaml b/specification/ai/cspell.yaml index 2f9086f3bcc3..b299f873a501 100644 --- a/specification/ai/cspell.yaml +++ b/specification/ai/cspell.yaml @@ -74,6 +74,9 @@ overrides: - mednax - tonnis - tyrer - - filename: '**/specification/ai/data-plane/ModelInference/**/*.*' + - filename: '**/specification/ai/data-plane/ModelClient/**/openapi.json' + words: + - ubinary + - filename: '**/specification/ai/data-plane/ModelInference/**/openapi.json' words: - ubinary diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index 88076c03ce5e..4ccba5a50419 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -19,9 +19,6 @@ "application/json" ], "security": [ - { - "ApiKeyAuth": [] - }, { "OAuth2Auth": [ "https://ml.azure.com/.default" @@ -29,11 +26,6 @@ } ], "securityDefinitions": { - "ApiKeyAuth": { - "type": "apiKey", - "name": "api-key", - "in": "header" - }, "OAuth2Auth": { "type": "oauth2", "flow": "implicit", @@ -48,7 +40,7 @@ "/chat/completions": { "post": { "operationId": "GetChatCompletions", - "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and 
generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route \non the given endpoint.", + "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route\non the given endpoint.", "parameters": [ { "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" @@ -90,100 +82,10 @@ { "name": "body", "in": "body", + "description": "The options for chat completions.", "required": true, "schema": { - "type": "object", - "properties": { - "messages": { - "type": "array", - "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatRequestMessage" - }, - "x-ms-identifiers": [] - }, - "frequency_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "stream": { - "type": "boolean", - "description": "A value indicating whether chat completions should be streamed for this request." 
- }, - "presence_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "temperature": { - "type": "number", - "format": "float", - "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 0.7, - "minimum": 0, - "maximum": 1 - }, - "top_p": { - "type": "number", - "format": "float", - "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 1, - "minimum": 0, - "maximum": 1 - }, - "max_tokens": { - "type": "integer", - "format": "int32", - "description": "The maximum number of tokens to generate.", - "minimum": 0 - }, - "response_format": { - "$ref": "#/definitions/ChatCompletionsResponseFormat", - "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." - }, - "stop": { - "type": "array", - "description": "A collection of textual sequences that will end completions generation.", - "minItems": 1, - "items": { - "type": "string" - } - }, - "tools": { - "type": "array", - "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatCompletionsToolDefinition" - }, - "x-ms-identifiers": [] - }, - "tool_choice": { - "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", - "x-ms-client-name": "toolChoice" - }, - "seed": { - "type": "integer", - "format": "int64", - "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "messages" - ], - "additionalProperties": {} + "$ref": "#/definitions/ChatCompletionsOptions" } } ], @@ -262,39 +164,10 @@ { "name": "body", "in": "body", + "description": "The body of the request containing the options for generating embeddings.", "required": true, "schema": { - "type": "object", - "properties": { - "input": { - "type": "array", - "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.", - "items": { - "type": "string" - } - }, - "dimensions": { - "type": "integer", - "format": "int32", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "encoding_format": { - "$ref": "#/definitions/EmbeddingEncodingFormat", - "description": "Optional. The desired format for the returned embeddings." - }, - "input_type": { - "$ref": "#/definitions/EmbeddingInputType", - "description": "Optional. 
The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "input" - ], - "additionalProperties": {} + "$ref": "#/definitions/EmbeddingsOptions" } } ], @@ -373,40 +246,10 @@ { "name": "body", "in": "body", + "description": "The body of the request containing options for image embeddings.", "required": true, "schema": { - "type": "object", - "properties": { - "input": { - "type": "array", - "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", - "items": { - "$ref": "#/definitions/ImageEmbeddingInput" - }, - "x-ms-identifiers": [] - }, - "dimensions": { - "type": "integer", - "format": "int32", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "encoding_format": { - "$ref": "#/definitions/EmbeddingEncodingFormat", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "input_type": { - "$ref": "#/definitions/EmbeddingInputType", - "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." 
- } - }, - "required": [ - "input" - ], - "additionalProperties": {} + "$ref": "#/definitions/ImageEmbeddingsOptions" } } ], @@ -667,6 +510,101 @@ "name" ] }, + "ChatCompletionsOptions": { + "type": "object", + "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "messages": { + "type": "array", + "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatRequestMessage" + }, + "x-ms-identifiers": [] + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "stream": { + "type": "boolean", + "description": "A value indicating whether chat completions should be streamed for this request." 
+ }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "temperature": { + "type": "number", + "format": "float", + "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 0.7, + "minimum": 0, + "maximum": 1 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 1, + "minimum": 0, + "maximum": 1 + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens to generate.", + "minimum": 0 + }, + "response_format": { + "$ref": "#/definitions/ChatCompletionsResponseFormat", + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." + }, + "stop": { + "type": "array", + "description": "A collection of textual sequences that will end completions generation.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "tools": { + "type": "array", + "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatCompletionsToolDefinition" + }, + "x-ms-identifiers": [] + }, + "tool_choice": { + "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", + "x-ms-client-name": "toolChoice" + }, + "seed": { + "type": "integer", + "format": "int64", + "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "messages" + ], + "additionalProperties": {} + }, "ChatCompletionsResponseFormat": { "type": "object", "description": "Represents the format that the model must output. 
Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", @@ -831,13 +769,13 @@ "function" ] }, - "ChatMessageAudioContentItem": { + "ChatMessageAudioDataContentItem": { "type": "object", - "description": "A structured chat content item containing an audio content.", + "description": "A structured chat content item for audio content passed as base64 encoded data.", "properties": { "input_audio": { "$ref": "#/definitions/ChatMessageInputAudio", - "description": "The details of the input audio.", + "description": "The details of the input audio data.", "x-ms-client-name": "inputAudio" } }, @@ -851,6 +789,26 @@ ], "x-ms-discriminator-value": "input_audio" }, + "ChatMessageAudioUrlContentItem": { + "type": "object", + "description": "A structured chat content item for audio content passed as a url.", + "properties": { + "audio_url": { + "$ref": "#/definitions/ChatMessageInputAudioUrl", + "description": "The details of the audio url.", + "x-ms-client-name": "audioUrl" + } + }, + "required": [ + "audio_url" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "audio_url" + }, "ChatMessageContentItem": { "type": "object", "description": "An abstract representation of a structured content item within a chat message.", @@ -934,7 +892,7 @@ }, "ChatMessageInputAudio": { "type": "object", - "description": "The details of an audio chat message content part.", + "description": "The details of the input audio data.", "properties": { "data": { "type": "string", @@ -950,6 +908,19 @@ "format" ] }, + "ChatMessageInputAudioUrl": { + "type": "object", + "description": "The details of the audio url.", + "properties": { + "url": { + "type": "string", + "description": "The URL of the audio content." 
+ } + }, + "required": [ + "url" + ] + }, "ChatMessageTextContentItem": { "type": "object", "description": "A structured chat content item containing plain text.", @@ -1314,6 +1285,40 @@ "index" ] }, + "EmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.", + "items": { + "type": "string" + } + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, "EmbeddingsResult": { "type": "object", "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.", @@ -1451,6 +1456,58 @@ "image" ] }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image encoded in base64 string as a data URL. 
Example: `data:image/{format};base64,{data}`."
+        },
+        "text": {
+          "type": "string",
+          "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter."
+        }
+      },
+      "required": [
+        "image"
+      ]
+    },
+    "ImageEmbeddingsOptions": {
+      "type": "object",
+      "description": "The configuration information for an image embeddings request.",
+      "properties": {
+        "input": {
+          "type": "array",
+          "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
+          "items": {
+            "$ref": "#/definitions/ImageEmbeddingInput"
+          },
+          "x-ms-identifiers": []
+        },
+        "dimensions": {
+          "type": "integer",
+          "format": "int32",
+          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "encoding_format": {
+          "$ref": "#/definitions/EmbeddingEncodingFormat",
+          "description": "Optional. The desired format for the returned embeddings."
+        },
+        "input_type": {
+          "$ref": "#/definitions/EmbeddingInputType",
+          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "model": {
+          "type": "string",
+          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
+ } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, "ModelInfo": { "type": "object", "description": "Represents some basic information about the AI model.", From 1143ba647bd770d48cd1e40ef47e184608b8dee2 Mon Sep 17 00:00:00 2001 From: trangevi Date: Mon, 7 Apr 2025 10:58:22 -0700 Subject: [PATCH 36/38] Update swagger spec Signed-off-by: trangevi --- .../preview/2024-05-01-preview/openapi.json | 17 - .../preview/2024-08-01-preview/openapi.json | 1651 +++++++++++++++++ 2 files changed, 1651 insertions(+), 17 deletions(-) create mode 100644 specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index af19a4578da2..8bbde16c721d 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -1458,23 +1458,6 @@ "image" ] }, - "ImageEmbeddingInput": { - "type": "object", - "description": "Represents an image with optional text.", - "properties": { - "image": { - "type": "string", - "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." - }, - "text": { - "type": "string", - "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." 
- } - }, - "required": [ - "image" - ] - }, "ImageEmbeddingsOptions": { "type": "object", "description": "The configuration information for an image embeddings request.", diff --git a/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json new file mode 100644 index 000000000000..0819dd9a03cf --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json @@ -0,0 +1,1651 @@ +{ + "swagger": "2.0", + "info": { + "title": "AI Model Inference", + "version": "2024-08-01-preview", + "x-typespec-generated": [ + { + "emitter": "@azure-tools/typespec-autorest" + } + ] + }, + "schemes": [ + "https" + ], + "produces": [ + "application/json" + ], + "consumes": [ + "application/json" + ], + "security": [ + { + "OAuth2Auth": [ + "https://ml.azure.com/.default" + ] + } + ], + "securityDefinitions": { + "OAuth2Auth": { + "type": "oauth2", + "flow": "implicit", + "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", + "scopes": { + "https://ml.azure.com/.default": "" + } + } + }, + "tags": [], + "paths": { + "/chat/completions": { + "post": { + "operationId": "GetChatCompletions", + "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. 
The method makes a REST API call to the `/chat/completions` route\non the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The options for chat completions.", + "required": true, + "schema": { + "$ref": "#/definitions/ChatCompletionsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/ChatCompletions" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." 
+ } + } + } + } + } + }, + "/embeddings": { + "post": { + "operationId": "GetEmbeddings", + "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The body of the request containing the options for generating embeddings.", + "required": true, + "schema": { + "$ref": "#/definitions/EmbeddingsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/EmbeddingsResult" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." 
+ } + } + } + } + } + }, + "/images/embeddings": { + "post": { + "operationId": "GetImageEmbeddings", + "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The body of the request containing options for image embeddings.", + "required": true, + "schema": { + "$ref": "#/definitions/ImageEmbeddingsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/EmbeddingsResult" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." 
+ } + } + } + } + } + }, + "/info": { + "get": { + "operationId": "GetModelInfo", + "description": "Returns information about the AI model.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API or Managed Compute endpoint.\nIt will not work for GitHub Models endpoint or Azure OpenAI endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/ModelInfo" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." + } + } + } + } + } + } + }, + "definitions": { + "AudioContentFormat": { + "type": "string", + "description": "A representation of the possible audio formats for audio.", + "enum": [ + "wav", + "mp3" + ], + "x-ms-enum": { + "name": "AudioContentFormat", + "modelAsString": true, + "values": [ + { + "name": "wav", + "value": "wav", + "description": "Specifies audio in WAV format." + }, + { + "name": "mp3", + "value": "mp3", + "description": "Specifies audio in MP3 format." + } + ] + } + }, + "Azure.Core.Foundations.Error": { + "type": "object", + "description": "The error object.", + "properties": { + "code": { + "type": "string", + "description": "One of a server-defined set of error codes." + }, + "message": { + "type": "string", + "description": "A human-readable representation of the error." + }, + "target": { + "type": "string", + "description": "The target of the error." 
+ }, + "details": { + "type": "array", + "description": "An array of details about specific errors that led to this reported error.", + "items": { + "$ref": "#/definitions/Azure.Core.Foundations.Error" + }, + "x-ms-identifiers": [] + }, + "innererror": { + "$ref": "#/definitions/Azure.Core.Foundations.InnerError", + "description": "An object containing more specific information than the current object about the error." + } + }, + "required": [ + "code", + "message" + ] + }, + "Azure.Core.Foundations.ErrorResponse": { + "type": "object", + "description": "A response containing error details.", + "properties": { + "error": { + "$ref": "#/definitions/Azure.Core.Foundations.Error", + "description": "The error object." + } + }, + "required": [ + "error" + ] + }, + "Azure.Core.Foundations.InnerError": { + "type": "object", + "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/Microsoft/api-guidelines/blob/vNext/Guidelines.md#7102-error-condition-responses.", + "properties": { + "code": { + "type": "string", + "description": "One of a server-defined set of error codes." + }, + "innererror": { + "$ref": "#/definitions/Azure.Core.Foundations.InnerError", + "description": "Inner error." + } + } + }, + "ChatChoice": { + "type": "object", + "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "properties": { + "index": { + "type": "integer", + "format": "int32", + "description": "The ordered index associated with this chat completions choice." 
+        },
+        "finish_reason": {
+          "$ref": "#/definitions/CompletionsFinishReason",
+          "description": "The reason that this chat completions choice completed its generation.",
+          "x-nullable": true
+        },
+        "message": {
+          "$ref": "#/definitions/ChatResponseMessage",
+          "description": "The chat message for a given chat completions prompt."
+        }
+      },
+      "required": [
+        "index",
+        "finish_reason",
+        "message"
+      ]
+    },
+    "ChatCompletions": {
+      "type": "object",
+      "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "A unique identifier associated with this chat completions response."
+        },
+        "created": {
+          "type": "integer",
+          "format": "unixtime",
+          "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970."
+        },
+        "model": {
+          "type": "string",
+          "description": "The model used for the chat completion."
+        },
+        "choices": {
+          "type": "array",
+          "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.",
+          "minItems": 1,
+          "items": {
+            "$ref": "#/definitions/ChatChoice"
+          },
+          "x-ms-identifiers": []
+        },
+        "usage": {
+          "$ref": "#/definitions/CompletionsUsage",
+          "description": "Usage information for tokens processed and generated as part of this completions operation."
+ } + }, + "required": [ + "id", + "created", + "model", + "choices", + "usage" + ] + }, + "ChatCompletionsNamedToolChoice": { + "type": "object", + "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the tool. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction", + "description": "The function that should be called." + } + }, + "required": [ + "type", + "function" + ] + }, + "ChatCompletionsNamedToolChoiceFunction": { + "type": "object", + "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function that should be called." 
+ } + }, + "required": [ + "name" + ] + }, + "ChatCompletionsOptions": { + "type": "object", + "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "messages": { + "type": "array", + "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatRequestMessage" + }, + "x-ms-identifiers": [] + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "stream": { + "type": "boolean", + "description": "A value indicating whether chat completions should be streamed for this request." 
+ }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "temperature": { + "type": "number", + "format": "float", + "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 0.7, + "minimum": 0, + "maximum": 1 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 1, + "minimum": 0, + "maximum": 1 + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens to generate.", + "minimum": 0 + }, + "response_format": { + "$ref": "#/definitions/ChatCompletionsResponseFormat", + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." + }, + "stop": { + "type": "array", + "description": "A collection of textual sequences that will end completions generation.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "tools": { + "type": "array", + "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay respond with a function call request and provide the input arguments in JSON format for that function.",
+          "minItems": 1,
+          "items": {
+            "$ref": "#/definitions/ChatCompletionsToolDefinition"
+          },
+          "x-ms-identifiers": []
+        },
+        "tool_choice": {
+          "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
+          "x-ms-client-name": "toolChoice"
+        },
+        "seed": {
+          "type": "integer",
+          "format": "int64",
+          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
+        },
+        "model": {
+          "type": "string",
+          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
+        }
+      },
+      "required": [
+        "messages"
+      ],
+      "additionalProperties": {}
+    },
+    "ChatCompletionsResponseFormat": {
+      "type": "object",
+      "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
+      "properties": {
+        "type": {
+          "type": "string",
+          "description": "The response format type to use for chat completions."
+ } + }, + "discriminator": "type", + "required": [ + "type" + ] + }, + "ChatCompletionsResponseFormatJsonObject": { + "type": "object", + "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "json_object" + }, + "ChatCompletionsResponseFormatJsonSchema": { + "type": "object", + "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.", + "properties": { + "json_schema": { + "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition", + "description": "The definition of the required JSON schema in the response, and associated metadata." + } + }, + "required": [ + "json_schema" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "json_schema" + }, + "ChatCompletionsResponseFormatJsonSchemaDefinition": { + "type": "object", + "description": "Defines the response format for chat completions as JSON with a given schema.\nThe AI model will need to adhere to this schema when generating completions.", + "properties": { + "name": { + "type": "string", + "description": "A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64." + }, + "schema": { + "type": "object", + "description": "The definition of the JSON schema. 
See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema.\nConsult your AI model documentation to determine what is supported.", + "additionalProperties": {} + }, + "description": { + "type": "string", + "description": "A description of the response format, used by the AI model to determine how to generate responses in this format." + }, + "strict": { + "type": "boolean", + "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported by the AI model,\nthe AI model will not error out. Instead it will ignore the unsupported keywords.", + "default": false + } + }, + "required": [ + "name", + "schema" + ] + }, + "ChatCompletionsResponseFormatText": { + "type": "object", + "description": "A response format for Chat Completions that emits text responses. This is the default response format.", + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "text" + }, + "ChatCompletionsToolCall": { + "type": "object", + "description": "A function tool call requested by the AI model.", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of tool call. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/FunctionCall", + "description": "The details of the function call requested by the AI model." 
+ } + }, + "required": [ + "id", + "type", + "function" + ] + }, + "ChatCompletionsToolChoicePreset": { + "type": "string", + "description": "Represents a generic policy for how a chat completions tool may be selected.", + "enum": [ + "auto", + "none", + "required" + ], + "x-ms-enum": { + "name": "ChatCompletionsToolChoicePreset", + "modelAsString": true, + "values": [ + { + "name": "auto", + "value": "auto", + "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided." + }, + { + "name": "none", + "value": "none", + "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions." + }, + { + "name": "required", + "value": "required", + "description": "Specifies that the model should respond with a call to one or more tools." + } + ] + } + }, + "ChatCompletionsToolDefinition": { + "type": "object", + "description": "The definition of a chat completions tool that can call a function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the tool. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/FunctionDefinition", + "description": "The function definition details for the function tool." 
+ } + }, + "required": [ + "type", + "function" + ] + }, + "ChatMessageAudioDataContentItem": { + "type": "object", + "description": "A structured chat content item for audio content passed as base64 encoded data.", + "properties": { + "input_audio": { + "$ref": "#/definitions/ChatMessageInputAudio", + "description": "The details of the input audio data.", + "x-ms-client-name": "inputAudio" + } + }, + "required": [ + "input_audio" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "input_audio" + }, + "ChatMessageAudioUrlContentItem": { + "type": "object", + "description": "A structured chat content item for audio content passed as a url.", + "properties": { + "audio_url": { + "$ref": "#/definitions/ChatMessageInputAudioUrl", + "description": "The details of the audio url.", + "x-ms-client-name": "audioUrl" + } + }, + "required": [ + "audio_url" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "audio_url" + }, + "ChatMessageContentItem": { + "type": "object", + "description": "An abstract representation of a structured content item within a chat message.", + "properties": { + "type": { + "type": "string", + "description": "The discriminated object type." 
+ } + }, + "discriminator": "type", + "required": [ + "type" + ] + }, + "ChatMessageImageContentItem": { + "type": "object", + "description": "A structured chat content item containing an image reference.", + "properties": { + "image_url": { + "$ref": "#/definitions/ChatMessageImageUrl", + "description": "An internet location, which must be accessible to the model,from which the image may be retrieved.", + "x-ms-client-name": "imageUrl" + } + }, + "required": [ + "image_url" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "image_url" + }, + "ChatMessageImageDetailLevel": { + "type": "string", + "description": "A representation of the possible image detail levels for image-based chat completions message content.", + "enum": [ + "auto", + "low", + "high" + ], + "x-ms-enum": { + "name": "ChatMessageImageDetailLevel", + "modelAsString": true, + "values": [ + { + "name": "auto", + "value": "auto", + "description": "Specifies that the model should determine which detail level to apply using heuristics like image size." + }, + { + "name": "low", + "value": "low", + "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images." + }, + { + "name": "high", + "value": "high", + "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens." + } + ] + } + }, + "ChatMessageImageUrl": { + "type": "object", + "description": "An internet location from which the model may retrieve an image.", + "properties": { + "url": { + "type": "string", + "description": "The URL of the image." 
+ }, + "detail": { + "$ref": "#/definitions/ChatMessageImageDetailLevel", + "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy." + } + }, + "required": [ + "url" + ] + }, + "ChatMessageInputAudio": { + "type": "object", + "description": "The details of the input audio data.", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data" + }, + "format": { + "$ref": "#/definitions/AudioContentFormat", + "description": "The audio format of the audio content." + } + }, + "required": [ + "data", + "format" + ] + }, + "ChatMessageInputAudioUrl": { + "type": "object", + "description": "The details of the audio url.", + "properties": { + "url": { + "type": "string", + "description": "The URL of the audio content." + } + }, + "required": [ + "url" + ] + }, + "ChatMessageTextContentItem": { + "type": "object", + "description": "A structured chat content item containing plain text.", + "properties": { + "text": { + "type": "string", + "description": "The content of the message." + } + }, + "required": [ + "text" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "text" + }, + "ChatRequestAssistantMessage": { + "type": "object", + "description": "A request chat message representing response or action from the assistant.", + "properties": { + "content": { + "type": "string", + "description": "The content of the message." 
+ }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/ChatCompletionsToolCall" + }, + "x-ms-client-name": "toolCalls" + } + }, + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "assistant" + }, + "ChatRequestDeveloperMessage": { + "type": "object", + "description": "A request chat message containing developer instructions that influence how the model will generate a chat completions\nresponse. Some AI models support a developer message instead of a system message.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the developer message." + } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "developer" + }, + "ChatRequestMessage": { + "type": "object", + "description": "An abstract representation of a chat message as provided in a request.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with this message." + } + }, + "discriminator": "role", + "required": [ + "role" + ] + }, + "ChatRequestSystemMessage": { + "type": "object", + "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the system message." 
+ } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "system" + }, + "ChatRequestToolMessage": { + "type": "object", + "description": "A request chat message representing requested output from a configured tool.", + "properties": { + "content": { + "type": "string", + "description": "The content of the message." + }, + "tool_call_id": { + "type": "string", + "description": "The ID of the tool call resolved by the provided content.", + "x-ms-client-name": "toolCallId" + } + }, + "required": [ + "tool_call_id" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "tool" + }, + "ChatRequestUserMessage": { + "type": "object", + "description": "A request chat message representing user input to the assistant.", + "properties": { + "content": { + "description": "The contents of the user message, with available input types varying by selected model." + } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "user" + }, + "ChatResponseMessage": { + "type": "object", + "description": "A representation of a chat message as received in a response.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with the message." 
+ }, + "content": { + "type": "string", + "description": "The content of the message.", + "x-nullable": true + }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/ChatCompletionsToolCall" + }, + "x-ms-client-name": "toolCalls" + } + }, + "required": [ + "role", + "content" + ] + }, + "ChatRole": { + "type": "string", + "description": "A description of the intended purpose of a message within a chat completions interaction.", + "enum": [ + "system", + "user", + "assistant", + "tool", + "developer" + ], + "x-ms-enum": { + "name": "ChatRole", + "modelAsString": true, + "values": [ + { + "name": "system", + "value": "system", + "description": "The role that instructs or sets the behavior of the assistant." + }, + { + "name": "user", + "value": "user", + "description": "The role that provides input for chat completions." + }, + { + "name": "assistant", + "value": "assistant", + "description": "The role that provides responses to system-instructed, user-prompted input." + }, + { + "name": "tool", + "value": "tool", + "description": "The role that represents extension tool activity within a chat completions operation." + }, + { + "name": "developer", + "value": "developer", + "description": "The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role." + } + ] + } + }, + "CompletionsFinishReason": { + "type": "string", + "description": "Representation of the manner in which a completions response concluded.", + "enum": [ + "stop", + "length", + "content_filter", + "tool_calls" + ], + "x-ms-enum": { + "name": "CompletionsFinishReason", + "modelAsString": true, + "values": [ + { + "name": "stopped", + "value": "stop", + "description": "Completions ended normally and reached its end of token generation." 
+ }, + { + "name": "tokenLimitReached", + "value": "length", + "description": "Completions exhausted available token limits before generation could complete." + }, + { + "name": "contentFiltered", + "value": "content_filter", + "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies." + }, + { + "name": "toolCalls", + "value": "tool_calls", + "description": "Completion ended with the model calling a provided tool for output." + } + ] + } + }, + "CompletionsUsage": { + "type": "object", + "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.", + "properties": { + "completion_tokens": { + "type": "integer", + "format": "int32", + "description": "The number of tokens generated across all completions emissions." + }, + "prompt_tokens": { + "type": "integer", + "format": "int32", + "description": "The number of tokens in the provided prompts for the completions request." + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "description": "The total number of tokens processed for the completions request and response." 
+ } + }, + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] + }, + "EmbeddingEncodingFormat": { + "type": "string", + "description": "The format of the embeddings result.\nReturns a 422 error if the model doesn't support the value or parameter.", + "enum": [ + "base64", + "binary", + "float", + "int8", + "ubinary", + "uint8" + ], + "x-ms-enum": { + "name": "EmbeddingEncodingFormat", + "modelAsString": true, + "values": [ + { + "name": "base64", + "value": "base64", + "description": "Base64" + }, + { + "name": "binary", + "value": "binary", + "description": "Binary" + }, + { + "name": "float", + "value": "float", + "description": "Floating point" + }, + { + "name": "int8", + "value": "int8", + "description": "Signed 8-bit integer" + }, + { + "name": "ubinary", + "value": "ubinary", + "description": "ubinary" + }, + { + "name": "uint8", + "value": "uint8", + "description": "Unsigned 8-bit integer" + } + ] + } + }, + "EmbeddingInputType": { + "type": "string", + "description": "Represents the input types used for embedding search.", + "enum": [ + "text", + "query", + "document" + ], + "x-ms-enum": { + "name": "EmbeddingInputType", + "modelAsString": true, + "values": [ + { + "name": "text", + "value": "text", + "description": "Indicates the input is a general text input." + }, + { + "name": "query", + "value": "query", + "description": "Indicates the input represents a search query to find the most relevant documents in your vector database." + }, + { + "name": "document", + "value": "document", + "description": "Indicates the input represents a document that is stored in a vector database." + } + ] + } + }, + "EmbeddingItem": { + "type": "object", + "description": "Representation of a single embeddings relatedness comparison.", + "properties": { + "embedding": { + "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input. 
Or a base64 encoded string of the embedding vector." + }, + "index": { + "type": "integer", + "format": "int32", + "description": "Index of the prompt to which the EmbeddingItem corresponds." + } + }, + "required": [ + "embedding", + "index" + ] + }, + "EmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.", + "items": { + "type": "string" + } + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, + "EmbeddingsResult": { + "type": "object", + "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the embeddings result." 
+ }, + "data": { + "type": "array", + "description": "Embedding values for the prompts submitted in the request.", + "items": { + "$ref": "#/definitions/EmbeddingItem" + }, + "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/EmbeddingsUsage", + "description": "Usage counts for tokens input using the embeddings API." + }, + "model": { + "type": "string", + "description": "The model ID used to generate this result." + } + }, + "required": [ + "id", + "data", + "usage", + "model" + ] + }, + "EmbeddingsUsage": { + "type": "object", + "description": "Measurement of the amount of tokens used in this request and response.", + "properties": { + "prompt_tokens": { + "type": "integer", + "format": "int32", + "description": "Number of tokens in the request." + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request." + } + }, + "required": [ + "prompt_tokens", + "total_tokens" + ] + }, + "ExtraParameters": { + "type": "string", + "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." 
+ } + ] + } + }, + "FunctionCall": { + "type": "object", + "description": "The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function." + } + }, + "required": [ + "name", + "arguments" + ] + }, + "FunctionDefinition": { + "type": "object", + "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to be called." + }, + "description": { + "type": "string", + "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters." + }, + "parameters": { + "type": "object", + "description": "The parameters the function accepts, described as a JSON Schema object.", + "additionalProperties": {} + } + }, + "required": [ + "name" + ] + }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." + }, + "text": { + "type": "string", + "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." 
+ } + }, + "required": [ + "image" + ] + }, + "ImageEmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an image embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", + "items": { + "$ref": "#/definitions/ImageEmbeddingInput" + }, + "x-ms-identifiers": [] + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, + "ModelInfo": { + "type": "object", + "description": "Represents some basic information about the AI model.", + "properties": { + "model_name": { + "type": "string", + "description": "The name of the AI model. For example: `Phi21`" + }, + "model_type": { + "$ref": "#/definitions/ModelType", + "description": "The type of the AI model. A unique identifier for the profile." + }, + "model_provider_name": { + "type": "string", + "description": "The model provider name. 
For example: `Microsoft Research`" + } + }, + "required": [ + "model_name", + "model_type", + "model_provider_name" + ] + }, + "ModelType": { + "type": "string", + "description": "The type of AI model", + "enum": [ + "embeddings", + "image_generation", + "text_generation", + "image_embeddings", + "audio_generation", + "chat_completion" + ], + "x-ms-enum": { + "name": "ModelType", + "modelAsString": true, + "values": [ + { + "name": "embeddings", + "value": "embeddings", + "description": "A model capable of generating embeddings from a text" + }, + { + "name": "image_generation", + "value": "image_generation", + "description": "A model capable of generating images from an image and text description" + }, + { + "name": "text_generation", + "value": "text_generation", + "description": "A text generation model" + }, + { + "name": "image_embeddings", + "value": "image_embeddings", + "description": "A model capable of generating embeddings from an image" + }, + { + "name": "audio_generation", + "value": "audio_generation", + "description": "A text-to-audio generative model" + }, + { + "name": "chat_completion", + "value": "chat_completion", + "description": "A model capable of taking chat-formatted messages and generate responses" + } + ] + } + }, + "StreamingChatChoiceUpdate": { + "type": "object", + "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "properties": { + "index": { + "type": "integer", + "format": "int32", + "description": "The ordered index associated with this chat completions choice." 
+ }, + "finish_reason": { + "$ref": "#/definitions/CompletionsFinishReason", + "description": "The reason that this chat completions choice completed its generation.", + "x-nullable": true + }, + "delta": { + "$ref": "#/definitions/StreamingChatResponseMessageUpdate", + "description": "An update to the chat message for a given chat completions prompt." + } + }, + "required": [ + "index", + "finish_reason", + "delta" + ] + }, + "StreamingChatCompletionsUpdate": { + "type": "object", + "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier associated with this chat completions response." + }, + "created": { + "type": "integer", + "format": "unixtime", + "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." + }, + "choices": { + "type": "array", + "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "minItems": 1, + "items": { + "$ref": "#/definitions/StreamingChatChoiceUpdate" + }, + "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/CompletionsUsage", + "description": "Usage information for tokens processed and generated as part of this completions operation." 
+ } + }, + "required": [ + "id", + "created", + "model", + "choices" + ] + }, + "StreamingChatResponseMessageUpdate": { + "type": "object", + "description": "A representation of a chat message update as received in a streaming response.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with the message. If present, should always be 'assistant'" + }, + "content": { + "type": "string", + "description": "The content of the message." + }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/StreamingChatResponseToolCallUpdate" + }, + "x-ms-client-name": "toolCalls" + } + } + }, + "StreamingChatResponseToolCallUpdate": { + "type": "object", + "description": "An update to the function tool call information requested by the AI model.", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "function": { + "$ref": "#/definitions/FunctionCall", + "description": "Updates to the function call requested by the AI model." 
+ } + }, + "required": [ + "id", + "function" + ] + } + }, + "parameters": { + "Azure.Core.Foundations.ApiVersionParameter": { + "name": "api-version", + "in": "query", + "description": "The API version to use for this operation.", + "required": true, + "type": "string", + "minLength": 1, + "x-ms-parameter-location": "method", + "x-ms-client-name": "apiVersion" + } + } +} From b13797e5383c75e7ad7e04ffe2852529d0fbf491 Mon Sep 17 00:00:00 2001 From: trangevi Date: Tue, 15 Apr 2025 10:42:39 -0700 Subject: [PATCH 37/38] Remove 08-01 api version Signed-off-by: trangevi --- specification/ai/ModelClient/client.tsp | 2 +- specification/ai/ModelClient/main.tsp | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index a3d0438d525b..e60ab79dc4d8 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -5,7 +5,7 @@ import "./main.tsp"; using Azure.ClientGenerator.Core; using TypeSpec.Versioning; -@useDependency(AI.Model.Versions.v2024_08_01_Preview) +@useDependency(AI.Model.Versions.v2024_05_01_Preview) namespace Customizations; // The actual name here doesn't matter and is here for organization purposes only // Are these needed? 
diff --git a/specification/ai/ModelClient/main.tsp b/specification/ai/ModelClient/main.tsp index b620ba39d097..391dfbad79e2 100644 --- a/specification/ai/ModelClient/main.tsp +++ b/specification/ai/ModelClient/main.tsp @@ -4,9 +4,7 @@ import "@azure-tools/typespec-azure-core"; import "./routes.tsp"; using TypeSpec.Http; -using TypeSpec.Rest; using TypeSpec.Versioning; -using Azure.Core; #suppress "@azure-tools/typespec-autorest/unsupported-http-auth-scheme" @useAuth( @@ -27,8 +25,4 @@ enum Versions { @useDependency(Azure.Core.Versions.v1_0_Preview_2) @doc("The 2024-05-01-preview version of the AI.Model service.") v2024_05_01_Preview: "2024-05-01-preview", - - @useDependency(Azure.Core.Versions.v1_0_Preview_2) - @doc("The 2024-08-01-preview version of the AI.Model service.") - v2024_08_01_Preview: "2024-08-01-preview", } From 8eb7cc375f11b721e94594c6eacfc5a054f6b468 Mon Sep 17 00:00:00 2001 From: trangevi Date: Tue, 15 Apr 2025 11:28:41 -0700 Subject: [PATCH 38/38] Remove 08-01 swagger spec Signed-off-by: trangevi --- .../preview/2024-08-01-preview/openapi.json | 1651 ----------------- 1 file changed, 1651 deletions(-) delete mode 100644 specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json diff --git a/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json deleted file mode 100644 index 0819dd9a03cf..000000000000 --- a/specification/ai/data-plane/AI.Model/preview/2024-08-01-preview/openapi.json +++ /dev/null @@ -1,1651 +0,0 @@ -{ - "swagger": "2.0", - "info": { - "title": "AI Model Inference", - "version": "2024-08-01-preview", - "x-typespec-generated": [ - { - "emitter": "@azure-tools/typespec-autorest" - } - ] - }, - "schemes": [ - "https" - ], - "produces": [ - "application/json" - ], - "consumes": [ - "application/json" - ], - "security": [ - { - "OAuth2Auth": [ - "https://ml.azure.com/.default" - ] - } - ], - "securityDefinitions": { - 
"OAuth2Auth": { - "type": "oauth2", - "flow": "implicit", - "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", - "scopes": { - "https://ml.azure.com/.default": "" - } - } - }, - "tags": [], - "paths": { - "/chat/completions": { - "post": { - "operationId": "GetChatCompletions", - "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. The method makes a REST API call to the `/chat/completions` route\non the given endpoint.", - "parameters": [ - { - "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" - }, - { - "name": "extra-parameters", - "in": "header", - "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", - "required": false, - "type": "string", - "enum": [ - "error", - "drop", - "pass-through" - ], - "x-ms-enum": { - "name": "ExtraParameters", - "modelAsString": true, - "values": [ - { - "name": "error", - "value": "error", - "description": "The service will error if it detected extra parameters in the request payload. This is the service default." - }, - { - "name": "drop", - "value": "drop", - "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." - }, - { - "name": "pass_through", - "value": "pass-through", - "description": "The service will pass extra parameters to the back-end AI model." 
- } - ] - }, - "x-ms-client-name": "extra_params" - }, - { - "name": "body", - "in": "body", - "description": "The options for chat completions.", - "required": true, - "schema": { - "$ref": "#/definitions/ChatCompletionsOptions" - } - } - ], - "responses": { - "200": { - "description": "The request has succeeded.", - "schema": { - "$ref": "#/definitions/ChatCompletions" - } - }, - "default": { - "description": "An unexpected error response.", - "schema": { - "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" - }, - "headers": { - "x-ms-error-code": { - "type": "string", - "description": "String error code indicating what went wrong." - } - } - } - } - } - }, - "/embeddings": { - "post": { - "operationId": "GetEmbeddings", - "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.", - "parameters": [ - { - "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" - }, - { - "name": "extra-parameters", - "in": "header", - "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", - "required": false, - "type": "string", - "enum": [ - "error", - "drop", - "pass-through" - ], - "x-ms-enum": { - "name": "ExtraParameters", - "modelAsString": true, - "values": [ - { - "name": "error", - "value": "error", - "description": "The service will error if it detected extra parameters in the request payload. This is the service default." - }, - { - "name": "drop", - "value": "drop", - "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." - }, - { - "name": "pass_through", - "value": "pass-through", - "description": "The service will pass extra parameters to the back-end AI model." 
- } - ] - }, - "x-ms-client-name": "extra_params" - }, - { - "name": "body", - "in": "body", - "description": "The body of the request containing the options for generating embeddings.", - "required": true, - "schema": { - "$ref": "#/definitions/EmbeddingsOptions" - } - } - ], - "responses": { - "200": { - "description": "The request has succeeded.", - "schema": { - "$ref": "#/definitions/EmbeddingsResult" - } - }, - "default": { - "description": "An unexpected error response.", - "schema": { - "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" - }, - "headers": { - "x-ms-error-code": { - "type": "string", - "description": "String error code indicating what went wrong." - } - } - } - } - } - }, - "/images/embeddings": { - "post": { - "operationId": "GetImageEmbeddings", - "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.", - "parameters": [ - { - "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" - }, - { - "name": "extra-parameters", - "in": "header", - "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", - "required": false, - "type": "string", - "enum": [ - "error", - "drop", - "pass-through" - ], - "x-ms-enum": { - "name": "ExtraParameters", - "modelAsString": true, - "values": [ - { - "name": "error", - "value": "error", - "description": "The service will error if it detected extra parameters in the request payload. This is the service default." - }, - { - "name": "drop", - "value": "drop", - "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." - }, - { - "name": "pass_through", - "value": "pass-through", - "description": "The service will pass extra parameters to the back-end AI model." 
- } - ] - }, - "x-ms-client-name": "extra_params" - }, - { - "name": "body", - "in": "body", - "description": "The body of the request containing options for image embeddings.", - "required": true, - "schema": { - "$ref": "#/definitions/ImageEmbeddingsOptions" - } - } - ], - "responses": { - "200": { - "description": "The request has succeeded.", - "schema": { - "$ref": "#/definitions/EmbeddingsResult" - } - }, - "default": { - "description": "An unexpected error response.", - "schema": { - "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" - }, - "headers": { - "x-ms-error-code": { - "type": "string", - "description": "String error code indicating what went wrong." - } - } - } - } - } - }, - "/info": { - "get": { - "operationId": "GetModelInfo", - "description": "Returns information about the AI model.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API or Managed Compute endpoint.\nIt will not work for GitHub Models endpoint or Azure OpenAI endpoint.", - "parameters": [ - { - "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" - } - ], - "responses": { - "200": { - "description": "The request has succeeded.", - "schema": { - "$ref": "#/definitions/ModelInfo" - } - }, - "default": { - "description": "An unexpected error response.", - "schema": { - "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" - }, - "headers": { - "x-ms-error-code": { - "type": "string", - "description": "String error code indicating what went wrong." - } - } - } - } - } - } - }, - "definitions": { - "AudioContentFormat": { - "type": "string", - "description": "A representation of the possible audio formats for audio.", - "enum": [ - "wav", - "mp3" - ], - "x-ms-enum": { - "name": "AudioContentFormat", - "modelAsString": true, - "values": [ - { - "name": "wav", - "value": "wav", - "description": "Specifies audio in WAV format." 
- }, - { - "name": "mp3", - "value": "mp3", - "description": "Specifies audio in MP3 format." - } - ] - } - }, - "Azure.Core.Foundations.Error": { - "type": "object", - "description": "The error object.", - "properties": { - "code": { - "type": "string", - "description": "One of a server-defined set of error codes." - }, - "message": { - "type": "string", - "description": "A human-readable representation of the error." - }, - "target": { - "type": "string", - "description": "The target of the error." - }, - "details": { - "type": "array", - "description": "An array of details about specific errors that led to this reported error.", - "items": { - "$ref": "#/definitions/Azure.Core.Foundations.Error" - }, - "x-ms-identifiers": [] - }, - "innererror": { - "$ref": "#/definitions/Azure.Core.Foundations.InnerError", - "description": "An object containing more specific information than the current object about the error." - } - }, - "required": [ - "code", - "message" - ] - }, - "Azure.Core.Foundations.ErrorResponse": { - "type": "object", - "description": "A response containing error details.", - "properties": { - "error": { - "$ref": "#/definitions/Azure.Core.Foundations.Error", - "description": "The error object." - } - }, - "required": [ - "error" - ] - }, - "Azure.Core.Foundations.InnerError": { - "type": "object", - "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/Microsoft/api-guidelines/blob/vNext/Guidelines.md#7102-error-condition-responses.", - "properties": { - "code": { - "type": "string", - "description": "One of a server-defined set of error codes." - }, - "innererror": { - "$ref": "#/definitions/Azure.Core.Foundations.InnerError", - "description": "Inner error." 
- } - } - }, - "ChatChoice": { - "type": "object", - "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", - "properties": { - "index": { - "type": "integer", - "format": "int32", - "description": "The ordered index associated with this chat completions choice." - }, - "finish_reason": { - "$ref": "#/definitions/CompletionsFinishReason", - "description": "The reason that this chat completions choice completed its generated.", - "x-nullable": true - }, - "message": { - "$ref": "#/definitions/ChatResponseMessage", - "description": "The chat message for a given chat completions prompt." - } - }, - "required": [ - "index", - "finish_reason", - "message" - ] - }, - "ChatCompletions": { - "type": "object", - "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", - "properties": { - "id": { - "type": "string", - "description": "A unique identifier associated with this chat completions response." - }, - "created": { - "type": "integer", - "format": "unixtime", - "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970." - }, - "model": { - "type": "string", - "description": "The model used for the chat completion." 
- }, - "choices": { - "type": "array", - "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatChoice" - }, - "x-ms-identifiers": [] - }, - "usage": { - "$ref": "#/definitions/CompletionsUsage", - "description": "Usage information for tokens processed and generated as part of this completions operation." - } - }, - "required": [ - "id", - "created", - "model", - "choices", - "usage" - ] - }, - "ChatCompletionsNamedToolChoice": { - "type": "object", - "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", - "properties": { - "type": { - "type": "string", - "description": "The type of the tool. Currently, only `function` is supported.", - "enum": [ - "function" - ], - "x-ms-enum": { - "modelAsString": false - } - }, - "function": { - "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction", - "description": "The function that should be called." - } - }, - "required": [ - "type", - "function" - ] - }, - "ChatCompletionsNamedToolChoiceFunction": { - "type": "object", - "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", - "properties": { - "name": { - "type": "string", - "description": "The name of the function that should be called." 
- } - }, - "required": [ - "name" - ] - }, - "ChatCompletionsOptions": { - "type": "object", - "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", - "properties": { - "messages": { - "type": "array", - "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatRequestMessage" - }, - "x-ms-identifiers": [] - }, - "frequency_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "stream": { - "type": "boolean", - "description": "A value indicating whether chat completions should be streamed for this request." 
- }, - "presence_penalty": { - "type": "number", - "format": "float", - "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "temperature": { - "type": "number", - "format": "float", - "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 0.7, - "minimum": 0, - "maximum": 1 - }, - "top_p": { - "type": "number", - "format": "float", - "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", - "default": 1, - "minimum": 0, - "maximum": 1 - }, - "max_tokens": { - "type": "integer", - "format": "int32", - "description": "The maximum number of tokens to generate.", - "minimum": 0 - }, - "response_format": { - "$ref": "#/definitions/ChatCompletionsResponseFormat", - "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." - }, - "stop": { - "type": "array", - "description": "A collection of textual sequences that will end completions generation.", - "minItems": 1, - "items": { - "type": "string" - } - }, - "tools": { - "type": "array", - "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. 
The model\nmay response with a function call request and provide the input arguments in JSON format for that function.", - "minItems": 1, - "items": { - "$ref": "#/definitions/ChatCompletionsToolDefinition" - }, - "x-ms-identifiers": [] - }, - "tool_choice": { - "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", - "x-ms-client-name": "toolChoice" - }, - "seed": { - "type": "integer", - "format": "int64", - "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "messages" - ], - "additionalProperties": {} - }, - "ChatCompletionsResponseFormat": { - "type": "object", - "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", - "properties": { - "type": { - "type": "string", - "description": "The response format type to use for chat completions." 
- } - }, - "discriminator": "type", - "required": [ - "type" - ] - }, - "ChatCompletionsResponseFormatJsonObject": { - "type": "object", - "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", - "allOf": [ - { - "$ref": "#/definitions/ChatCompletionsResponseFormat" - } - ], - "x-ms-discriminator-value": "json_object" - }, - "ChatCompletionsResponseFormatJsonSchema": { - "type": "object", - "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.", - "properties": { - "json_schema": { - "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition", - "description": "The definition of the required JSON schema in the response, and associated metadata." - } - }, - "required": [ - "json_schema" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatCompletionsResponseFormat" - } - ], - "x-ms-discriminator-value": "json_schema" - }, - "ChatCompletionsResponseFormatJsonSchemaDefinition": { - "type": "object", - "description": "Defines the response format for chat completions as JSON with a given schema.\nThe AI model will need to adhere to this schema when generating completions.", - "properties": { - "name": { - "type": "string", - "description": "A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64." - }, - "schema": { - "type": "object", - "description": "The definition of the JSON schema. 
See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema.\nConsult your AI model documentation to determine what is supported.", - "additionalProperties": {} - }, - "description": { - "type": "string", - "description": "A description of the response format, used by the AI model to determine how to generate responses in this format." - }, - "strict": { - "type": "boolean", - "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported by the AI model,\nthe AI model will not error out. Instead it will ignore the unsupported keywords.", - "default": false - } - }, - "required": [ - "name", - "schema" - ] - }, - "ChatCompletionsResponseFormatText": { - "type": "object", - "description": "A response format for Chat Completions that emits text responses. This is the default response format.", - "allOf": [ - { - "$ref": "#/definitions/ChatCompletionsResponseFormat" - } - ], - "x-ms-discriminator-value": "text" - }, - "ChatCompletionsToolCall": { - "type": "object", - "description": "A function tool call requested by the AI model.", - "properties": { - "id": { - "type": "string", - "description": "The ID of the tool call." - }, - "type": { - "type": "string", - "description": "The type of tool call. Currently, only `function` is supported.", - "enum": [ - "function" - ], - "x-ms-enum": { - "modelAsString": false - } - }, - "function": { - "$ref": "#/definitions/FunctionCall", - "description": "The details of the function call requested by the AI model." 
- } - }, - "required": [ - "id", - "type", - "function" - ] - }, - "ChatCompletionsToolChoicePreset": { - "type": "string", - "description": "Represents a generic policy for how a chat completions tool may be selected.", - "enum": [ - "auto", - "none", - "required" - ], - "x-ms-enum": { - "name": "ChatCompletionsToolChoicePreset", - "modelAsString": true, - "values": [ - { - "name": "auto", - "value": "auto", - "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided." - }, - { - "name": "none", - "value": "none", - "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions." - }, - { - "name": "required", - "value": "required", - "description": "Specifies that the model should respond with a call to one or more tools." - } - ] - } - }, - "ChatCompletionsToolDefinition": { - "type": "object", - "description": "The definition of a chat completions tool that can call a function.", - "properties": { - "type": { - "type": "string", - "description": "The type of the tool. Currently, only `function` is supported.", - "enum": [ - "function" - ], - "x-ms-enum": { - "modelAsString": false - } - }, - "function": { - "$ref": "#/definitions/FunctionDefinition", - "description": "The function definition details for the function tool." 
- } - }, - "required": [ - "type", - "function" - ] - }, - "ChatMessageAudioDataContentItem": { - "type": "object", - "description": "A structured chat content item for audio content passed as base64 encoded data.", - "properties": { - "input_audio": { - "$ref": "#/definitions/ChatMessageInputAudio", - "description": "The details of the input audio data.", - "x-ms-client-name": "inputAudio" - } - }, - "required": [ - "input_audio" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatMessageContentItem" - } - ], - "x-ms-discriminator-value": "input_audio" - }, - "ChatMessageAudioUrlContentItem": { - "type": "object", - "description": "A structured chat content item for audio content passed as a url.", - "properties": { - "audio_url": { - "$ref": "#/definitions/ChatMessageInputAudioUrl", - "description": "The details of the audio url.", - "x-ms-client-name": "audioUrl" - } - }, - "required": [ - "audio_url" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatMessageContentItem" - } - ], - "x-ms-discriminator-value": "audio_url" - }, - "ChatMessageContentItem": { - "type": "object", - "description": "An abstract representation of a structured content item within a chat message.", - "properties": { - "type": { - "type": "string", - "description": "The discriminated object type." 
- } - }, - "discriminator": "type", - "required": [ - "type" - ] - }, - "ChatMessageImageContentItem": { - "type": "object", - "description": "A structured chat content item containing an image reference.", - "properties": { - "image_url": { - "$ref": "#/definitions/ChatMessageImageUrl", - "description": "An internet location, which must be accessible to the model,from which the image may be retrieved.", - "x-ms-client-name": "imageUrl" - } - }, - "required": [ - "image_url" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatMessageContentItem" - } - ], - "x-ms-discriminator-value": "image_url" - }, - "ChatMessageImageDetailLevel": { - "type": "string", - "description": "A representation of the possible image detail levels for image-based chat completions message content.", - "enum": [ - "auto", - "low", - "high" - ], - "x-ms-enum": { - "name": "ChatMessageImageDetailLevel", - "modelAsString": true, - "values": [ - { - "name": "auto", - "value": "auto", - "description": "Specifies that the model should determine which detail level to apply using heuristics like image size." - }, - { - "name": "low", - "value": "low", - "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images." - }, - { - "name": "high", - "value": "high", - "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens." - } - ] - } - }, - "ChatMessageImageUrl": { - "type": "object", - "description": "An internet location from which the model may retrieve an image.", - "properties": { - "url": { - "type": "string", - "description": "The URL of the image." 
- }, - "detail": { - "$ref": "#/definitions/ChatMessageImageDetailLevel", - "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy." - } - }, - "required": [ - "url" - ] - }, - "ChatMessageInputAudio": { - "type": "object", - "description": "The details of the input audio data.", - "properties": { - "data": { - "type": "string", - "description": "Base64 encoded audio data" - }, - "format": { - "$ref": "#/definitions/AudioContentFormat", - "description": "The audio format of the audio content." - } - }, - "required": [ - "data", - "format" - ] - }, - "ChatMessageInputAudioUrl": { - "type": "object", - "description": "The details of the audio url.", - "properties": { - "url": { - "type": "string", - "description": "The URL of the audio content." - } - }, - "required": [ - "url" - ] - }, - "ChatMessageTextContentItem": { - "type": "object", - "description": "A structured chat content item containing plain text.", - "properties": { - "text": { - "type": "string", - "description": "The content of the message." - } - }, - "required": [ - "text" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatMessageContentItem" - } - ], - "x-ms-discriminator-value": "text" - }, - "ChatRequestAssistantMessage": { - "type": "object", - "description": "A request chat message representing response or action from the assistant.", - "properties": { - "content": { - "type": "string", - "description": "The content of the message." 
- }, - "tool_calls": { - "type": "array", - "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", - "items": { - "$ref": "#/definitions/ChatCompletionsToolCall" - }, - "x-ms-client-name": "toolCalls" - } - }, - "allOf": [ - { - "$ref": "#/definitions/ChatRequestMessage" - } - ], - "x-ms-discriminator-value": "assistant" - }, - "ChatRequestDeveloperMessage": { - "type": "object", - "description": "A request chat message containing developer instructions that influence how the model will generate a chat completions\nresponse. Some AI models support a developer message instead of a system message.", - "properties": { - "content": { - "type": "string", - "description": "The contents of the developer message." - } - }, - "required": [ - "content" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatRequestMessage" - } - ], - "x-ms-discriminator-value": "developer" - }, - "ChatRequestMessage": { - "type": "object", - "description": "An abstract representation of a chat message as provided in a request.", - "properties": { - "role": { - "$ref": "#/definitions/ChatRole", - "description": "The chat role associated with this message." - } - }, - "discriminator": "role", - "required": [ - "role" - ] - }, - "ChatRequestSystemMessage": { - "type": "object", - "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.", - "properties": { - "content": { - "type": "string", - "description": "The contents of the system message." 
- } - }, - "required": [ - "content" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatRequestMessage" - } - ], - "x-ms-discriminator-value": "system" - }, - "ChatRequestToolMessage": { - "type": "object", - "description": "A request chat message representing requested output from a configured tool.", - "properties": { - "content": { - "type": "string", - "description": "The content of the message." - }, - "tool_call_id": { - "type": "string", - "description": "The ID of the tool call resolved by the provided content.", - "x-ms-client-name": "toolCallId" - } - }, - "required": [ - "tool_call_id" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatRequestMessage" - } - ], - "x-ms-discriminator-value": "tool" - }, - "ChatRequestUserMessage": { - "type": "object", - "description": "A request chat message representing user input to the assistant.", - "properties": { - "content": { - "description": "The contents of the user message, with available input types varying by selected model." - } - }, - "required": [ - "content" - ], - "allOf": [ - { - "$ref": "#/definitions/ChatRequestMessage" - } - ], - "x-ms-discriminator-value": "user" - }, - "ChatResponseMessage": { - "type": "object", - "description": "A representation of a chat message as received in a response.", - "properties": { - "role": { - "$ref": "#/definitions/ChatRole", - "description": "The chat role associated with the message." 
- }, - "content": { - "type": "string", - "description": "The content of the message.", - "x-nullable": true - }, - "tool_calls": { - "type": "array", - "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", - "items": { - "$ref": "#/definitions/ChatCompletionsToolCall" - }, - "x-ms-client-name": "toolCalls" - } - }, - "required": [ - "role", - "content" - ] - }, - "ChatRole": { - "type": "string", - "description": "A description of the intended purpose of a message within a chat completions interaction.", - "enum": [ - "system", - "user", - "assistant", - "tool", - "developer" - ], - "x-ms-enum": { - "name": "ChatRole", - "modelAsString": true, - "values": [ - { - "name": "system", - "value": "system", - "description": "The role that instructs or sets the behavior of the assistant." - }, - { - "name": "user", - "value": "user", - "description": "The role that provides input for chat completions." - }, - { - "name": "assistant", - "value": "assistant", - "description": "The role that provides responses to system-instructed, user-prompted input." - }, - { - "name": "tool", - "value": "tool", - "description": "The role that represents extension tool activity within a chat completions operation." - }, - { - "name": "developer", - "value": "developer", - "description": "The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role." - } - ] - } - }, - "CompletionsFinishReason": { - "type": "string", - "description": "Representation of the manner in which a completions response concluded.", - "enum": [ - "stop", - "length", - "content_filter", - "tool_calls" - ], - "x-ms-enum": { - "name": "CompletionsFinishReason", - "modelAsString": true, - "values": [ - { - "name": "stopped", - "value": "stop", - "description": "Completions ended normally and reached its end of token generation." 
- }, - { - "name": "tokenLimitReached", - "value": "length", - "description": "Completions exhausted available token limits before generation could complete." - }, - { - "name": "contentFiltered", - "value": "content_filter", - "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies." - }, - { - "name": "toolCalls", - "value": "tool_calls", - "description": "Completion ended with the model calling a provided tool for output." - } - ] - } - }, - "CompletionsUsage": { - "type": "object", - "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.", - "properties": { - "completion_tokens": { - "type": "integer", - "format": "int32", - "description": "The number of tokens generated across all completions emissions." - }, - "prompt_tokens": { - "type": "integer", - "format": "int32", - "description": "The number of tokens in the provided prompts for the completions request." - }, - "total_tokens": { - "type": "integer", - "format": "int32", - "description": "The total number of tokens processed for the completions request and response." 
- } - }, - "required": [ - "completion_tokens", - "prompt_tokens", - "total_tokens" - ] - }, - "EmbeddingEncodingFormat": { - "type": "string", - "description": "The format of the embeddings result.\nReturns a 422 error if the model doesn't support the value or parameter.", - "enum": [ - "base64", - "binary", - "float", - "int8", - "ubinary", - "uint8" - ], - "x-ms-enum": { - "name": "EmbeddingEncodingFormat", - "modelAsString": true, - "values": [ - { - "name": "base64", - "value": "base64", - "description": "Base64" - }, - { - "name": "binary", - "value": "binary", - "description": "Binary" - }, - { - "name": "float", - "value": "float", - "description": "Floating point" - }, - { - "name": "int8", - "value": "int8", - "description": "Signed 8-bit integer" - }, - { - "name": "ubinary", - "value": "ubinary", - "description": "ubinary" - }, - { - "name": "uint8", - "value": "uint8", - "description": "Unsigned 8-bit integer" - } - ] - } - }, - "EmbeddingInputType": { - "type": "string", - "description": "Represents the input types used for embedding search.", - "enum": [ - "text", - "query", - "document" - ], - "x-ms-enum": { - "name": "EmbeddingInputType", - "modelAsString": true, - "values": [ - { - "name": "text", - "value": "text", - "description": "Indicates the input is a general text input." - }, - { - "name": "query", - "value": "query", - "description": "Indicates the input represents a search query to find the most relevant documents in your vector database." - }, - { - "name": "document", - "value": "document", - "description": "Indicates the input represents a document that is stored in a vector database." - } - ] - } - }, - "EmbeddingItem": { - "type": "object", - "description": "Representation of a single embeddings relatedness comparison.", - "properties": { - "embedding": { - "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input. 
Or a base64 encoded string of the embedding vector." - }, - "index": { - "type": "integer", - "format": "int32", - "description": "Index of the prompt to which the EmbeddingItem corresponds." - } - }, - "required": [ - "embedding", - "index" - ] - }, - "EmbeddingsOptions": { - "type": "object", - "description": "The configuration information for an embeddings request.", - "properties": { - "input": { - "type": "array", - "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.", - "items": { - "type": "string" - } - }, - "dimensions": { - "type": "integer", - "format": "int32", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "encoding_format": { - "$ref": "#/definitions/EmbeddingEncodingFormat", - "description": "Optional. The desired format for the returned embeddings." - }, - "input_type": { - "$ref": "#/definitions/EmbeddingInputType", - "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "input" - ], - "additionalProperties": {} - }, - "EmbeddingsResult": { - "type": "object", - "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.", - "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the embeddings result." 
- }, - "data": { - "type": "array", - "description": "Embedding values for the prompts submitted in the request.", - "items": { - "$ref": "#/definitions/EmbeddingItem" - }, - "x-ms-identifiers": [] - }, - "usage": { - "$ref": "#/definitions/EmbeddingsUsage", - "description": "Usage counts for tokens input using the embeddings API." - }, - "model": { - "type": "string", - "description": "The model ID used to generate this result." - } - }, - "required": [ - "id", - "data", - "usage", - "model" - ] - }, - "EmbeddingsUsage": { - "type": "object", - "description": "Measurement of the amount of tokens used in this request and response.", - "properties": { - "prompt_tokens": { - "type": "integer", - "format": "int32", - "description": "Number of tokens in the request." - }, - "total_tokens": { - "type": "integer", - "format": "int32", - "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request." - } - }, - "required": [ - "prompt_tokens", - "total_tokens" - ] - }, - "ExtraParameters": { - "type": "string", - "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.", - "enum": [ - "error", - "drop", - "pass-through" - ], - "x-ms-enum": { - "name": "ExtraParameters", - "modelAsString": true, - "values": [ - { - "name": "error", - "value": "error", - "description": "The service will error if it detected extra parameters in the request payload. This is the service default." - }, - { - "name": "drop", - "value": "drop", - "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." - }, - { - "name": "pass_through", - "value": "pass-through", - "description": "The service will pass extra parameters to the back-end AI model." 
- } - ] - } - }, - "FunctionCall": { - "type": "object", - "description": "The name and arguments of a function that should be called, as generated by the model.", - "properties": { - "name": { - "type": "string", - "description": "The name of the function to call." - }, - "arguments": { - "type": "string", - "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function." - } - }, - "required": [ - "name", - "arguments" - ] - }, - "FunctionDefinition": { - "type": "object", - "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.", - "properties": { - "name": { - "type": "string", - "description": "The name of the function to be called." - }, - "description": { - "type": "string", - "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters." - }, - "parameters": { - "type": "object", - "description": "The parameters the function accepts, described as a JSON Schema object.", - "additionalProperties": {} - } - }, - "required": [ - "name" - ] - }, - "ImageEmbeddingInput": { - "type": "object", - "description": "Represents an image with optional text.", - "properties": { - "image": { - "type": "string", - "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." - }, - "text": { - "type": "string", - "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." 
- } - }, - "required": [ - "image" - ] - }, - "ImageEmbeddingsOptions": { - "type": "object", - "description": "The configuration information for an image embeddings request.", - "properties": { - "input": { - "type": "array", - "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", - "items": { - "$ref": "#/definitions/ImageEmbeddingInput" - }, - "x-ms-identifiers": [] - }, - "dimensions": { - "type": "integer", - "format": "int32", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "encoding_format": { - "$ref": "#/definitions/EmbeddingEncodingFormat", - "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "input_type": { - "$ref": "#/definitions/EmbeddingInputType", - "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." - }, - "model": { - "type": "string", - "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." - } - }, - "required": [ - "input" - ], - "additionalProperties": {} - }, - "ModelInfo": { - "type": "object", - "description": "Represents some basic information about the AI model.", - "properties": { - "model_name": { - "type": "string", - "description": "The name of the AI model. For example: `Phi21`" - }, - "model_type": { - "$ref": "#/definitions/ModelType", - "description": "The type of the AI model. A Unique identifier for the profile." - }, - "model_provider_name": { - "type": "string", - "description": "The model provider name. 
For example: `Microsoft Research`" - } - }, - "required": [ - "model_name", - "model_type", - "model_provider_name" - ] - }, - "ModelType": { - "type": "string", - "description": "The type of AI model", - "enum": [ - "embeddings", - "image_generation", - "text_generation", - "image_embeddings", - "audio_generation", - "chat_completion" - ], - "x-ms-enum": { - "name": "ModelType", - "modelAsString": true, - "values": [ - { - "name": "embeddings", - "value": "embeddings", - "description": "A model capable of generating embeddings from a text" - }, - { - "name": "image_generation", - "value": "image_generation", - "description": "A model capable of generating images from an image and text description" - }, - { - "name": "text_generation", - "value": "text_generation", - "description": "A text generation model" - }, - { - "name": "image_embeddings", - "value": "image_embeddings", - "description": "A model capable of generating embeddings from an image" - }, - { - "name": "audio_generation", - "value": "audio_generation", - "description": "A text-to-audio generative model" - }, - { - "name": "chat_completion", - "value": "chat_completion", - "description": "A model capable of taking chat-formatted messages and generate responses" - } - ] - } - }, - "StreamingChatChoiceUpdate": { - "type": "object", - "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", - "properties": { - "index": { - "type": "integer", - "format": "int32", - "description": "The ordered index associated with this chat completions choice." 
- }, - "finish_reason": { - "$ref": "#/definitions/CompletionsFinishReason", - "description": "The reason that this chat completions choice completed its generated.", - "x-nullable": true - }, - "delta": { - "$ref": "#/definitions/StreamingChatResponseMessageUpdate", - "description": "An update to the chat message for a given chat completions prompt." - } - }, - "required": [ - "index", - "finish_reason", - "delta" - ] - }, - "StreamingChatCompletionsUpdate": { - "type": "object", - "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", - "properties": { - "id": { - "type": "string", - "description": "A unique identifier associated with this chat completions response." - }, - "created": { - "type": "integer", - "format": "unixtime", - "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970." - }, - "model": { - "type": "string", - "description": "The model used for the chat completion." - }, - "choices": { - "type": "array", - "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", - "minItems": 1, - "items": { - "$ref": "#/definitions/StreamingChatChoiceUpdate" - }, - "x-ms-identifiers": [] - }, - "usage": { - "$ref": "#/definitions/CompletionsUsage", - "description": "Usage information for tokens processed and generated as part of this completions operation." 
- } - }, - "required": [ - "id", - "created", - "model", - "choices" - ] - }, - "StreamingChatResponseMessageUpdate": { - "type": "object", - "description": "A representation of a chat message update as received in a streaming response.", - "properties": { - "role": { - "$ref": "#/definitions/ChatRole", - "description": "The chat role associated with the message. If present, should always be 'assistant'" - }, - "content": { - "type": "string", - "description": "The content of the message." - }, - "tool_calls": { - "type": "array", - "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", - "items": { - "$ref": "#/definitions/StreamingChatResponseToolCallUpdate" - }, - "x-ms-client-name": "toolCalls" - } - } - }, - "StreamingChatResponseToolCallUpdate": { - "type": "object", - "description": "An update to the function tool call information requested by the AI model.", - "properties": { - "id": { - "type": "string", - "description": "The ID of the tool call." - }, - "function": { - "$ref": "#/definitions/FunctionCall", - "description": "Updates to the function call requested by the AI model." - } - }, - "required": [ - "id", - "function" - ] - } - }, - "parameters": { - "Azure.Core.Foundations.ApiVersionParameter": { - "name": "api-version", - "in": "query", - "description": "The API version to use for this operation.", - "required": true, - "type": "string", - "minLength": 1, - "x-ms-parameter-location": "method", - "x-ms-client-name": "apiVersion" - } - } -}