diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp
index 5f9654e56078..4c63782f9705 100644
--- a/specification/ai/ModelClient/models/chat_completions.tsp
+++ b/specification/ai/ModelClient/models/chat_completions.tsp
@@ -9,7 +9,12 @@ using Azure.ClientGenerator.Core;
 
 namespace AI.Model;
 
-alias ChatCompletionsOptions = {
+@doc("""
+  The configuration information for a chat completions request.
+  Completions support a wide variety of tasks and generate text that continues from or "completes"
+  provided prompt data.
+  """)
+model ChatCompletionsOptions {
   @doc("""
     The collection of context messages associated with this chat completions request.
     Typical usage begins with a chat message for the System role that provides instructions for
@@ -120,7 +125,7 @@ alias ChatCompletionsOptions = {
   `model`?: string;
 
   ...Record<unknown>;
-};
+}
 
 alias ChatCompletionsCommon = {
   @doc("A unique identifier associated with this chat completions response.")
diff --git a/specification/ai/ModelClient/models/embeddings.tsp b/specification/ai/ModelClient/models/embeddings.tsp
index 8e2aa8e1751f..da2155b52f0a 100644
--- a/specification/ai/ModelClient/models/embeddings.tsp
+++ b/specification/ai/ModelClient/models/embeddings.tsp
@@ -6,7 +6,10 @@ using TypeSpec.Http;
 
 namespace AI.Model;
 
-alias EmbeddingsOptions = {
+@doc("""
+  The configuration information for an embeddings request.
+  """)
+model EmbeddingsOptions {
   @doc("""
     Input text to embed, encoded as a string or array of tokens.
     To embed multiple inputs in a single request, pass an array
@@ -38,4 +41,4 @@ alias EmbeddingsOptions = {
   `model`?: string;
 
   ...Record<unknown>;
-};
+}
diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp
index 712eaa8a9985..e03571e12f06 100644
--- a/specification/ai/ModelClient/models/image_embeddings.tsp
+++ b/specification/ai/ModelClient/models/image_embeddings.tsp
@@ -6,7 +6,10 @@ using TypeSpec.Http;
 
 namespace AI.Model;
 
-alias ImageEmbeddingsOptions = {
+@doc("""
+  The configuration information for an image embeddings request.
+  """)
+model ImageEmbeddingsOptions {
   @doc("""
     Input image to embed. To embed multiple inputs in a single request, pass an array.
     The input must not exceed the max input tokens for the model.
@@ -39,7 +42,7 @@ alias ImageEmbeddingsOptions = {
   `model`?: string;
 
   ...Record<unknown>;
-};
+}
 
 @doc("Represents an image with optional text.")
 model ImageEmbeddingInput {
diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp
index 51d4c139e5e0..5e9bfc8dcb06 100644
--- a/specification/ai/ModelClient/routes.tsp
+++ b/specification/ai/ModelClient/routes.tsp
@@ -25,7 +25,12 @@ namespace AI.Model;
 @route("chat/completions")
 op getChatCompletions is Azure.Core.RpcOperation<
   {
-    ...ChatCompletionsOptions;
+    /**
+     * The options for chat completions.
+     */
+    @bodyRoot
+    body: ChatCompletionsOptions;
+    ...AdditionalRequestHeaders;
   },
   ChatCompletions
 >;
@@ -39,7 +44,12 @@
 @route("embeddings")
 op getEmbeddings is Azure.Core.RpcOperation<
   {
-    ...EmbeddingsOptions;
+    /**
+     * The body of the request containing the options for generating embeddings.
+     */
+    @bodyRoot
+    body: EmbeddingsOptions;
+    ...AdditionalRequestHeaders;
   },
   EmbeddingsResult
 >;
@@ -53,7 +63,12 @@
 @route("images/embeddings")
 op getImageEmbeddings is Azure.Core.RpcOperation<
   {
-    ...ImageEmbeddingsOptions;
+    /**
+     * The body of the request containing options for image embeddings.
+     */
+    @bodyRoot
+    body: ImageEmbeddingsOptions;
+    ...AdditionalRequestHeaders;
   },
   EmbeddingsResult
 >;
diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json
index 88076c03ce5e..6a24b5cf2e79 100644
--- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json
+++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json
@@ -90,100 +90,10 @@
           {
             "name": "body",
             "in": "body",
+            "description": "The options for chat completions.",
             "required": true,
             "schema": {
-              "type": "object",
-              "properties": {
-                "messages": {
-                  "type": "array",
-                  "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.",
-                  "minItems": 1,
-                  "items": {
-                    "$ref": "#/definitions/ChatRequestMessage"
-                  },
-                  "x-ms-identifiers": []
-                },
-                "frequency_penalty": {
-                  "type": "number",
-                  "format": "float",
-                  "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].",
-                  "default": 0,
-                  "minimum": -2,
-                  "maximum": 2
-                },
-                "stream": {
-                  "type": "boolean",
-                  "description": "A value indicating whether chat completions should be streamed for this request."
-                },
-                "presence_penalty": {
-                  "type": "number",
-                  "format": "float",
-                  "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].",
-                  "default": 0,
-                  "minimum": -2,
-                  "maximum": 2
-                },
-                "temperature": {
-                  "type": "number",
-                  "format": "float",
-                  "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-                  "default": 0.7,
-                  "minimum": 0,
-                  "maximum": 1
-                },
-                "top_p": {
-                  "type": "number",
-                  "format": "float",
-                  "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
-                  "default": 1,
-                  "minimum": 0,
-                  "maximum": 1
-                },
-                "max_tokens": {
-                  "type": "integer",
-                  "format": "int32",
-                  "description": "The maximum number of tokens to generate.",
-                  "minimum": 0
-                },
-                "response_format": {
-                  "$ref": "#/definitions/ChatCompletionsResponseFormat",
-                  "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
-                },
-                "stop": {
-                  "type": "array",
-                  "description": "A collection of textual sequences that will end completions generation.",
-                  "minItems": 1,
-                  "items": {
-                    "type": "string"
-                  }
-                },
-                "tools": {
-                  "type": "array",
-                  "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay response with a function call request and provide the input arguments in JSON format for that function.",
-                  "minItems": 1,
-                  "items": {
-                    "$ref": "#/definitions/ChatCompletionsToolDefinition"
-                  },
-                  "x-ms-identifiers": []
-                },
-                "tool_choice": {
-                  "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
-                  "x-ms-client-name": "toolChoice"
-                },
-                "seed": {
-                  "type": "integer",
-                  "format": "int64",
-                  "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
-                },
-                "model": {
-                  "type": "string",
-                  "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-                }
-              },
-              "required": [
-                "messages"
-              ],
-              "additionalProperties": {}
+              "$ref": "#/definitions/ChatCompletionsOptions"
             }
           }
         ],
@@ -262,39 +172,10 @@
           {
             "name": "body",
             "in": "body",
+            "description": "The body of the request containing the options for generating embeddings.",
             "required": true,
             "schema": {
-              "type": "object",
-              "properties": {
-                "input": {
-                  "type": "array",
-                  "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.",
-                  "items": {
-                    "type": "string"
-                  }
-                },
-                "dimensions": {
-                  "type": "integer",
-                  "format": "int32",
-                  "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-                },
-                "encoding_format": {
-                  "$ref": "#/definitions/EmbeddingEncodingFormat",
-                  "description": "Optional. The desired format for the returned embeddings."
-                },
-                "input_type": {
-                  "$ref": "#/definitions/EmbeddingInputType",
-                  "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-                },
-                "model": {
-                  "type": "string",
-                  "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-                }
-              },
-              "required": [
-                "input"
-              ],
-              "additionalProperties": {}
+              "$ref": "#/definitions/EmbeddingsOptions"
            }
          }
        ],
@@ -373,40 +254,10 @@
           {
             "name": "body",
             "in": "body",
+            "description": "The body of the request containing options for image embeddings.",
             "required": true,
             "schema": {
-              "type": "object",
-              "properties": {
-                "input": {
-                  "type": "array",
-                  "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
-                  "items": {
-                    "$ref": "#/definitions/ImageEmbeddingInput"
-                  },
-                  "x-ms-identifiers": []
-                },
-                "dimensions": {
-                  "type": "integer",
-                  "format": "int32",
-                  "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-                },
-                "encoding_format": {
-                  "$ref": "#/definitions/EmbeddingEncodingFormat",
-                  "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
-                },
-                "input_type": {
-                  "$ref": "#/definitions/EmbeddingInputType",
-                  "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
-                },
-                "model": {
-                  "type": "string",
-                  "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
-                }
-              },
-              "required": [
-                "input"
-              ],
-              "additionalProperties": {}
+              "$ref": "#/definitions/ImageEmbeddingsOptions"
            }
          }
        ],
@@ -667,6 +518,101 @@
         "name"
       ]
     },
+    "ChatCompletionsOptions": {
+      "type": "object",
+      "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.",
+      "properties": {
+        "messages": {
+          "type": "array",
+          "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.",
+          "minItems": 1,
+          "items": {
+            "$ref": "#/definitions/ChatRequestMessage"
+          },
+          "x-ms-identifiers": []
+        },
+        "frequency_penalty": {
+          "type": "number",
+          "format": "float",
+          "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].",
+          "default": 0,
+          "minimum": -2,
+          "maximum": 2
+        },
+        "stream": {
+          "type": "boolean",
+          "description": "A value indicating whether chat completions should be streamed for this request."
+        },
+        "presence_penalty": {
+          "type": "number",
+          "format": "float",
+          "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].",
+          "default": 0,
+          "minimum": -2,
+          "maximum": 2
+        },
+        "temperature": {
+          "type": "number",
+          "format": "float",
+          "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
+          "default": 0.7,
+          "minimum": 0,
+          "maximum": 1
+        },
+        "top_p": {
+          "type": "number",
+          "format": "float",
+          "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].",
+          "default": 1,
+          "minimum": 0,
+          "maximum": 1
+        },
+        "max_tokens": {
+          "type": "integer",
+          "format": "int32",
+          "description": "The maximum number of tokens to generate.",
+          "minimum": 0
+        },
+        "response_format": {
+          "$ref": "#/definitions/ChatCompletionsResponseFormat",
+          "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
+        },
+        "stop": {
+          "type": "array",
+          "description": "A collection of textual sequences that will end completions generation.",
+          "minItems": 1,
+          "items": {
+            "type": "string"
+          }
+        },
+        "tools": {
+          "type": "array",
+          "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay respond with a function call request and provide the input arguments in JSON format for that function.",
+          "minItems": 1,
+          "items": {
+            "$ref": "#/definitions/ChatCompletionsToolDefinition"
+          },
+          "x-ms-identifiers": []
+        },
+        "tool_choice": {
+          "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.",
+          "x-ms-client-name": "toolChoice"
+        },
+        "seed": {
+          "type": "integer",
+          "format": "int64",
+          "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed."
+        },
+        "model": {
+          "type": "string",
+          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
+        }
+      },
+      "required": [
+        "messages"
+      ],
+      "additionalProperties": {}
+    },
     "ChatCompletionsResponseFormat": {
       "type": "object",
       "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.",
@@ -1314,6 +1260,40 @@
         "index"
       ]
     },
+    "EmbeddingsOptions": {
+      "type": "object",
+      "description": "The configuration information for an embeddings request.",
+      "properties": {
+        "input": {
+          "type": "array",
+          "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.",
+          "items": {
+            "type": "string"
+          }
+        },
+        "dimensions": {
+          "type": "integer",
+          "format": "int32",
+          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "encoding_format": {
+          "$ref": "#/definitions/EmbeddingEncodingFormat",
+          "description": "Optional. The desired format for the returned embeddings."
+        },
+        "input_type": {
+          "$ref": "#/definitions/EmbeddingInputType",
+          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "model": {
+          "type": "string",
+          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
+        }
+      },
+      "required": [
+        "input"
+      ],
+      "additionalProperties": {}
+    },
     "EmbeddingsResult": {
       "type": "object",
       "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.",
@@ -1451,6 +1431,41 @@
         "image"
       ]
     },
+    "ImageEmbeddingsOptions": {
+      "type": "object",
+      "description": "The configuration information for an image embeddings request.",
+      "properties": {
+        "input": {
+          "type": "array",
+          "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.",
+          "items": {
+            "$ref": "#/definitions/ImageEmbeddingInput"
+          },
+          "x-ms-identifiers": []
+        },
+        "dimensions": {
+          "type": "integer",
+          "format": "int32",
+          "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "encoding_format": {
+          "$ref": "#/definitions/EmbeddingEncodingFormat",
+          "description": "Optional. The desired format for the returned embeddings."
+        },
+        "input_type": {
+          "$ref": "#/definitions/EmbeddingInputType",
+          "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter."
+        },
+        "model": {
+          "type": "string",
+          "description": "ID of the specific AI model to use, if more than one model is available on the endpoint."
+        }
+      },
+      "required": [
+        "input"
+      ],
+      "additionalProperties": {}
+    },
     "ModelInfo": {
       "type": "object",
       "description": "Represents some basic information about the AI model.",
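For reference, a minimal sketch of a request body that should validate against the new ChatCompletionsOptions definition. Only "messages" is required; the model name is hypothetical, the message shape assumes the usual role/content form of ChatRequestMessage (not shown in this diff), and "additionalProperties": {} keeps extra provider-specific keys legal:

    POST /chat/completions?api-version=2024-05-01-preview
    {
      "messages": [
        { "role": "system", "content": "You are a helpful assistant." },
        { "role": "user", "content": "How many feet are in a mile?" }
      ],
      "temperature": 0.7,
      "max_tokens": 256,
      "model": "my-model"
    }

The embeddings and images/embeddings routes take their request bodies the same way, as EmbeddingsOptions and ImageEmbeddingsOptions respectively.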