diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp index 95d7061de461..1bfd80674840 100644 --- a/specification/ai/ModelClient/client.tsp +++ b/specification/ai/ModelClient/client.tsp @@ -40,7 +40,7 @@ namespace Customizations; // The actual name here doesn't matter and is here for @@access(AI.Model.CompletionsFinishReason, Access.public); @@access(AI.Model.CompletionsUsage, Access.public); @@access(AI.Model.EmbeddingEncodingFormat, Access.public, "python"); -@@access(AI.Model.EmbeddingInput, Access.public, "python"); +@@access(AI.Model.ImageEmbeddingInput, Access.public, "python"); @@access(AI.Model.EmbeddingInputType, Access.public, "python"); @@access(AI.Model.EmbeddingItem, Access.public, "python"); @@access(AI.Model.EmbeddingsResult, Access.public, "python"); diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp index 6e2f407d5dbc..712eaa8a9985 100644 --- a/specification/ai/ModelClient/models/image_embeddings.tsp +++ b/specification/ai/ModelClient/models/image_embeddings.tsp @@ -11,7 +11,7 @@ alias ImageEmbeddingsOptions = { Input image to embed. To embed multiple inputs in a single request, pass an array. The input must not exceed the max input tokens for the model. """) - input: EmbeddingInput[]; + input: ImageEmbeddingInput[]; @doc(""" Optional. The number of dimensions the resulting output embeddings should have. @@ -42,9 +42,9 @@ alias ImageEmbeddingsOptions = { }; @doc("Represents an image with optional text.") -model EmbeddingInput { +model ImageEmbeddingInput { @doc(""" - The input image, in PNG format. + The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`. 
""") image: string; diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml index a43f47381cc8..f8417273547c 100644 --- a/specification/ai/ModelClient/tspconfig.yaml +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -16,7 +16,6 @@ options: "@azure-tools/typespec-autorest": azure-resource-provider-folder: "data-plane" emitter-output-dir: "{project-root}/.." - examples-directory: "{project-root}/examples" output-file: "{azure-resource-provider-folder}/{service-name}/{version-status}/{version}/openapi.json" "@azure-tools/typespec-python": package-mode: dataplane diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json index 6540edab667f..f0175f3a1d48 100644 --- a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -381,7 +381,7 @@ "type": "array", "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", "items": { - "$ref": "#/definitions/EmbeddingInput" + "$ref": "#/definitions/ImageEmbeddingInput" }, "x-ms-identifiers": [] }, @@ -1135,23 +1135,6 @@ ] } }, - "EmbeddingInput": { - "type": "object", - "description": "Represents an image with optional text.", - "properties": { - "image": { - "type": "string", - "description": "The input image, in PNG format." - }, - "text": { - "type": "string", - "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." 
- } - }, - "required": [ - "image" - ] - }, "EmbeddingInputType": { "type": "string", "description": "Represents the input types used for embedding search.", @@ -1320,6 +1303,23 @@ "name" ] }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." + }, + "text": { + "type": "string", + "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." + } + }, + "required": [ + "image" + ] + }, "ModelInfo": { "type": "object", "description": "Represents some basic information about the AI model.", @@ -1352,7 +1352,7 @@ "text_generation", "image_embeddings", "audio_generation", - "chat" + "chat_completion" ], "x-ms-enum": { "name": "ModelType", @@ -1361,32 +1361,32 @@ { "name": "embeddings", "value": "embeddings", - "description": "Embeddings." 
+ "description": "A model capable of generating embeddings from text" }, { "name": "image_generation", "value": "image_generation", - "description": "Image generation" + "description": "A model capable of generating images from an image and text description" }, { "name": "text_generation", "value": "text_generation", - "description": "Text generation" + "description": "A text generation model" }, { "name": "image_embeddings", "value": "image_embeddings", - "description": "Image embeddings" + "description": "A model capable of generating embeddings from an image" }, { "name": "audio_generation", "value": "audio_generation", - "description": "Audio generation" + "description": "A text-to-audio generative model" }, { - "name": "chat", - "value": "chat", - "description": "Chat completions" + "name": "chat_completion", + "value": "chat_completion", + "description": "A model capable of taking chat-formatted messages and generating responses" } ] }