diff --git a/specification/ai/ModelClient/client.tsp b/specification/ai/ModelClient/client.tsp new file mode 100644 index 000000000000..e60ab79dc4d8 --- /dev/null +++ b/specification/ai/ModelClient/client.tsp @@ -0,0 +1,175 @@ +import "@azure-tools/typespec-client-generator-core"; + +import "./main.tsp"; + +using Azure.ClientGenerator.Core; +using TypeSpec.Versioning; + +@useDependency(AI.Model.Versions.v2024_05_01_Preview) +namespace Customizations; // The actual name here doesn't matter and is here for organization purposes only + +// Are these needed? +@@usage(AI.Model.ChatCompletions, Usage.output); +@@usage(AI.Model.ModelInfo, Usage.output); +@@usage(AI.Model.StreamingChatCompletionsUpdate, Usage.output); +@@usage(AI.Model.StreamingChatChoiceUpdate, Usage.output); +@@usage(AI.Model.StreamingChatResponseMessageUpdate, Usage.output); +@@usage(AI.Model.StreamingChatResponseToolCallUpdate, Usage.output); + +// Necessary for autogenerating options objects while the clients specify spread parameters, for the languages that want the options class. +@@usage(AI.Model.ChatCompletionsOptions, Usage.input, "csharp"); +@@usage(AI.Model.EmbeddingsOptions, Usage.input, "csharp"); +@@usage(AI.Model.ImageEmbeddingsOptions, Usage.input, "csharp"); + +// Necessary due to how we're forcing this abstraction +@@usage(AI.Model.ChatMessageAudioDataContentItem, Usage.input); +@@usage(AI.Model.ChatMessageAudioUrlContentItem, Usage.input); + +// The operations need to be hidden, since we hand-write the public versions of those +@@access(AI.Model.getChatCompletions, Access.internal); +@@access(AI.Model.getEmbeddings, Access.internal); +@@access(AI.Model.getImageEmbeddings, Access.internal); +@@access(AI.Model.getModelInfo, Access.internal, "python,java"); + +// Since we made all operation methods internal, we need to explicitly +// say we still want the models they use to be public, since they will be used by hand-written operation methods.
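+//
+// For illustration only (hypothetical names, not generated from this spec), a hand-written
+// public method in a client library would wrap the internal generated operation roughly
+// like this Python sketch:
+//
+//   def complete(self, body, **kwargs):
+//       # Public convenience wrapper delegating to the internal generated operation.
+//       return self._complete(body, **kwargs)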
+@@access(AI.Model.ChatChoice, Access.public); +@@access(AI.Model.ChatCompletions, Access.public); +@@access(AI.Model.ChatCompletionsOptions, Access.public, "java"); +@@access(AI.Model.ChatCompletionsToolCall, Access.public); +@@access(AI.Model.ChatCompletionsToolDefinition, Access.public); +@@access(AI.Model.ChatCompletionsNamedToolChoice, Access.public); +@@access(AI.Model.ChatCompletionsNamedToolChoiceFunction, Access.public); +@@access(AI.Model.ChatCompletionsToolChoicePreset, Access.public); +@@access(AI.Model.ChatRequestMessage, Access.public, "csharp,java,javascript"); +@@access(AI.Model.ChatRequestAssistantMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestSystemMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestToolMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestUserMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatRequestDeveloperMessage, + Access.public, + "csharp,java,javascript" +); +@@access(AI.Model.ChatResponseMessage, Access.public); +@@access(AI.Model.ChatRole, Access.public); +@@access(AI.Model.CompletionsFinishReason, Access.public); +@@access(AI.Model.CompletionsUsage, Access.public); +@@access(AI.Model.EmbeddingEncodingFormat, Access.public); +@@access(AI.Model.ImageEmbeddingInput, Access.public); +@@access(AI.Model.EmbeddingInputType, Access.public); +@@access(AI.Model.EmbeddingItem, Access.public); +@@access(AI.Model.EmbeddingsResult, Access.public); +@@access(AI.Model.EmbeddingsUsage, Access.public); +@@access(AI.Model.ExtraParameters, Access.public, "java"); +@@access(AI.Model.FunctionCall, Access.public); +@@access(AI.Model.FunctionDefinition, Access.public); +@@access(AI.Model.ModelInfo, Access.public); +@@access(AI.Model.ModelType, Access.public); +@@access(AI.Model.ChatMessageContentItem, Access.public); +@@access(AI.Model.ChatMessageTextContentItem, Access.public); +@@access(AI.Model.ChatMessageImageContentItem, Access.public); +@@access(AI.Model.ChatMessageImageUrl, Access.public); +@@access(AI.Model.ChatMessageImageDetailLevel, Access.public); +@@access(AI.Model.StreamingChatCompletionsUpdate, Access.public); +@@access(AI.Model.StreamingChatChoiceUpdate, Access.public); +@@access(AI.Model.StreamingChatResponseMessageUpdate, Access.public); +@@access(AI.Model.StreamingChatResponseToolCallUpdate, Access.public); + +// Hide the JSON schema class in favor of a factory method on the ChatCompletionsResponseFormat class +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchema, + Access.internal, + "csharp" +); + +@@access(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, + Access.public, + "python" +); + +// In Python we hand-write the five input message classes, so we make them internal here. +// The base class ChatRequestMessage has to have the same access as the derived classes, +// so we make it internal as well. However, the hand-written Python code makes it public again without changes.
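+//
+// As a sketch (hypothetical class names; the actual generated names may differ), the
+// hand-written Python code re-exposes each internal generated message class via a
+// public subclass:
+//
+//   class UserMessage(_ChatRequestUserMessage):
+//       """Public hand-written version of the internal generated user message class."""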
+@@access(AI.Model.ChatCompletionsOptions, Access.internal, "python"); +@@access(AI.Model.ChatRequestMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestAssistantMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestSystemMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestToolMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestUserMessage, Access.internal, "python"); +@@access(AI.Model.ChatRequestDeveloperMessage, Access.internal, "python"); + +// Make these internal, customize a third class for users to interact with +@@access(AI.Model.ChatMessageAudioDataContentItem, Access.internal); +@@access(AI.Model.ChatMessageAudioUrlContentItem, Access.internal); + +@@clientName(AI.Model.ChatCompletionsResponseFormatJsonObject, + "ChatCompletionsResponseFormatJSON", + "javascript" +); + +// We use shorter names in the Python client library +@@clientName(AI.Model.ChatMessageContentItem, "ContentItem", "python"); +@@clientName(AI.Model.ChatMessageTextContentItem, "TextContentItem", "python"); +@@clientName(AI.Model.ChatCompletionsResponseFormatJsonSchemaDefinition, + "JsonSchemaFormat", + "python" +); +@@clientName(AI.Model.ChatMessageImageContentItem, + "ImageContentItem", + "python" +); +@@clientName(AI.Model.ChatMessageImageUrl, "ImageUrl", "python"); +@@clientName(AI.Model.ChatMessageImageDetailLevel, + "ImageDetailLevel", + "python" +); +@@clientName(AI.Model.ChatMessageAudioDataContentItem, + "AudioDataContentItem", + "python" +); +@@clientName(AI.Model.ChatMessageAudioUrlContentItem, + "AudioUrlContentItem", + "python" +); +@@clientName(AI.Model.ChatMessageInputAudio, "InputAudio", "python"); +@@clientName(AI.Model.ChatMessageInputAudioUrl, "InputAudioUrl", "python"); + +@client({ + name: "ChatCompletionsClient", + service: AI.Model, +}) +interface Client1 { + complete is AI.Model.getChatCompletions; + getModelInfo is AI.Model.getModelInfo; +} + +@client({ + name: "EmbeddingsClient", + service: AI.Model, +}) +interface Client2 { + embed is AI.Model.getEmbeddings; + getModelInfo is AI.Model.getModelInfo; +} + +@client({ + name: "ImageEmbeddingsClient", + service: AI.Model, +}) +interface Client3 { + embed is AI.Model.getImageEmbeddings; + getModelInfo is AI.Model.getModelInfo; +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json new file mode 100644 index 000000000000..a097e1a38d0d --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json @@ -0,0 +1,72 @@ +{ + "title": "maximum set chat completion", + "operationId": "GetChatCompletions", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "messages": [ + { + "role": "ChatRequestMessage" + } + ], + "frequency_penalty": -2, + "stream": true, + "presence_penalty": -1, + "temperature": 0, + "top_p": 0, + "max_tokens": 0, + "response_format": { + "type": "ChatCompletionsResponseFormat" + }, + "stop": [ + "dcfnxrdeumnoytdaooqkbl" + ], + "tools": [ + { + "type": "function", + "function": { + "name": "ikvkykzp", + "description": "gofxoftbpdi" + } + } + ], + "seed": 21, + "model": "askiizcjob" + } + }, + "responses": { + "200": { + "body": { + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 + }, + "choices": [ + { + 
"index": 7, + "finish_reason": "stop", + "message": { + "role": "system", + "content": "jnsnrwblpuokzbkrzdcwubpfz", + "tool_calls": [ + { + "id": "yrobmilsrugmbwukmzo", + "type": "function", + "function": { + "name": "ikvkykzp", + "arguments": "oqxvktuduomvckic" + } + } + ] + } + } + ] + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json new file mode 100644 index 000000000000..d2bc60ef2f8a --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetChatCompletions_MinimumSet_Gen.json @@ -0,0 +1,38 @@ +{ + "title": "minimum set chat completion", + "operationId": "GetChatCompletions", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "messages": [ + { + "role": "ChatRequestMessage" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 + }, + "choices": [ + { + "index": 7, + "finish_reason": "stop", + "message": { + "role": "system", + "content": "jnsnrwblpuokzbkrzdcwubpfz" + } + } + ] + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..413c4531a81b --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,34 @@ +{ + "title": "maximum set embeddings", + "operationId": "GetEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "input": [ + "p" + ], + "dimensions": 11, + "encoding_format": "base64", + "input_type": "text", + "model": "kwkpluujwiabfquhkaugttxut" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 21 + } + ], + "usage": { + "prompt_tokens": 4, + "total_tokens": 22 + }, + "model": "uvrmctbnze" + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..e3c32e0dbdfe --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,32 @@ +{ + "title": "minimum set embeddings", + "operationId": "GetEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + "ujbdzqgintkjjorqvgtyqxehwod" + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kmclubpbyrjjrcihu", + "data": [ + { + "embedding": [ + 3 + ], + "index": 20 + } + ], + "usage": { + "prompt_tokens": 17, + "total_tokens": 25 + }, + "model": "gg" + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..5a4ef17ad877 --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,37 @@ +{ + "title": "maximum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + 
"extra-parameters": "error", + "body": { + "input": [ + { + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" + } + ], + "dimensions": 26, + "encoding_format": "base64", + "input_type": "text", + "model": "jyb" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 21 + } + ], + "usage": { + "prompt_tokens": 4, + "total_tokens": 22 + }, + "model": "uvrmctbnze" + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..4ea5e30e4246 --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,34 @@ +{ + "title": "minimum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + { + "image": "gvmojtfooxixxzayrditjlyymg" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kmclubpbyrjjrcihu", + "data": [ + { + "embedding": [ + 3 + ], + "index": 20 + } + ], + "usage": { + "prompt_tokens": 17, + "total_tokens": 25 + }, + "model": "gg" + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json new file mode 100644 index 000000000000..3e40dd40effa --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MaximumSet_Gen.json @@ -0,0 +1,16 @@ +{ + "title": "maximum set model information", + "operationId": "GetModelInfo", + "parameters": { + "api-version": "2024-05-01-preview" + }, + "responses": { + "200": { + "body": { + "model_name": "jno", + "model_type": "embeddings", + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" + } + } + } +} diff --git a/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json new file mode 100644 index 000000000000..a819c57488ff --- /dev/null +++ b/specification/ai/ModelClient/examples/2024-05-01-preview/GetModelInfo_MinimumSet_Gen.json @@ -0,0 +1,16 @@ +{ + "title": "minimum set model information", + "operationId": "GetModelInfo", + "parameters": { + "api-version": "2024-05-01-preview" + }, + "responses": { + "200": { + "body": { + "model_name": "jno", + "model_type": "embeddings", + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" + } + } + } +} diff --git a/specification/ai/ModelClient/main.tsp b/specification/ai/ModelClient/main.tsp new file mode 100644 index 000000000000..391dfbad79e2 --- /dev/null +++ b/specification/ai/ModelClient/main.tsp @@ -0,0 +1,28 @@ +import "@typespec/rest"; +import "@typespec/versioning"; +import "@azure-tools/typespec-azure-core"; +import "./routes.tsp"; + +using TypeSpec.Http; +using TypeSpec.Versioning; + +#suppress "@azure-tools/typespec-autorest/unsupported-http-auth-scheme" +@useAuth( + BearerAuth | OAuth2Auth<[ + { + type: OAuth2FlowType.implicit, + authorizationUrl: "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", + scopes: ["https://ml.azure.com/.default"], + } + ]> +) +@service(#{ title: "AI Model Inference" }) +@versioned(AI.Model.Versions) +namespace AI.Model; + +@doc("The AI.Model service versions.") +enum Versions { + 
@useDependency(Azure.Core.Versions.v1_0_Preview_2) + @doc("The 2024-05-01-preview version of the AI.Model service.") + v2024_05_01_Preview: "2024-05-01-preview", +} diff --git a/specification/ai/ModelClient/models/chat_completions.tsp b/specification/ai/ModelClient/models/chat_completions.tsp new file mode 100644 index 000000000000..055eccf3b42f --- /dev/null +++ b/specification/ai/ModelClient/models/chat_completions.tsp @@ -0,0 +1,678 @@ +import "@azure-tools/typespec-client-generator-core"; +import "@typespec/rest"; +import "@typespec/http"; + +using TypeSpec.Rest; +using TypeSpec.Http; + +using Azure.ClientGenerator.Core; + +namespace AI.Model; + +@doc(""" + The configuration information for a chat completions request. + Completions support a wide variety of tasks and generate text that continues from or "completes" + provided prompt data. + """) +model ChatCompletionsOptions { + @doc(""" + The collection of context messages associated with this chat completions request. + Typical usage begins with a chat message for the System role that provides instructions for + the behavior of the assistant, followed by alternating messages between the User and + Assistant roles. + """) + @minItems(1) + messages: ChatRequestMessage[]; + + @doc(""" + A value that influences the probability of generated tokens appearing based on their cumulative + frequency in generated text. + Positive values will make tokens less likely to appear as their frequency increases and + decrease the likelihood of the model repeating the same statements verbatim. + Supported range is [-2, 2]. + """) + @maxValue(2.0) + @minValue(-2.0) + frequency_penalty?: float32 = 0.0; + + @doc(""" + A value indicating whether chat completions should be streamed for this request. + """) + @clientName("InternalShouldStreamResponse", "csharp") + stream?: boolean; + + @doc(""" + A value that influences the probability of generated tokens appearing based on their existing + presence in generated text. + Positive values will make tokens less likely to appear when they already exist and increase the + model's likelihood to output new topics. + Supported range is [-2, 2]. + """) + @maxValue(2.0) + @minValue(-2.0) + presence_penalty?: float32 = 0.0; + + @doc(""" + The sampling temperature to use that controls the apparent creativity of generated completions. + Higher values will make output more random while lower values will make results more focused + and deterministic. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + """) + @maxValue(1.0) + @minValue(0.0) + temperature?: float32 = 0.7; + + @doc(""" + An alternative to sampling with temperature called nucleus sampling. This value causes the + model to consider the results of tokens with the provided probability mass. As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + """) + @maxValue(1.0) + @minValue(0.0) + @clientName("NucleusSamplingFactor", "csharp") + top_p?: float32 = 1.0; + + @doc("The maximum number of tokens to generate.") + @minValue(0.0) + max_tokens?: int32; + + @doc(""" + An object specifying the format that the model must output. 
+ + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length. + """) + response_format?: ChatCompletionsResponseFormat; + + @doc(""" + A collection of textual sequences that will end completions generation. + """) + @minItems(1) + @clientName("StopSequences", "csharp") + stop?: string[]; + + @doc(""" + A list of tools the model may request to call. Currently, only functions are supported as a tool. The model + may response with a function call request and provide the input arguments in JSON format for that function. + """) + @minItems(1) + tools?: ChatCompletionsToolDefinition[]; + + @encodedName("application/json", "tool_choice") + @doc(""" + If specified, the model will configure which of the provided tools it can use for the chat completions response. + """) + toolChoice?: ChatCompletionsToolChoice; + + @doc(""" + If specified, the system will make a best effort to sample deterministically such that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. + """) + seed?: int64; + + @doc(""" + ID of the specific AI model to use, if more than one model is available on the endpoint. + """) + `model`?: string; + + ...Record; +} + +alias ChatCompletionsCommon = { + @doc("A unique identifier associated with this chat completions response.") + id: string; + + //@doc("The response object type, which is always `chat.completion`.") + //object: string; + + @doc(""" + The first timestamp associated with generation activity for this completions response, + represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. + """) + @encode(DateTimeKnownEncoding.unixTimestamp, int32) + created: utcDateTime; + + @doc("The model used for the chat completion.") + `model`: string; +}; + +@doc(""" + Representation of the response data from a chat completions request. + Completions support a wide variety of tasks and generate text that continues from or "completes" + provided prompt data. + """) +model ChatCompletions { + ...ChatCompletionsCommon; + + @doc(""" + The collection of completions choices associated with this completions response. + Generally, `n` choices are generated per provided prompt with a default value of 1. + Token limits and other settings may limit the number of choices generated. + """) + @minItems(1) + choices: ChatChoice[]; + + @doc(""" + Usage information for tokens processed and generated as part of this completions operation. + """) + usage: CompletionsUsage; +} + +@doc(""" + Represents a response update to a chat completions request, when the service is streaming updates + using Server Sent Events (SSE). + Completions support a wide variety of tasks and generate text that continues from or "completes" + provided prompt data. 
+ """) +model StreamingChatCompletionsUpdate { + ...ChatCompletionsCommon; + + @doc(""" + An update to the collection of completion choices associated with this completions response. + Generally, `n` choices are generated per provided prompt with a default value of 1. + Token limits and other settings may limit the number of choices generated. + """) + @minItems(1) + choices: StreamingChatChoiceUpdate[]; + + @doc(""" + Usage information for tokens processed and generated as part of this completions operation. + """) + usage?: CompletionsUsage; +} + +@doc(""" + Representation of the token counts processed for a completions request. + Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and + other consumers. + """) +model CompletionsUsage { + @doc("The number of tokens generated across all completions emissions.") + completion_tokens: int32; + + @doc("The number of tokens in the provided prompts for the completions request.") + prompt_tokens: int32; + + @doc("The total number of tokens processed for the completions request and response.") + total_tokens: int32; +} + +@doc(""" + Representation of the manner in which a completions response concluded. + """) +union CompletionsFinishReason { + string, + + @doc("Completions ended normally and reached its end of token generation.") + stopped: "stop", + + @doc("Completions exhausted available token limits before generation could complete.") + tokenLimitReached: "length", + + @doc(""" + Completions generated a response that was identified as potentially sensitive per content + moderation policies. + """) + contentFiltered: "content_filter", + + @doc("Completion ended with the model calling a provided tool for output.") + toolCalls: "tool_calls", +} + +@doc("A description of the intended purpose of a message within a chat completions interaction.") +union ChatRole { + string, + + @doc("The role that instructs or sets the behavior of the assistant.") + system: "system", + + @doc("The role that provides input for chat completions.") + user: "user", + + @doc("The role that provides responses to system-instructed, user-prompted input.") + assistant: "assistant", + + @doc("The role that represents extension tool activity within a chat completions operation.") + tool: "tool", + + @doc("The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role.") + developer: "developer", +} + +@doc(""" + Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode. + Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON + via a system or user message. + """) +@discriminator("type") +model ChatCompletionsResponseFormat { + @doc("The response format type to use for chat completions.") + type: string; +} + +@doc(""" + A response format for Chat Completions that emits text responses. This is the default response format. + """) +model ChatCompletionsResponseFormatText extends ChatCompletionsResponseFormat { + @doc("Response format type: always 'text' for this object.") + type: "text"; +} + +@doc(""" + A response format for Chat Completions that restricts responses to emitting valid JSON objects. + Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON + via a system or user message. 
+ """) +model ChatCompletionsResponseFormatJsonObject + extends ChatCompletionsResponseFormat { + @doc("Response format type: always 'json_object' for this object.") + type: "json_object"; +} + +@doc(""" + Defines the response format for chat completions as JSON with a given schema. + The AI model will need to adhere to this schema when generating completions. + """) +model ChatCompletionsResponseFormatJsonSchemaDefinition { + @doc(""" + A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. + """) + name: string; + + @doc(""" + The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. + Note that AI models usually only support a subset of the keywords defined by JSON schema. + Consult your AI model documentation to determine what is supported. + """) + schema: Record; + + @doc(""" + A description of the response format, used by the AI model to determine how to generate responses in this format. + """) + description?: string; + + @doc(""" + If set to true, the service will error out if the provided JSON schema contains keywords + not supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`. + If false, and the provided JSON schema contains keywords not supported by the AI model, + the AI model will not error out. Instead it will ignore the unsupported keywords. + """) + strict?: boolean = false; +} + +@doc(""" + A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a + JSON schema specified by the caller. + """) +model ChatCompletionsResponseFormatJsonSchema + extends ChatCompletionsResponseFormat { + @doc(""" + The type of response format being defined: `json_schema` + """) + type: "json_schema"; + + /** The definition of the required JSON schema in the response, and associated metadata. */ + json_schema: ChatCompletionsResponseFormatJsonSchemaDefinition; +} + +alias ChatChoiceCommon = { + @doc("The ordered index associated with this chat completions choice.") + index: int32; + + #suppress "@azure-tools/typespec-azure-core/no-nullable" "The operation already returns nulls" + #suppress "@azure-tools/typespec-autorest/union-unsupported" "OpenAPI v2 support deferred" + @doc("The reason that this chat completions choice completed its generated.") + finish_reason: CompletionsFinishReason | null; +}; + +@doc(""" + The representation of a single prompt completion as part of an overall chat completions request. + Generally, `n` choices are generated per provided prompt with a default value of 1. + Token limits and other settings may limit the number of choices generated. + """) +model ChatChoice { + ...ChatChoiceCommon; + + @doc("The chat message for a given chat completions prompt.") + message: ChatResponseMessage; +} + +@doc(""" + Represents an update to a single prompt completion when the service is streaming updates + using Server Sent Events (SSE). + Generally, `n` choices are generated per provided prompt with a default value of 1. + Token limits and other settings may limit the number of choices generated. 
+ """) +model StreamingChatChoiceUpdate { + ...ChatChoiceCommon; + + @doc("An update to the chat message for a given chat completions prompt.") + delta: StreamingChatResponseMessageUpdate; +} + +@discriminator("role") +@doc("An abstract representation of a chat message as provided in a request.") +model ChatRequestMessage { + @doc("The chat role associated with this message.") + role: ChatRole; +} + +@doc(""" + A request chat message containing system instructions that influence how the model will generate a chat completions + response. + """) +model ChatRequestSystemMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'system' for system messages.") + role: ChatRole.system; + + @doc("The contents of the system message.") + content: string; +} + +@doc(""" + A request chat message containing developer instructions that influence how the model will generate a chat completions + response. Some AI models support a developer message instead of a system message. + """) +model ChatRequestDeveloperMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'developer' for developer messages.") + role: ChatRole.developer; + + @doc("The contents of the developer message.") + content: string; +} + +@doc("A request chat message representing user input to the assistant.") +model ChatRequestUserMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'user' for user messages.") + role: ChatRole.user; + + #suppress "@azure-tools/typespec-autorest/union-unsupported" "External API shape is defined in OpenAPI 3.0 as oneOf." + @doc("The contents of the user message, with available input types varying by selected model.") + content: string | ChatMessageContentItem[]; +} + +@doc("A request chat message representing response or action from the assistant.") +model ChatRequestAssistantMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'assistant' for assistant messages.") + role: ChatRole.assistant; + + #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" + @doc("The content of the message.") + content?: string; + + @encodedName("application/json", "tool_calls") + @doc(""" + The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat + completions request to resolve as configured. + """) + toolCalls?: ChatCompletionsToolCall[]; +} + +@doc("A request chat message representing requested output from a configured tool.") +model ChatRequestToolMessage extends ChatRequestMessage { + @doc("The chat role associated with this message, which is always 'tool' for tool messages.") + role: ChatRole.tool; + + #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" + @doc("The content of the message.") + content?: string; + + @encodedName("application/json", "tool_call_id") + @doc("The ID of the tool call resolved by the provided content.") + toolCallId: string; +} + +@doc("A representation of a chat message update as received in a streaming response.") +model StreamingChatResponseMessageUpdate { + @doc("The chat role associated with the message. 
If present, should always be 'assistant'") + role?: ChatRole; + + //#suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" + @doc("The content of the message.") + content?: string; + + @encodedName("application/json", "tool_calls") + @doc(""" + The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat + completions request to resolve as configured. + """) + toolCalls?: StreamingChatResponseToolCallUpdate[]; +} + +@doc("A representation of a chat message as received in a response.") +model ChatResponseMessage { + @doc("The chat role associated with the message.") + role: ChatRole; + + #suppress "@azure-tools/typespec-azure-core/no-nullable" "explicitly nullable in mirrored API" + @doc("The content of the message.") + content: string | null; + + @encodedName("application/json", "tool_calls") + @doc(""" + The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat + completions request to resolve as configured. + """) + toolCalls?: ChatCompletionsToolCall[]; +} + +#suppress "@azure-tools/typespec-autorest/union-unsupported" "External API shape is defined in OpenAPI 3.0 as oneOf." +alias ChatCompletionsToolChoice = ChatCompletionsToolChoicePreset | ChatCompletionsNamedToolChoice; + +@doc("Represents a generic policy for how a chat completions tool may be selected.") +union ChatCompletionsToolChoicePreset { + string, + + @doc(""" + Specifies that the model may either use any of the tools provided in this chat completions request or + instead return a standard chat completions response as if no tools were provided. + """) + auto: "auto", + + @doc(""" + Specifies that the model should not respond with a tool call and should instead provide a standard chat + completions response. Response content may still be influenced by the provided tool definitions. + """) + none: "none", + + @doc(""" + Specifies that the model should respond with a call to one or more tools. + """) + required: "required", +} + +@doc("A tool selection of a specific, named function tool that will limit chat completions to using the named function.") +model ChatCompletionsNamedToolChoice { + @doc("The type of the tool. Currently, only `function` is supported.") + type: "function"; + + @doc("The function that should be called.") + function: ChatCompletionsNamedToolChoiceFunction; +} + +@doc("A tool selection of a specific, named function tool that will limit chat completions to using the named function.") +model ChatCompletionsNamedToolChoiceFunction { + @doc("The name of the function that should be called.") + name: string; +} + +@doc(""" + The definition of a chat completions tool that can call a function. + """) +model ChatCompletionsToolDefinition { + @doc("The type of the tool. Currently, only `function` is supported.") + type: "function"; + + @doc("The function definition details for the function tool.") + function: FunctionDefinition; +} + +@doc(""" + The definition of a caller-specified function that chat completions may invoke in response to matching user input. + """) +model FunctionDefinition { + @doc("The name of the function to be called.") + name: string; + + @doc(""" + A description of what the function does. The model will use this description when selecting the function and + interpreting its parameters. 
+ """) + description?: string; + + #suppress "@azure-tools/typespec-azure-core/no-unknown" "External API shape takes an arbitrary json" + @doc("The parameters the function accepts, described as a JSON Schema object.") + parameters?: Record; +} + +@doc(""" + An update to the function tool call information requested by the AI model. + """) +model StreamingChatResponseToolCallUpdate { + @doc("The ID of the tool call.") + id: string; + + @doc("Updates to the function call requested by the AI model.") + function: FunctionCall; +} + +@doc(""" + A function tool call requested by the AI model. + """) +model ChatCompletionsToolCall { + @doc("The ID of the tool call.") + id: string; + + @doc("The type of tool call. Currently, only `function` is supported.") + type: "function"; + + @doc("The details of the function call requested by the AI model.") + function: FunctionCall; +} + +@doc("The name and arguments of a function that should be called, as generated by the model.") +model FunctionCall { + @doc("The name of the function to call.") + name: string; + + @doc(""" + The arguments to call the function with, as generated by the model in JSON format. + Note that the model does not always generate valid JSON, and may hallucinate parameters + not defined by your function schema. Validate the arguments in your code before calling + your function. + """) + arguments: string; +} + +@doc("An abstract representation of a structured content item within a chat message.") +@discriminator("type") +model ChatMessageContentItem { + @doc("The discriminated object type.") + type: string; +} + +@doc("A structured chat content item containing plain text.") +model ChatMessageTextContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'text' for this type.") + type: "text"; + + @doc("The content of the message.") + text: string; +} + +@doc("A structured chat content item containing an image reference.") +model ChatMessageImageContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'image_url' for this type.") + type: "image_url"; + + @doc("An internet location, which must be accessible to the model,from which the image may be retrieved.") + @encodedName("application/json", "image_url") + imageUrl: ChatMessageImageUrl; +} + +@doc("An internet location from which the model may retrieve an image.") +model ChatMessageImageUrl { + @doc("The URL of the image.") + url: string; + + @doc(""" + The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and + accuracy. + """) + detail?: ChatMessageImageDetailLevel; +} + +@doc("A representation of the possible image detail levels for image-based chat completions message content.") +union ChatMessageImageDetailLevel { + string, + + @doc("Specifies that the model should determine which detail level to apply using heuristics like image size.") + auto: "auto", + + @doc(""" + Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer + tokens but may also be less accurate for highly detailed images. + """) + low: "low", + + @doc(""" + Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed + images but may also be slower and consume more tokens. 
+ """) + high: "high", +} + +@doc("A structured chat content item for audio content passed as a url.") +model ChatMessageAudioUrlContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'audio_url' for this type.") + type: "audio_url"; + + @doc("The details of the audio url.") + @encodedName("application/json", "audio_url") + audioUrl: ChatMessageInputAudioUrl; +} + +@doc("A structured chat content item for audio content passed as base64 encoded data.") +model ChatMessageAudioDataContentItem extends ChatMessageContentItem { + @doc("The discriminated object type: always 'input_audio' for this type.") + type: "input_audio"; + + @doc("The details of the input audio data.") + @encodedName("application/json", "input_audio") + inputAudio: ChatMessageInputAudio; +} + +@doc("The details of the audio url.") +model ChatMessageInputAudioUrl { + @doc("The URL of the audio content.") + url: string; +} + +@doc("The details of the input audio data.") +model ChatMessageInputAudio { + @doc("Base64 encoded audio data") + data: string; + + @doc("The audio format of the audio content.") + format: AudioContentFormat; +} + +@doc("A representation of the possible audio formats for audio.") +union AudioContentFormat { + string, + + @doc("Specifies audio in WAV format.") + wav: "wav", + + @doc("Specifies audio in MP3 format.") + mp3: "mp3", +} diff --git a/specification/ai/ModelClient/models/common.tsp b/specification/ai/ModelClient/models/common.tsp new file mode 100644 index 000000000000..e21a83db031a --- /dev/null +++ b/specification/ai/ModelClient/models/common.tsp @@ -0,0 +1,156 @@ +import "@typespec/rest"; +import "@typespec/http"; + +using TypeSpec.Rest; +using TypeSpec.Http; + +namespace AI.Model; + +@doc("Represents the input types used for embedding search.") +union EmbeddingInputType { + string, + + @doc("Indicates the input is a general text input.") + text: "text", + + @doc("Indicates the input represents a search query to find the most relevant documents in your vector database.") + query: "query", + + @doc("Indicates the input represents a document that is stored in a vector database.") + document: "document", +} + +@doc(""" + The format of the embeddings result. + Returns a 422 error if the model doesn't support the value or parameter. + """) +union EmbeddingEncodingFormat { + string, + + @doc("Base64") + base64: "base64", + + @doc("Binary") + binary: "binary", + + @doc("Floating point") + float: "float", + + @doc("Signed 8-bit integer") + int8: "int8", + + @doc("ubinary") + ubinary: "ubinary", + + @doc("Unsigned 8-bit integer") + uint8: "uint8", +} + +@doc(""" + Representation of the response data from an embeddings request. + Embeddings measure the relatedness of text strings and are commonly used for search, clustering, + recommendations, and other similar scenarios. + """) +model EmbeddingsResult { + @doc("Unique identifier for the embeddings result.") + id: string; + + @doc("Embedding values for the prompts submitted in the request.") + data: EmbeddingItem[]; + + @doc("Usage counts for tokens input using the embeddings API.") + usage: EmbeddingsUsage; + + //@doc("The object type of the embeddings result. Will always be `list`.") + //object: string; + + @doc("The model ID used to generate this result.") + `model`: string; +} + +@doc("Representation of a single embeddings relatedness comparison.") +model EmbeddingItem { + #suppress "@azure-tools/typespec-autorest/union-unsupported" "Unions with different types are not supported in OpenAPI v2." 
+ @doc(""" + List of embedding values for the input prompt. These represent a measurement of the + vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector. + """) + embedding: string | float32[]; + + @doc("Index of the prompt to which the EmbeddingItem corresponds.") + index: int32; + + //@doc("The object type of this embeddings item. Will always be `embedding`.") + //object: string; +} + +@doc("Measurement of the amount of tokens used in this request and response.") +model EmbeddingsUsage { + @doc("Number of tokens in the request.") + prompt_tokens: int32; + + @doc(""" + Total number of tokens transacted in this request/response. Should equal the + number of tokens in the request. + """) + total_tokens: int32; +} + +@doc("Represents some basic information about the AI model.") +model ModelInfo { + @doc("The name of the AI model. For example: `Phi21`") + model_name: string; + + @doc("The type of the AI model. A Unique identifier for the profile.") + model_type: ModelType; + + @doc("The model provider name. For example: `Microsoft Research`") + model_provider_name: string; +} + +@doc("The type of AI model") +union ModelType { + string, + + @doc("A model capable of generating embeddings from a text") + embeddings: "embeddings", + + @doc("A model capable of generating images from an image and text description") + image_generation: "image_generation", + + @doc("A text generation model") + text_generation: "text_generation", + + @doc("A model capable of generating embeddings from an image") + image_embeddings: "image_embeddings", + + @doc("A text-to-audio generative model") + audio_generation: "audio_generation", + + @doc("A model capable of taking chat-formatted messages and generate responses") + chat_completion: "chat_completion", +} + +alias AdditionalRequestHeaders = { + @doc(""" + Controls what happens if extra parameters, undefined by the REST API, + are passed in the JSON request payload. + This sets the HTTP request header `extra-parameters`. + """) + @header("extra-parameters") + extra_params?: ExtraParameters; +}; + +@doc("Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.") +union ExtraParameters { + string, + + @doc("The service will error if it detected extra parameters in the request payload. This is the service default.") + error: "error", + + @doc("The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model.") + drop: "drop", + + @doc("The service will pass extra parameters to the back-end AI model.") + pass_through: "pass-through", +} diff --git a/specification/ai/ModelClient/models/embeddings.tsp b/specification/ai/ModelClient/models/embeddings.tsp new file mode 100644 index 000000000000..da2155b52f0a --- /dev/null +++ b/specification/ai/ModelClient/models/embeddings.tsp @@ -0,0 +1,44 @@ +import "@typespec/rest"; +import "@typespec/http"; + +using TypeSpec.Rest; +using TypeSpec.Http; + +namespace AI.Model; + +@doc(""" + The configuration information for an embeddings request. + """) +model EmbeddingsOptions { + @doc(""" + Input text to embed, encoded as a string or array of tokens. + To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. + """) + input: string[]; + + @doc(""" + Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. 
+ Returns a 422 error if the model doesn't support the value or parameter. + """) + dimensions?: int32; + + @doc(""" + Optional. The desired format for the returned embeddings. + """) + encoding_format?: EmbeddingEncodingFormat; + + @doc(""" + Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. + """) + input_type?: EmbeddingInputType; + + @doc(""" + ID of the specific AI model to use, if more than one model is available on the endpoint. + """) + `model`?: string; + + ...Record<unknown>; +} diff --git a/specification/ai/ModelClient/models/image_embeddings.tsp b/specification/ai/ModelClient/models/image_embeddings.tsp new file mode 100644 index 000000000000..e03571e12f06 --- /dev/null +++ b/specification/ai/ModelClient/models/image_embeddings.tsp @@ -0,0 +1,59 @@ +import "@typespec/rest"; +import "@typespec/http"; + +using TypeSpec.Rest; +using TypeSpec.Http; + +namespace AI.Model; + +@doc(""" + The configuration information for an image embeddings request. + """) +model ImageEmbeddingsOptions { + @doc(""" + Input image to embed. To embed multiple inputs in a single request, pass an array. + The input must not exceed the max input tokens for the model. + """) + input: ImageEmbeddingInput[]; + + @doc(""" + Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + """) + dimensions?: int32; + + @doc(""" + Optional. The desired format for the returned embeddings. + Returns a 422 error if the model doesn't support the value or parameter. + """) + encoding_format?: EmbeddingEncodingFormat; + + @doc(""" + Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. + """) + input_type?: EmbeddingInputType; + + @doc(""" + ID of the specific AI model to use, if more than one model is available on the endpoint. + """) + `model`?: string; + + ...Record<unknown>; +} + +@doc("Represents an image with optional text.") +model ImageEmbeddingInput { + @doc(""" + The input image, encoded as a base64 string in a data URL. Example: `data:image/{format};base64,{data}`. + """) + image: string; + + @doc(""" + Optional. The text input to feed into the model (like DINO, CLIP). + Returns a 422 error if the model doesn't support the value or parameter. + """) + text?: string; +} diff --git a/specification/ai/ModelClient/routes.tsp b/specification/ai/ModelClient/routes.tsp new file mode 100644 index 000000000000..37be0f7a8814 --- /dev/null +++ b/specification/ai/ModelClient/routes.tsp @@ -0,0 +1,86 @@ +import "@azure-tools/typespec-azure-core"; +import "@typespec/rest"; +import "@typespec/http"; +import "@typespec/versioning"; + +import "./models/common.tsp"; +import "./models/chat_completions.tsp"; +import "./models/embeddings.tsp"; +import "./models/image_embeddings.tsp"; + +using TypeSpec.Rest; +using TypeSpec.Http; +using TypeSpec.Versioning; +using Azure.Core; + +namespace AI.Model; + +@doc(""" + Gets chat completions for the provided chat messages. + Completions support a wide variety of tasks and generate text that continues from or "completes" + provided prompt data. The method makes a REST API call to the `/chat/completions` route + on the given endpoint.
+ """) +@actionSeparator("/") +@route("chat/completions") +op getChatCompletions is Azure.Core.RpcOperation< + { + /** + * The options for chat completions. + */ + @bodyRoot + body: ChatCompletionsOptions; + + ...AdditionalRequestHeaders; + }, + ChatCompletions +>; + +@doc(""" + Return the embedding vectors for given text prompts. + The method makes a REST API call to the `/embeddings` route on the given endpoint. + """) +@actionSeparator("/") +@route("embeddings") +op getEmbeddings is Azure.Core.RpcOperation< + { + /** + * The body of the request containing the options for generating embeddings. + */ + @bodyRoot + body: EmbeddingsOptions; + + ...AdditionalRequestHeaders; + }, + EmbeddingsResult +>; + +@doc(""" + Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + """) +@actionSeparator("/") +@route("images/embeddings") +op getImageEmbeddings is Azure.Core.RpcOperation< + { + /** + * The body of the request containing options for image embeddings. + */ + @bodyRoot + body: ImageEmbeddingsOptions; + + ...AdditionalRequestHeaders; + }, + EmbeddingsResult +>; + +@doc(""" + Returns information about the AI model. + The method makes a REST API call to the `/info` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + """) +@actionSeparator("/") +@route("info") +@get +op getModelInfo is Azure.Core.RpcOperation<{}, ModelInfo>; diff --git a/specification/ai/ModelClient/tspconfig.yaml b/specification/ai/ModelClient/tspconfig.yaml new file mode 100644 index 000000000000..937bafa7ee44 --- /dev/null +++ b/specification/ai/ModelClient/tspconfig.yaml @@ -0,0 +1,45 @@ +parameters: + "service-dir": + default: "sdk/ai" + "dependencies": + default: "" +emit: + - "@azure-tools/typespec-autorest" +linter: + extends: + - "@azure-tools/typespec-azure-rulesets/data-plane" + disable: + "@azure-tools/typespec-azure-core/casing-style": "Disabled since JSON payload in REST API does not conform to Azure guidelines with regards to casing" + "@azure-tools/typespec-azure-core/no-string-discriminator": "Use an extensible union instead of a plain string" + "@azure-tools/typespec-azure-core/bad-record-type": "We do want to use Record, and not Record. But this needs further investigation" +options: + "@azure-tools/typespec-autorest": + azure-resource-provider-folder: "data-plane" + emitter-output-dir: "{project-root}/.." 
+ output-file: "{azure-resource-provider-folder}/{service-name}/{version-status}/{version}/openapi.json" + "@azure-tools/typespec-python": + package-mode: dataplane + package-dir: "azure-ai-inference" + package-name: "{package-dir}" + flavor: azure + "@azure-tools/typespec-ts": + package-dir: "ai-inference-rest" + is-modular-library: false + package-details: + name: "@azure-rest/ai-inference" + description: "Inference API for Azure-supported AI models" + flavor: azure + "@azure-tools/typespec-csharp": + package-dir: "Azure.AI.Inference" + namespace: "Azure.AI.Inference" + clear-output-folder: true + model-namespace: false + flavor: azure + "@azure-tools/typespec-java": + package-dir: "azure-ai-inference" + namespace: com.azure.ai.inference + partial-update: true + customization-class: customization/src/main/java/InferenceCustomizations.java + enable-sync-stack: true + generate-tests: false + flavor: azure diff --git a/specification/ai/cspell.yaml b/specification/ai/cspell.yaml index 2f9086f3bcc3..b299f873a501 100644 --- a/specification/ai/cspell.yaml +++ b/specification/ai/cspell.yaml @@ -74,6 +74,9 @@ overrides: - mednax - tonnis - tyrer - - filename: '**/specification/ai/data-plane/ModelInference/**/*.*' + - filename: '**/specification/ai/data-plane/ModelClient/**/openapi.json' + words: + - ubinary + - filename: '**/specification/ai/data-plane/ModelInference/**/openapi.json' words: - ubinary diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json new file mode 100644 index 000000000000..a097e1a38d0d --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json @@ -0,0 +1,72 @@ +{ + "title": "maximum set chat completion", + "operationId": "GetChatCompletions", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "messages": [ + { + "role": "ChatRequestMessage" + } + ], + "frequency_penalty": -2, + "stream": true, + "presence_penalty": -1, + "temperature": 0, + "top_p": 0, + "max_tokens": 0, + "response_format": { + "type": "ChatCompletionsResponseFormat" + }, + "stop": [ + "dcfnxrdeumnoytdaooqkbl" + ], + "tools": [ + { + "type": "function", + "function": { + "name": "ikvkykzp", + "description": "gofxoftbpdi" + } + } + ], + "seed": 21, + "model": "askiizcjob" + } + }, + "responses": { + "200": { + "body": { + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 + }, + "choices": [ + { + "index": 7, + "finish_reason": "stop", + "message": { + "role": "system", + "content": "jnsnrwblpuokzbkrzdcwubpfz", + "tool_calls": [ + { + "id": "yrobmilsrugmbwukmzo", + "type": "function", + "function": { + "name": "ikvkykzp", + "arguments": "oqxvktuduomvckic" + } + } + ] + } + } + ] + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json new file mode 100644 index 000000000000..d2bc60ef2f8a --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetChatCompletions_MinimumSet_Gen.json @@ -0,0 +1,38 @@ +{ + "title": "minimum set chat completion", + "operationId": 
"GetChatCompletions", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "messages": [ + { + "role": "ChatRequestMessage" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kgousajxgzyhugvqekuswuqbk", + "created": 18, + "model": "zjxvtpxhzhvgjrhit", + "usage": { + "completion_tokens": 19, + "prompt_tokens": 28, + "total_tokens": 16 + }, + "choices": [ + { + "index": 7, + "finish_reason": "stop", + "message": { + "role": "system", + "content": "jnsnrwblpuokzbkrzdcwubpfz" + } + } + ] + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..413c4531a81b --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,34 @@ +{ + "title": "maximum set embeddings", + "operationId": "GetEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "input": [ + "p" + ], + "dimensions": 11, + "encoding_format": "base64", + "input_type": "text", + "model": "kwkpluujwiabfquhkaugttxut" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 21 + } + ], + "usage": { + "prompt_tokens": 4, + "total_tokens": 22 + }, + "model": "uvrmctbnze" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..e3c32e0dbdfe --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,32 @@ +{ + "title": "minimum set embeddings", + "operationId": "GetEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + "ujbdzqgintkjjorqvgtyqxehwod" + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kmclubpbyrjjrcihu", + "data": [ + { + "embedding": [ + 3 + ], + "index": 20 + } + ], + "usage": { + "prompt_tokens": 17, + "total_tokens": 25 + }, + "model": "gg" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..5a4ef17ad877 --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,37 @@ +{ + "title": "maximum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "input": [ + { + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" + } + ], + "dimensions": 26, + "encoding_format": "base64", + "input_type": "text", + "model": "jyb" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 21 + } + ], + "usage": { + "prompt_tokens": 4, + "total_tokens": 22 + }, + "model": "uvrmctbnze" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json 
b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..4ea5e30e4246 --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,34 @@ +{ + "title": "minimum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + { + "image": "gvmojtfooxixxzayrditjlyymg" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "kmclubpbyrjjrcihu", + "data": [ + { + "embedding": [ + 3 + ], + "index": 20 + } + ], + "usage": { + "prompt_tokens": 17, + "total_tokens": 25 + }, + "model": "gg" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json new file mode 100644 index 000000000000..3e40dd40effa --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MaximumSet_Gen.json @@ -0,0 +1,16 @@ +{ + "title": "maximum set model information", + "operationId": "GetModelInfo", + "parameters": { + "api-version": "2024-05-01-preview" + }, + "responses": { + "200": { + "body": { + "model_name": "jno", + "model_type": "embeddings", + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json new file mode 100644 index 000000000000..a819c57488ff --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/examples/GetModelInfo_MinimumSet_Gen.json @@ -0,0 +1,16 @@ +{ + "title": "minimum set model information", + "operationId": "GetModelInfo", + "parameters": { + "api-version": "2024-05-01-preview" + }, + "responses": { + "200": { + "body": { + "model_name": "jno", + "model_type": "embeddings", + "model_provider_name": "ulyaphtaszwdkefpbkklnjtrhzh" + } + } + } +} diff --git a/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json new file mode 100644 index 000000000000..8bbde16c721d --- /dev/null +++ b/specification/ai/data-plane/AI.Model/preview/2024-05-01-preview/openapi.json @@ -0,0 +1,1683 @@ +{ + "swagger": "2.0", + "info": { + "title": "AI Model Inference", + "version": "2024-05-01-preview", + "x-typespec-generated": [ + { + "emitter": "@azure-tools/typespec-autorest" + } + ] + }, + "schemes": [ + "https" + ], + "produces": [ + "application/json" + ], + "consumes": [ + "application/json" + ], + "security": [ + { + "OAuth2Auth": [ + "https://ml.azure.com/.default" + ] + } + ], + "securityDefinitions": { + "OAuth2Auth": { + "type": "oauth2", + "flow": "implicit", + "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", + "scopes": { + "https://ml.azure.com/.default": "" + } + } + }, + "tags": [], + "paths": { + "/chat/completions": { + "post": { + "operationId": "GetChatCompletions", + "description": "Gets chat completions for the provided chat messages.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data. 
The method makes a REST API call to the `/chat/completions` route\non the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The options for chat completions.", + "required": true, + "schema": { + "$ref": "#/definitions/ChatCompletionsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/ChatCompletions" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." + } + } + } + }, + "x-ms-examples": { + "maximum set chat completion": { + "$ref": "./examples/GetChatCompletions_MaximumSet_Gen.json" + }, + "minimum set chat completion": { + "$ref": "./examples/GetChatCompletions_MinimumSet_Gen.json" + } + } + } + }, + "/embeddings": { + "post": { + "operationId": "GetEmbeddings", + "description": "Return the embedding vectors for given text prompts.\nThe method makes a REST API call to the `/embeddings` route on the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." 
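The `GetChatCompletions` operation above is a plain REST call: POST to `/chat/completions` with the `api-version` query parameter, an optional `extra-parameters` header, and a `ChatCompletionsOptions` body. A minimal sketch in Python using `requests`; the endpoint URL and bearer token are placeholders, and the spec's declared auth is Entra ID OAuth2 against the `https://ml.azure.com/.default` scope:

```python
import requests

# Placeholders -- substitute a real endpoint and an Entra ID access token.
ENDPOINT = "https://<your-endpoint>.models.ai.azure.com"
TOKEN = "<access-token>"

response = requests.post(
    f"{ENDPOINT}/chat/completions",
    params={"api-version": "2024-05-01-preview"},
    headers={
        "Authorization": f"Bearer {TOKEN}",
        # Optional: controls what the service does with unknown payload fields.
        "extra-parameters": "drop",
    },
    json={
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello."},
        ]
    },
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```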
+ } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The body of the request containing the options for generating embeddings.", + "required": true, + "schema": { + "$ref": "#/definitions/EmbeddingsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/EmbeddingsResult" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." + } + } + } + }, + "x-ms-examples": { + "maximum set embeddings": { + "$ref": "./examples/GetEmbeddings_MaximumSet_Gen.json" + }, + "minimum set embeddings": { + "$ref": "./examples/GetEmbeddings_MinimumSet_Gen.json" + } + } + } + }, + "/images/embeddings": { + "post": { + "operationId": "GetImageEmbeddings", + "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", + "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "description": "The body of the request containing options for image embeddings.", + "required": true, + "schema": { + "$ref": "#/definitions/ImageEmbeddingsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/EmbeddingsResult" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." 
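The `GetEmbeddings` operation has the same shape; only `input` is required in the body. A sketch with the same placeholder endpoint and token:

```python
import requests

ENDPOINT = "https://<your-endpoint>.models.ai.azure.com"  # placeholder
TOKEN = "<access-token>"  # placeholder

resp = requests.post(
    f"{ENDPOINT}/embeddings",
    params={"api-version": "2024-05-01-preview"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"input": ["first sentence to embed", "second sentence to embed"]},
)
resp.raise_for_status()
for item in resp.json()["data"]:
    # Each EmbeddingItem pairs a vector with the index of the input it embeds.
    print(item["index"], len(item["embedding"]))
```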
+ } + } + } + }, + "x-ms-examples": { + "maximum set image embeddings": { + "$ref": "./examples/GetImageEmbeddings_MaximumSet_Gen.json" + }, + "minimum set image embeddings": { + "$ref": "./examples/GetImageEmbeddings_MinimumSet_Gen.json" + } + } + } + }, + "/info": { + "get": { + "operationId": "GetModelInfo", + "description": "Returns information about the AI model.\nThe method makes a REST API call to the `/info` route on the given endpoint.\nThis method will only work when using Serverless API or Managed Compute endpoint.\nIt will not work for GitHub Models endpoint or Azure OpenAI endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/ModelInfo" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." + } + } + } + }, + "x-ms-examples": { + "maximum set model information": { + "$ref": "./examples/GetModelInfo_MaximumSet_Gen.json" + }, + "minimum set model information": { + "$ref": "./examples/GetModelInfo_MinimumSet_Gen.json" + } + } + } + } + }, + "definitions": { + "AudioContentFormat": { + "type": "string", + "description": "A representation of the possible audio formats for audio.", + "enum": [ + "wav", + "mp3" + ], + "x-ms-enum": { + "name": "AudioContentFormat", + "modelAsString": true, + "values": [ + { + "name": "wav", + "value": "wav", + "description": "Specifies audio in WAV format." + }, + { + "name": "mp3", + "value": "mp3", + "description": "Specifies audio in MP3 format." + } + ] + } + }, + "Azure.Core.Foundations.Error": { + "type": "object", + "description": "The error object.", + "properties": { + "code": { + "type": "string", + "description": "One of a server-defined set of error codes." + }, + "message": { + "type": "string", + "description": "A human-readable representation of the error." + }, + "target": { + "type": "string", + "description": "The target of the error." + }, + "details": { + "type": "array", + "description": "An array of details about specific errors that led to this reported error.", + "items": { + "$ref": "#/definitions/Azure.Core.Foundations.Error" + }, + "x-ms-identifiers": [] + }, + "innererror": { + "$ref": "#/definitions/Azure.Core.Foundations.InnerError", + "description": "An object containing more specific information than the current object about the error." + } + }, + "required": [ + "code", + "message" + ] + }, + "Azure.Core.Foundations.ErrorResponse": { + "type": "object", + "description": "A response containing error details.", + "properties": { + "error": { + "$ref": "#/definitions/Azure.Core.Foundations.Error", + "description": "The error object." + } + }, + "required": [ + "error" + ] + }, + "Azure.Core.Foundations.InnerError": { + "type": "object", + "description": "An object containing more specific information about the error. As per Microsoft One API guidelines - https://github.com/Microsoft/api-guidelines/blob/vNext/Guidelines.md#7102-error-condition-responses.", + "properties": { + "code": { + "type": "string", + "description": "One of a server-defined set of error codes." + }, + "innererror": { + "$ref": "#/definitions/Azure.Core.Foundations.InnerError", + "description": "Inner error." 
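`GetModelInfo` is a bare GET and, per its description above, only answers on Serverless API or Managed Compute endpoints. A sketch:

```python
import requests

ENDPOINT = "https://<your-endpoint>.models.ai.azure.com"  # placeholder
TOKEN = "<access-token>"  # placeholder

info = requests.get(
    f"{ENDPOINT}/info",
    params={"api-version": "2024-05-01-preview"},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
info.raise_for_status()
print(info.json())  # {"model_name": ..., "model_type": ..., "model_provider_name": ...}
```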
+ } + } + }, + "ChatChoice": { + "type": "object", + "description": "The representation of a single prompt completion as part of an overall chat completions request.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "properties": { + "index": { + "type": "integer", + "format": "int32", + "description": "The ordered index associated with this chat completions choice." + }, + "finish_reason": { + "$ref": "#/definitions/CompletionsFinishReason", + "description": "The reason that this chat completions choice completed its generation.", + "x-nullable": true + }, + "message": { + "$ref": "#/definitions/ChatResponseMessage", + "description": "The chat message for a given chat completions prompt." + } + }, + "required": [ + "index", + "finish_reason", + "message" + ] + }, + "ChatCompletions": { + "type": "object", + "description": "Representation of the response data from a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier associated with this chat completions response." + }, + "created": { + "type": "integer", + "format": "unixtime", + "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." + }, + "choices": { + "type": "array", + "description": "The collection of completions choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatChoice" + }, + "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/CompletionsUsage", + "description": "Usage information for tokens processed and generated as part of this completions operation." + } + }, + "required": [ + "id", + "created", + "model", + "choices", + "usage" + ] + }, + "ChatCompletionsNamedToolChoice": { + "type": "object", + "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the tool. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/ChatCompletionsNamedToolChoiceFunction", + "description": "The function that should be called." + } + }, + "required": [ + "type", + "function" + ] + }, + "ChatCompletionsNamedToolChoiceFunction": { + "type": "object", + "description": "A tool selection of a specific, named function tool that will limit chat completions to using the named function.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function that should be called." 
+ } + }, + "required": [ + "name" + ] + }, + "ChatCompletionsOptions": { + "type": "object", + "description": "The configuration information for a chat completions request.\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "messages": { + "type": "array", + "description": "The collection of context messages associated with this chat completions request.\nTypical usage begins with a chat message for the System role that provides instructions for\nthe behavior of the assistant, followed by alternating messages between the User and\nAssistant roles.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatRequestMessage" + }, + "x-ms-identifiers": [] + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their cumulative\nfrequency in generated text.\nPositive values will make tokens less likely to appear as their frequency increases and\ndecrease the likelihood of the model repeating the same statements verbatim.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "stream": { + "type": "boolean", + "description": "A value indicating whether chat completions should be streamed for this request." + }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "A value that influences the probability of generated tokens appearing based on their existing\npresence in generated text.\nPositive values will make tokens less likely to appear when they already exist and increase the\nmodel's likelihood to output new topics.\nSupported range is [-2, 2].", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "temperature": { + "type": "number", + "format": "float", + "description": "The sampling temperature to use that controls the apparent creativity of generated completions.\nHigher values will make output more random while lower values will make results more focused\nand deterministic.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 0.7, + "minimum": 0, + "maximum": 1 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature called nucleus sampling. This value causes the\nmodel to consider the results of tokens with the provided probability mass. 
As an example, a\nvalue of 0.15 will cause only the tokens comprising the top 15% of probability mass to be\nconsidered.\nIt is not recommended to modify temperature and top_p for the same completions request as the\ninteraction of these two settings is difficult to predict.\nSupported range is [0, 1].", + "default": 1, + "minimum": 0, + "maximum": 1 + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens to generate.", + "minimum": 0 + }, + "response_format": { + "$ref": "#/definitions/ChatCompletionsResponseFormat", + "description": "An object specifying the format that the model must output.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which ensures the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length." + }, + "stop": { + "type": "array", + "description": "A collection of textual sequences that will end completions generation.", + "minItems": 1, + "items": { + "type": "string" + } + }, + "tools": { + "type": "array", + "description": "A list of tools the model may request to call. Currently, only functions are supported as a tool. The model\nmay respond with a function call request and provide the input arguments in JSON format for that function.", + "minItems": 1, + "items": { + "$ref": "#/definitions/ChatCompletionsToolDefinition" + }, + "x-ms-identifiers": [] + }, + "tool_choice": { + "description": "If specified, the model will configure which of the provided tools it can use for the chat completions response.", + "x-ms-client-name": "toolChoice" + }, + "seed": { + "type": "integer", + "format": "int64", + "description": "If specified, the system will make a best effort to sample deterministically such that repeated requests with the\nsame seed and parameters should return the same result. Determinism is not guaranteed." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "messages" + ], + "additionalProperties": {} + }, + "ChatCompletionsResponseFormat": { + "type": "object", + "description": "Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", + "properties": { + "type": { + "type": "string", + "description": "The response format type to use for chat completions." 
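As the `response_format` description stresses, JSON mode must be paired with an explicit instruction to produce JSON. A sketch of a request body that honors both requirements:

```python
payload = {
    "messages": [
        # Without this instruction, JSON mode may emit whitespace until the
        # token limit is reached, leaving the request seemingly "stuck".
        {"role": "system", "content": "Reply only with a JSON object."},
        {"role": "user", "content": "Name three primary colors."},
    ],
    "response_format": {"type": "json_object"},
    "max_tokens": 200,  # content may be cut off if finish_reason == "length"
}
```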
+ } + }, + "discriminator": "type", + "required": [ + "type" + ] + }, + "ChatCompletionsResponseFormatJsonObject": { + "type": "object", + "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects.\nNote that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON\nvia a system or user message.", + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "json_object" + }, + "ChatCompletionsResponseFormatJsonSchema": { + "type": "object", + "description": "A response format for Chat Completions that restricts responses to emitting valid JSON objects, with a\nJSON schema specified by the caller.", + "properties": { + "json_schema": { + "$ref": "#/definitions/ChatCompletionsResponseFormatJsonSchemaDefinition", + "description": "The definition of the required JSON schema in the response, and associated metadata." + } + }, + "required": [ + "json_schema" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "json_schema" + }, + "ChatCompletionsResponseFormatJsonSchemaDefinition": { + "type": "object", + "description": "Defines the response format for chat completions as JSON with a given schema.\nThe AI model will need to adhere to this schema when generating completions.", + "properties": { + "name": { + "type": "string", + "description": "A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64." + }, + "schema": { + "type": "object", + "description": "The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.\nNote that AI models usually only support a subset of the keywords defined by JSON schema.\nConsult your AI model documentation to determine what is supported.", + "additionalProperties": {} + }, + "description": { + "type": "string", + "description": "A description of the response format, used by the AI model to determine how to generate responses in this format." + }, + "strict": { + "type": "boolean", + "description": "If set to true, the service will error out if the provided JSON schema contains keywords\nnot supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.\nIf false, and the provided JSON schema contains keywords not supported by the AI model,\nthe AI model will not error out. Instead it will ignore the unsupported keywords.", + "default": false + } + }, + "required": [ + "name", + "schema" + ] + }, + "ChatCompletionsResponseFormatText": { + "type": "object", + "description": "A response format for Chat Completions that emits text responses. This is the default response format.", + "allOf": [ + { + "$ref": "#/definitions/ChatCompletionsResponseFormat" + } + ], + "x-ms-discriminator-value": "text" + }, + "ChatCompletionsToolCall": { + "type": "object", + "description": "A function tool call requested by the AI model.", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "type": { + "type": "string", + "description": "The type of tool call. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/FunctionCall", + "description": "The details of the function call requested by the AI model." 
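`ChatCompletionsResponseFormatJsonSchema` wraps a `ChatCompletionsResponseFormatJsonSchemaDefinition` under the `json_schema` key. A sketch of the wire format, with a made-up schema:

```python
payload = {
    "messages": [{"role": "user", "content": "Describe one planet."}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "planet_info",  # a-z, A-Z, 0-9, underscores, dashes; max 64 chars
            "description": "A single planet and its radius.",
            "schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "radius_km": {"type": "number"},
                },
                "required": ["name", "radius_km"],
            },
            # strict=true makes the service error on schema keywords the model
            # does not support, instead of silently ignoring them.
            "strict": True,
        },
    },
}
```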
+ } + }, + "required": [ + "id", + "type", + "function" + ] + }, + "ChatCompletionsToolChoicePreset": { + "type": "string", + "description": "Represents a generic policy for how a chat completions tool may be selected.", + "enum": [ + "auto", + "none", + "required" + ], + "x-ms-enum": { + "name": "ChatCompletionsToolChoicePreset", + "modelAsString": true, + "values": [ + { + "name": "auto", + "value": "auto", + "description": "Specifies that the model may either use any of the tools provided in this chat completions request or\ninstead return a standard chat completions response as if no tools were provided." + }, + { + "name": "none", + "value": "none", + "description": "Specifies that the model should not respond with a tool call and should instead provide a standard chat\ncompletions response. Response content may still be influenced by the provided tool definitions." + }, + { + "name": "required", + "value": "required", + "description": "Specifies that the model should respond with a call to one or more tools." + } + ] + } + }, + "ChatCompletionsToolDefinition": { + "type": "object", + "description": "The definition of a chat completions tool that can call a function.", + "properties": { + "type": { + "type": "string", + "description": "The type of the tool. Currently, only `function` is supported.", + "enum": [ + "function" + ], + "x-ms-enum": { + "modelAsString": false + } + }, + "function": { + "$ref": "#/definitions/FunctionDefinition", + "description": "The function definition details for the function tool." + } + }, + "required": [ + "type", + "function" + ] + }, + "ChatMessageAudioDataContentItem": { + "type": "object", + "description": "A structured chat content item for audio content passed as base64 encoded data.", + "properties": { + "input_audio": { + "$ref": "#/definitions/ChatMessageInputAudio", + "description": "The details of the input audio data.", + "x-ms-client-name": "inputAudio" + } + }, + "required": [ + "input_audio" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "input_audio" + }, + "ChatMessageAudioUrlContentItem": { + "type": "object", + "description": "A structured chat content item for audio content passed as a url.", + "properties": { + "audio_url": { + "$ref": "#/definitions/ChatMessageInputAudioUrl", + "description": "The details of the audio url.", + "x-ms-client-name": "audioUrl" + } + }, + "required": [ + "audio_url" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "audio_url" + }, + "ChatMessageContentItem": { + "type": "object", + "description": "An abstract representation of a structured content item within a chat message.", + "properties": { + "type": { + "type": "string", + "description": "The discriminated object type." 
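Combining `ChatCompletionsToolDefinition` with the `tool_choice` presets, a request that offers the model a single hypothetical function looks like this:

```python
payload = {
    "messages": [{"role": "user", "content": "What's the weather in Seattle?"}],
    "tools": [
        {
            "type": "function",  # currently the only supported tool type
            "function": {
                "name": "get_weather",  # hypothetical function
                "description": "Get the current weather for a city.",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    "tool_choice": "auto",  # or "none" / "required", or a named-tool object
}
```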
+ } + }, + "discriminator": "type", + "required": [ + "type" + ] + }, + "ChatMessageImageContentItem": { + "type": "object", + "description": "A structured chat content item containing an image reference.", + "properties": { + "image_url": { + "$ref": "#/definitions/ChatMessageImageUrl", + "description": "An internet location, which must be accessible to the model, from which the image may be retrieved.", + "x-ms-client-name": "imageUrl" + } + }, + "required": [ + "image_url" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "image_url" + }, + "ChatMessageImageDetailLevel": { + "type": "string", + "description": "A representation of the possible image detail levels for image-based chat completions message content.", + "enum": [ + "auto", + "low", + "high" + ], + "x-ms-enum": { + "name": "ChatMessageImageDetailLevel", + "modelAsString": true, + "values": [ + { + "name": "auto", + "value": "auto", + "description": "Specifies that the model should determine which detail level to apply using heuristics like image size." + }, + { + "name": "low", + "value": "low", + "description": "Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer\ntokens but may also be less accurate for highly detailed images." + }, + { + "name": "high", + "value": "high", + "description": "Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed\nimages but may also be slower and consume more tokens." + } + ] + } + }, + "ChatMessageImageUrl": { + "type": "object", + "description": "An internet location from which the model may retrieve an image.", + "properties": { + "url": { + "type": "string", + "description": "The URL of the image." + }, + "detail": { + "$ref": "#/definitions/ChatMessageImageDetailLevel", + "description": "The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and\naccuracy." + } + }, + "required": [ + "url" + ] + }, + "ChatMessageInputAudio": { + "type": "object", + "description": "The details of the input audio data.", + "properties": { + "data": { + "type": "string", + "description": "Base64 encoded audio data." + }, + "format": { + "$ref": "#/definitions/AudioContentFormat", + "description": "The audio format of the audio content." + } + }, + "required": [ + "data", + "format" + ] + }, + "ChatMessageInputAudioUrl": { + "type": "object", + "description": "The details of the audio url.", + "properties": { + "url": { + "type": "string", + "description": "The URL of the audio content." + } + }, + "required": [ + "url" + ] + }, + "ChatMessageTextContentItem": { + "type": "object", + "description": "A structured chat content item containing plain text.", + "properties": { + "text": { + "type": "string", + "description": "The content of the message." + } + }, + "required": [ + "text" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatMessageContentItem" + } + ], + "x-ms-discriminator-value": "text" + }, + "ChatRequestAssistantMessage": { + "type": "object", + "description": "A request chat message representing a response or action from the assistant.", + "properties": { + "content": { + "type": "string", + "description": "The content of the message." 
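A `ChatRequestUserMessage` can carry an array of content items instead of a plain string, mixing `text` and `image_url` items. A sketch (the image URL is a placeholder):

```python
payload = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/photo.jpg",  # placeholder
                        "detail": "low",  # favor speed and fewer tokens
                    },
                },
            ],
        }
    ]
}
```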
+ }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/ChatCompletionsToolCall" + }, + "x-ms-client-name": "toolCalls" + } + }, + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "assistant" + }, + "ChatRequestDeveloperMessage": { + "type": "object", + "description": "A request chat message containing developer instructions that influence how the model will generate a chat completions\nresponse. Some AI models support a developer message instead of a system message.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the developer message." + } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "developer" + }, + "ChatRequestMessage": { + "type": "object", + "description": "An abstract representation of a chat message as provided in a request.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with this message." + } + }, + "discriminator": "role", + "required": [ + "role" + ] + }, + "ChatRequestSystemMessage": { + "type": "object", + "description": "A request chat message containing system instructions that influence how the model will generate a chat completions\nresponse.", + "properties": { + "content": { + "type": "string", + "description": "The contents of the system message." + } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "system" + }, + "ChatRequestToolMessage": { + "type": "object", + "description": "A request chat message representing requested output from a configured tool.", + "properties": { + "content": { + "type": "string", + "description": "The content of the message." + }, + "tool_call_id": { + "type": "string", + "description": "The ID of the tool call resolved by the provided content.", + "x-ms-client-name": "toolCallId" + } + }, + "required": [ + "tool_call_id" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "tool" + }, + "ChatRequestUserMessage": { + "type": "object", + "description": "A request chat message representing user input to the assistant.", + "properties": { + "content": { + "description": "The contents of the user message, with available input types varying by selected model." + } + }, + "required": [ + "content" + ], + "allOf": [ + { + "$ref": "#/definitions/ChatRequestMessage" + } + ], + "x-ms-discriminator-value": "user" + }, + "ChatResponseMessage": { + "type": "object", + "description": "A representation of a chat message as received in a response.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with the message." 
+ }, + "content": { + "type": "string", + "description": "The content of the message.", + "x-nullable": true + }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/ChatCompletionsToolCall" + }, + "x-ms-client-name": "toolCalls" + } + }, + "required": [ + "role", + "content" + ] + }, + "ChatRole": { + "type": "string", + "description": "A description of the intended purpose of a message within a chat completions interaction.", + "enum": [ + "system", + "user", + "assistant", + "tool", + "developer" + ], + "x-ms-enum": { + "name": "ChatRole", + "modelAsString": true, + "values": [ + { + "name": "system", + "value": "system", + "description": "The role that instructs or sets the behavior of the assistant." + }, + { + "name": "user", + "value": "user", + "description": "The role that provides input for chat completions." + }, + { + "name": "assistant", + "value": "assistant", + "description": "The role that provides responses to system-instructed, user-prompted input." + }, + { + "name": "tool", + "value": "tool", + "description": "The role that represents extension tool activity within a chat completions operation." + }, + { + "name": "developer", + "value": "developer", + "description": "The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role." + } + ] + } + }, + "CompletionsFinishReason": { + "type": "string", + "description": "Representation of the manner in which a completions response concluded.", + "enum": [ + "stop", + "length", + "content_filter", + "tool_calls" + ], + "x-ms-enum": { + "name": "CompletionsFinishReason", + "modelAsString": true, + "values": [ + { + "name": "stopped", + "value": "stop", + "description": "Completions ended normally and reached its end of token generation." + }, + { + "name": "tokenLimitReached", + "value": "length", + "description": "Completions exhausted available token limits before generation could complete." + }, + { + "name": "contentFiltered", + "value": "content_filter", + "description": "Completions generated a response that was identified as potentially sensitive per content\nmoderation policies." + }, + { + "name": "toolCalls", + "value": "tool_calls", + "description": "Completion ended with the model calling a provided tool for output." + } + ] + } + }, + "CompletionsUsage": { + "type": "object", + "description": "Representation of the token counts processed for a completions request.\nCounts consider all tokens across prompts, choices, choice alternates, best_of generations, and\nother consumers.", + "properties": { + "completion_tokens": { + "type": "integer", + "format": "int32", + "description": "The number of tokens generated across all completions emissions." + }, + "prompt_tokens": { + "type": "integer", + "format": "int32", + "description": "The number of tokens in the provided prompts for the completions request." + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "description": "The total number of tokens processed for the completions request and response." 
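Since `ChatResponseMessage.content` is nullable (for example when the model answers with tool calls), callers should branch on `finish_reason` before reading it. A sketch over an already-parsed `ChatCompletions` body:

```python
choice = completion["choices"][0]  # `completion` is a parsed ChatCompletions dict

if choice["finish_reason"] == "tool_calls":
    for call in choice["message"]["tool_calls"]:
        print("model requested:", call["function"]["name"])
elif choice["finish_reason"] == "length":
    print("truncated at the token limit; content may be incomplete")
else:  # "stop" or "content_filter"
    print(choice["message"]["content"])

print("total tokens:", completion["usage"]["total_tokens"])
```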
+ } + }, + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] + }, + "EmbeddingEncodingFormat": { + "type": "string", + "description": "The format of the embeddings result.\nReturns a 422 error if the model doesn't support the value or parameter.", + "enum": [ + "base64", + "binary", + "float", + "int8", + "ubinary", + "uint8" + ], + "x-ms-enum": { + "name": "EmbeddingEncodingFormat", + "modelAsString": true, + "values": [ + { + "name": "base64", + "value": "base64", + "description": "Base64" + }, + { + "name": "binary", + "value": "binary", + "description": "Binary" + }, + { + "name": "float", + "value": "float", + "description": "Floating point" + }, + { + "name": "int8", + "value": "int8", + "description": "Signed 8-bit integer" + }, + { + "name": "ubinary", + "value": "ubinary", + "description": "ubinary" + }, + { + "name": "uint8", + "value": "uint8", + "description": "Unsigned 8-bit integer" + } + ] + } + }, + "EmbeddingInputType": { + "type": "string", + "description": "Represents the input types used for embedding search.", + "enum": [ + "text", + "query", + "document" + ], + "x-ms-enum": { + "name": "EmbeddingInputType", + "modelAsString": true, + "values": [ + { + "name": "text", + "value": "text", + "description": "Indicates the input is a general text input." + }, + { + "name": "query", + "value": "query", + "description": "Indicates the input represents a search query to find the most relevant documents in your vector database." + }, + { + "name": "document", + "value": "document", + "description": "Indicates the input represents a document that is stored in a vector database." + } + ] + } + }, + "EmbeddingItem": { + "type": "object", + "description": "Representation of a single embeddings relatedness comparison.", + "properties": { + "embedding": { + "description": "List of embedding values for the input prompt. These represent a measurement of the\nvector-based relatedness of the provided input, or a base64 encoded string of the embedding vector." + }, + "index": { + "type": "integer", + "format": "int32", + "description": "Index of the prompt to which the EmbeddingItem corresponds." + } + }, + "required": [ + "embedding", + "index" + ] + }, + "EmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input text to embed, encoded as a string or array of tokens.\nTo embed multiple inputs in a single request, pass an array\nof strings or array of token arrays.", + "items": { + "type": "string" + } + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." 
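The optional `EmbeddingsOptions` fields compose as in the sketch below; per the descriptions above, any of them can draw a 422 error from a model that does not support the value:

```python
payload = {
    "input": ["how do I reset my password?"],
    "dimensions": 256,           # 422 if the model cannot resize its output
    "encoding_format": "float",  # or base64, binary, int8, ubinary, uint8
    "input_type": "query",       # a search query, as opposed to "document" or "text"
}
```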
+ } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, + "EmbeddingsResult": { + "type": "object", + "description": "Representation of the response data from an embeddings request.\nEmbeddings measure the relatedness of text strings and are commonly used for search, clustering,\nrecommendations, and other similar scenarios.", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the embeddings result." + }, + "data": { + "type": "array", + "description": "Embedding values for the prompts submitted in the request.", + "items": { + "$ref": "#/definitions/EmbeddingItem" + }, + "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/EmbeddingsUsage", + "description": "Usage counts for tokens input using the embeddings API." + }, + "model": { + "type": "string", + "description": "The model ID used to generate this result." + } + }, + "required": [ + "id", + "data", + "usage", + "model" + ] + }, + "EmbeddingsUsage": { + "type": "object", + "description": "Measurement of the amount of tokens used in this request and response.", + "properties": { + "prompt_tokens": { + "type": "integer", + "format": "int32", + "description": "Number of tokens in the request." + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "description": "Total number of tokens transacted in this request/response. Should equal the\nnumber of tokens in the request." + } + }, + "required": [ + "prompt_tokens", + "total_tokens" + ] + }, + "ExtraParameters": { + "type": "string", + "description": "Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + } + }, + "FunctionCall": { + "type": "object", + "description": "The name and arguments of a function that should be called, as generated by the model.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to call." + }, + "arguments": { + "type": "string", + "description": "The arguments to call the function with, as generated by the model in JSON format.\nNote that the model does not always generate valid JSON, and may hallucinate parameters\nnot defined by your function schema. Validate the arguments in your code before calling\nyour function." + } + }, + "required": [ + "name", + "arguments" + ] + }, + "FunctionDefinition": { + "type": "object", + "description": "The definition of a caller-specified function that chat completions may invoke in response to matching user input.", + "properties": { + "name": { + "type": "string", + "description": "The name of the function to be called." + }, + "description": { + "type": "string", + "description": "A description of what the function does. The model will use this description when selecting the function and\ninterpreting its parameters." 
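The `FunctionCall.arguments` description warns that the model's JSON may be invalid or include parameters outside your schema, so parse it defensively. A minimal sketch with a hypothetical allowed-key set:

```python
import json

def parse_arguments(function_call: dict) -> dict:
    """Decode FunctionCall.arguments, tolerating malformed model output."""
    try:
        args = json.loads(function_call["arguments"])
    except json.JSONDecodeError:
        return {}  # the model emitted invalid JSON; treat as no usable arguments
    if not isinstance(args, dict):
        return {}
    allowed = {"city"}  # hypothetical: keys your function schema actually defines
    return {key: value for key, value in args.items() if key in allowed}
```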
+ }, + "parameters": { + "type": "object", + "description": "The parameters the function accepts, described as a JSON Schema object.", + "additionalProperties": {} + } + }, + "required": [ + "name" + ] + }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image, encoded as a base64 string in a data URL. Example: `data:image/{format};base64,{data}`." + }, + "text": { + "type": "string", + "description": "Optional. The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." + } + }, + "required": [ + "image" + ] + }, + "ImageEmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an image embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", + "items": { + "$ref": "#/definitions/ImageEmbeddingInput" + }, + "x-ms-identifiers": [] + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." + } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, + "ModelInfo": { + "type": "object", + "description": "Represents some basic information about the AI model.", + "properties": { + "model_name": { + "type": "string", + "description": "The name of the AI model. For example: `Phi21`" + }, + "model_type": { + "$ref": "#/definitions/ModelType", + "description": "The type of the AI model." + }, + "model_provider_name": { + "type": "string", + "description": "The model provider name. 
For example: `Microsoft Research`" + } + }, + "required": [ + "model_name", + "model_type", + "model_provider_name" + ] + }, + "ModelType": { + "type": "string", + "description": "The type of AI model.", + "enum": [ + "embeddings", + "image_generation", + "text_generation", + "image_embeddings", + "audio_generation", + "chat_completion" + ], + "x-ms-enum": { + "name": "ModelType", + "modelAsString": true, + "values": [ + { + "name": "embeddings", + "value": "embeddings", + "description": "A model capable of generating embeddings from text" + }, + { + "name": "image_generation", + "value": "image_generation", + "description": "A model capable of generating images from an image and text description" + }, + { + "name": "text_generation", + "value": "text_generation", + "description": "A text generation model" + }, + { + "name": "image_embeddings", + "value": "image_embeddings", + "description": "A model capable of generating embeddings from an image" + }, + { + "name": "audio_generation", + "value": "audio_generation", + "description": "A text-to-audio generative model" + }, + { + "name": "chat_completion", + "value": "chat_completion", + "description": "A model capable of taking chat-formatted messages and generating responses" + } + ] + } + }, + "StreamingChatChoiceUpdate": { + "type": "object", + "description": "Represents an update to a single prompt completion when the service is streaming updates \nusing Server Sent Events (SSE).\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "properties": { + "index": { + "type": "integer", + "format": "int32", + "description": "The ordered index associated with this chat completions choice." + }, + "finish_reason": { + "$ref": "#/definitions/CompletionsFinishReason", + "description": "The reason that this chat completions choice completed its generation.", + "x-nullable": true + }, + "delta": { + "$ref": "#/definitions/StreamingChatResponseMessageUpdate", + "description": "An update to the chat message for a given chat completions prompt." + } + }, + "required": [ + "index", + "finish_reason", + "delta" + ] + }, + "StreamingChatCompletionsUpdate": { + "type": "object", + "description": "Represents a response update to a chat completions request, when the service is streaming updates \nusing Server Sent Events (SSE).\nCompletions support a wide variety of tasks and generate text that continues from or \"completes\"\nprovided prompt data.", + "properties": { + "id": { + "type": "string", + "description": "A unique identifier associated with this chat completions response." + }, + "created": { + "type": "integer", + "format": "unixtime", + "description": "The first timestamp associated with generation activity for this completions response,\nrepresented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970." + }, + "model": { + "type": "string", + "description": "The model used for the chat completion." 
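When `stream` is true, updates arrive as Server Sent Events whose `data:` payloads are `StreamingChatCompletionsUpdate` objects. A consumption sketch with `requests`; note the `data: [DONE]` end-of-stream sentinel is a common convention for such streams, not something this spec defines:

```python
import json
import requests

ENDPOINT = "https://<your-endpoint>.models.ai.azure.com"  # placeholder
TOKEN = "<access-token>"  # placeholder

with requests.post(
    f"{ENDPOINT}/chat/completions",
    params={"api-version": "2024-05-01-preview"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"messages": [{"role": "user", "content": "Tell me a story."}], "stream": True},
    stream=True,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        data = line[len(b"data: "):]
        if data == b"[DONE]":  # conventional sentinel (assumption)
            break
        update = json.loads(data)
        for choice in update["choices"]:
            print(choice["delta"].get("content") or "", end="", flush=True)
```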
+ }, + "choices": { + "type": "array", + "description": "An update to the collection of completion choices associated with this completions response.\nGenerally, `n` choices are generated per provided prompt with a default value of 1.\nToken limits and other settings may limit the number of choices generated.", + "minItems": 1, + "items": { + "$ref": "#/definitions/StreamingChatChoiceUpdate" + }, + "x-ms-identifiers": [] + }, + "usage": { + "$ref": "#/definitions/CompletionsUsage", + "description": "Usage information for tokens processed and generated as part of this completions operation." + } + }, + "required": [ + "id", + "created", + "model", + "choices" + ] + }, + "StreamingChatResponseMessageUpdate": { + "type": "object", + "description": "A representation of a chat message update as received in a streaming response.", + "properties": { + "role": { + "$ref": "#/definitions/ChatRole", + "description": "The chat role associated with the message. If present, this should always be 'assistant'." + }, + "content": { + "type": "string", + "description": "The content of the message." + }, + "tool_calls": { + "type": "array", + "description": "The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat\ncompletions request to resolve as configured.", + "items": { + "$ref": "#/definitions/StreamingChatResponseToolCallUpdate" + }, + "x-ms-client-name": "toolCalls" + } + } + }, + "StreamingChatResponseToolCallUpdate": { + "type": "object", + "description": "An update to the function tool call information requested by the AI model.", + "properties": { + "id": { + "type": "string", + "description": "The ID of the tool call." + }, + "function": { + "$ref": "#/definitions/FunctionCall", + "description": "Updates to the function call requested by the AI model." + } + }, + "required": [ + "id", + "function" + ] + } + }, + "parameters": { + "Azure.Core.Foundations.ApiVersionParameter": { + "name": "api-version", + "in": "query", + "description": "The API version to use for this operation.", + "required": true, + "type": "string", + "minLength": 1, + "x-ms-parameter-location": "method", + "x-ms-client-name": "apiVersion" + } + } +} diff --git a/specification/ai/data-plane/AI.Model/readme.md b/specification/ai/data-plane/AI.Model/readme.md new file mode 100644 index 000000000000..0009f3fbd983 --- /dev/null +++ b/specification/ai/data-plane/AI.Model/readme.md @@ -0,0 +1,96 @@ +# Azure AI Model Client + +> see https://aka.ms/autorest + +This is the AutoRest configuration file for the Azure AI Model Client. + +## Getting Started + +To build the SDKs for the Azure AI Model Client, simply install AutoRest via `npm` (`npm install -g autorest`) and then run: + +> `autorest readme.md` + +To see additional help and options, run: + +> `autorest --help` + +For other options on installation see [Installing AutoRest](https://aka.ms/autorest/install) on the AutoRest GitHub page. + +--- + +## Configuration + +### Basic Information + +These are the global settings for the Azure AI Model Inference API. + +```yaml +openapi-type: data-plane +tag: package-2024-05-01-preview +``` + +### Tag: package-2024-05-01-preview + +These settings apply only when `--tag=package-2024-05-01-preview` is specified on the command line. 
+ + ```yaml $(tag) == 'package-2024-05-01-preview' + input-file: + - preview/2024-05-01-preview/openapi.json + ``` + + ## Suppression + + ``` yaml + directive: + - suppress: IntegerTypeMustHaveFormat + from: openapi.json + reason: We are using the format "unixtime", which is not supported by the linter at the moment. + - suppress: AvoidAnonymousParameter + from: openapi.json + reason: The swagger file is auto-generated, so this cannot be controlled at the source. +``` + + \ No newline at end of file