diff --git a/sdk/ai/azure-ai-inference/README.md b/sdk/ai/azure-ai-inference/README.md index ee54cdc7b2c8..622d45ccc4f5 100644 --- a/sdk/ai/azure-ai-inference/README.md +++ b/sdk/ai/azure-ai-inference/README.md @@ -210,7 +210,8 @@ print(response.choices[0].message.content) -The following types or messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage` (See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`). +The following types of messages are supported: `SystemMessage`, `UserMessage`, `AssistantMessage`, `ToolMessage`. See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`. See [sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) for usage of `UserMessage` that +includes sending an image. Alternatively, you can provide the messages as a dictionary instead of using the strongly typed classes like `SystemMessage` and `UserMessage`: @@ -232,7 +233,10 @@ response = client.complete( "role": "assistant", "content": "The main construction of the International Space Station (ISS) was completed between 1998 and 2011. During this period, more than 30 flights by US space shuttles and 40 by Russian rockets were conducted to transport components and modules to the station.", }, - {"role": "user", "content": "And what was the estimated cost to build it?"}, + { + "role": "user", + "content": "And what was the estimated cost to build it?" + }, ] } ) @@ -399,7 +403,7 @@ try: result = client.complete( ... ) except HttpResponseError as e: print(f"Status code: {e.status_code} ({e.reason})") - print(f"{e.message}") + print(e.message) ``` For example, when you provide a wrong authentication key: diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py index 48a52c3b763f..2426e46a006a 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py @@ -405,9 +405,6 @@ def _complete( response. Required. "model": "str", # The model used for the chat completion. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "completion_tokens": 0, # The number of tokens generated across all completions emissions. Required. "prompt_tokens": 0, # The number of tokens in the provided prompts @@ -678,9 +675,6 @@ def _embed( "id": "str", # Unique identifier for the embeddings result. Required. "model": "str", # The model ID used to generate this result. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "input_tokens": 0, # Number of tokens in the request prompt. Required. "prompt_tokens": 0, # Number of tokens used for the prompt sent to @@ -953,9 +947,6 @@ def _embed( "id": "str", # Unique identifier for the embeddings result. Required.
"model": "str", # The model ID used to generate this result. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "input_tokens": 0, # Number of tokens in the request prompt. Required. "prompt_tokens": 0, # Number of tokens used for the prompt sent to diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 9dbeb1ffee6d..05efb4031fc6 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -95,8 +95,10 @@ def load_client( :raises ~azure.core.exceptions.HttpResponseError """ - with ChatCompletionsClient(endpoint, credential, **kwargs) as client: # Pick any of the clients, it does not matter. - model_info = client.get_model_info() # type: ignore + with ChatCompletionsClient( + endpoint, credential, **kwargs + ) as client: # Pick any of the clients, it does not matter. + model_info = client.get_model_info() # type: ignore _LOGGER.info("model_info=%s", model_info) if not model_info.model_type: @@ -142,7 +144,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr self._model_info: Optional[_models.ModelInfo] = None super().__init__(endpoint, credential, **kwargs) - @overload def complete( self, @@ -164,9 +165,7 @@ def complete( ] = None, seed: Optional[int] = None, **kwargs: Any, - ) -> _models.ChatCompletions: - ... - + ) -> _models.ChatCompletions: ... @overload def complete( @@ -189,9 +188,7 @@ def complete( ] = None, seed: Optional[int] = None, **kwargs: Any, - ) -> _models.StreamingChatCompletions: - ... - + ) -> _models.StreamingChatCompletions: ... 
@overload def complete( @@ -535,7 +532,6 @@ def complete( return _deserialize(_models._models.ChatCompletions, response.json()) # pylint: disable=protected-access - @distributed_trace def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -546,15 +542,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" def __enter__(self) -> Self: @@ -581,7 +575,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr self._model_info: Optional[_models.ModelInfo] = None super().__init__(endpoint, credential, **kwargs) - @overload def embed( self, @@ -791,7 +784,6 @@ def embed( return deserialized # type: ignore - @distributed_trace def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -802,15 +794,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" def __enter__(self) -> Self: @@ -1046,7 +1036,6 @@ def embed( return deserialized # type: ignore - @distributed_trace def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -1057,15 +1046,13 @@ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" def __enter__(self) -> Self: diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py index 2f781d740827..f0c6180722c8 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py @@ -1441,7 +1441,7 @@ def 
_deserialize(self, target_obj, data): elif isinstance(response, type) and issubclass(response, Enum): return self.deserialize_enum(data, response) - if data is None: + if data is None or data is CoreNull: return data try: attributes = response._attribute_map # type: ignore diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py index 33344e718128..37fa644b4ae7 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py @@ -274,9 +274,6 @@ async def _complete( response. Required. "model": "str", # The model used for the chat completion. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "completion_tokens": 0, # The number of tokens generated across all completions emissions. Required. "prompt_tokens": 0, # The number of tokens in the provided prompts @@ -547,9 +544,6 @@ async def _embed( "id": "str", # Unique identifier for the embeddings result. Required. "model": "str", # The model ID used to generate this result. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "input_tokens": 0, # Number of tokens in the request prompt. Required. "prompt_tokens": 0, # Number of tokens used for the prompt sent to @@ -822,9 +816,6 @@ async def _embed( "id": "str", # Unique identifier for the embeddings result. Required. "model": "str", # The model ID used to generate this result. Required. "usage": { - "capacity_type": "str", # Indicates whether your capacity has been - affected by the usage amount (token count) reported here. Required. Known - values are: "usage" and "fixed". "input_tokens": 0, # Number of tokens in the request prompt. Required. "prompt_tokens": 0, # Number of tokens used for the prompt sent to diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 9b5347bb5bc8..7a9566a676fb 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -80,8 +80,10 @@ async def load_client( :raises ~azure.core.exceptions.HttpResponseError """ - async with ChatCompletionsClient(endpoint, credential, **kwargs) as client: # Pick any of the clients, it does not matter. - model_info = await client.get_model_info() # type: ignore + async with ChatCompletionsClient( + endpoint, credential, **kwargs + ) as client: # Pick any of the clients, it does not matter. + model_info = await client.get_model_info() # type: ignore _LOGGER.info("model_info=%s", model_info) if not model_info.model_type: @@ -151,9 +153,7 @@ async def complete( ] = None, seed: Optional[int] = None, **kwargs: Any, - ) -> _models.ChatCompletions: - ... - + ) -> _models.ChatCompletions: ... @overload async def complete( @@ -177,9 +177,7 @@ async def complete( ] = None, seed: Optional[int] = None, **kwargs: Any, - ) -> _models.AsyncStreamingChatCompletions: - ... - + ) -> _models.AsyncStreamingChatCompletions: ... 
@overload async def complete( @@ -539,7 +537,6 @@ async def complete( return _deserialize(_models.ChatCompletions, response.json()) # pylint: disable=protected-access - @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -550,15 +547,13 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" async def __aenter__(self) -> Self: @@ -587,7 +582,6 @@ def __init__( self._model_info: Optional[_models.ModelInfo] = None super().__init__(endpoint=endpoint, credential=credential, **kwargs) - @overload async def embed( self, @@ -797,7 +791,6 @@ async def embed( return deserialized # type: ignore - @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -808,15 +801,13 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" async def __aenter__(self) -> Self: @@ -845,7 +836,6 @@ def __init__( self._model_info: Optional[_models.ModelInfo] = None super().__init__(endpoint=endpoint, credential=credential, **kwargs) - @overload async def embed( self, @@ -1055,7 +1045,6 @@ async def embed( return deserialized # type: ignore - @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long @@ -1066,21 +1055,20 @@ async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: :raises ~azure.core.exceptions.HttpResponseError """ if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init + self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init return self._model_info - def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - # Remove this once https://github.com/Azure/autorest.python/issues/2619 is fixed, # and you see the equivalent auto-generated method in _client.py return "Self" async def __aenter__(self) -> Self: await self._client.__aenter__() return self + __all__: List[str] = [ 
"load_client", "ChatCompletionsClient", diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py index 8a7c4bbbb7b3..662f068900e1 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py @@ -16,6 +16,10 @@ from ._models import ChatCompletionsNamedToolSelection from ._models import ChatCompletionsToolCall from ._models import ChatCompletionsToolDefinition +from ._models import ContentItem +from ._models import ImageContentItem +from ._models import ImageUrl +from ._models import TextContentItem from ._models import ChatRequestMessage from ._models import ChatResponseMessage from ._models import CompletionsUsage @@ -34,11 +38,12 @@ from ._enums import ChatCompletionsResponseFormat from ._enums import ChatCompletionsToolSelectionPreset +from ._enums import ImageDetailLevel from ._enums import ChatRole from ._enums import EmbeddingEncodingFormat from ._enums import EmbeddingInputType -from ._enums import CompletionsFinishReason from ._enums import ModelType +from ._enums import CompletionsFinishReason from ._patch import StreamingChatCompletions from ._patch import AsyncStreamingChatCompletions @@ -57,6 +62,10 @@ "ChatCompletionsNamedToolSelection", "ChatCompletionsToolCall", "ChatCompletionsToolDefinition", + "ContentItem", + "ImageContentItem", + "ImageUrl", + "TextContentItem", "ChatRequestMessage", "ChatResponseMessage", "CompletionsUsage", @@ -74,11 +83,12 @@ "UserMessage", "ChatCompletionsResponseFormat", "ChatCompletionsToolSelectionPreset", + "ImageDetailLevel", "ChatRole", "EmbeddingEncodingFormat", "EmbeddingInputType", + "ModelType", "CompletionsFinishReason", - "ModelType" ] _patch_sdk() diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_enums.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_enums.py index 0d191c4d176d..ddcdbbf81681 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_enums.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_enums.py @@ -40,6 +40,24 @@ class ChatCompletionsToolSelectionPreset(str, Enum, metaclass=CaseInsensitiveEnu """Specifies that the model should respond with a call to one or more tools.""" +class ImageDetailLevel(str, Enum, metaclass=CaseInsensitiveEnumMeta): + """A representation of the possible image detail levels for image-based chat completions message + content. 
+ """ + + AUTO = "auto" + """Specifies that the model should determine which detail level to apply using heuristics like + image size.""" + LOW = "low" + """Specifies that image evaluation should be constrained to the 'low-res' model that may be faster + and consume fewer + tokens but may also be less accurate for highly detailed images.""" + HIGH = "high" + """Specifies that image evaluation should enable the 'high-res' model that may be more accurate + for highly detailed + images but may also be slower and consume more tokens.""" + + class ChatRole(str, Enum, metaclass=CaseInsensitiveEnumMeta): """A description of the intended purpose of a message within a chat completions interaction.""" diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py index 66fa73e9173b..9f7bb3e8d6bd 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py @@ -469,6 +469,153 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # pylint: disable=useles super().__init__(*args, type="function", **kwargs) + +class ContentItem(_model_base.Model): + """An abstract representation of a structured content item within a chat message. + + You probably want to use the sub-classes and not this class directly. Known sub-classes are: + ImageContentItem, TextContentItem + + All required parameters must be populated in order to send to server. + + :ivar type: The discriminated object type. Required. Default value is None. + :vartype type: str + """ + + __mapping__: Dict[str, _model_base.Model] = {} + type: str = rest_discriminator(name="type") + """The discriminated object type. Required. Default value is None.""" + + @overload + def __init__( + self, + *, + type: str, + ): ... + + @overload + def __init__(self, mapping: Mapping[str, Any]): + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: # pylint: disable=useless-super-delegation + super().__init__(*args, **kwargs) + + +class ImageContentItem(ContentItem, discriminator="image_url"): + """A structured chat content item containing an image reference. + + All required parameters must be populated in order to send to server. + + :ivar type: The discriminated object type: always 'image_url' for this type. Required. Default + value is "image_url". + :vartype type: str + :ivar image_url: An internet location, which must be accessible to the model, from which the + image may be retrieved. Required. + :vartype image_url: ~azure.ai.inference.models.ImageUrl + """ + + type: Literal["image_url"] = rest_discriminator(name="type") # type: ignore + """The discriminated object type: always 'image_url' for this type. Required. Default value is + \"image_url\".""" + image_url: "_models.ImageUrl" = rest_field() + """An internet location, which must be accessible to the model, from which the image may be + retrieved. Required.""" + + @overload + def __init__( + self, + *, + image_url: "_models.ImageUrl", + ): ... + + @overload + def __init__(self, mapping: Mapping[str, Any]): + """ + :param mapping: raw JSON to initialize the model.
+ :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: # pylint: disable=useless-super-delegation + super().__init__(*args, type="image_url", **kwargs) + + +class ImageUrl(_model_base.Model): + """An internet location from which the model may retrieve an image. + + All required parameters must be populated in order to send to server. + + :ivar url: The URL of the image. Required. + :vartype url: str + :ivar detail: The evaluation quality setting to use, which controls relative prioritization of + speed, token consumption, and + accuracy. Known values are: "auto", "low", and "high". + :vartype detail: str or ~azure.ai.inference.models.ImageDetailLevel + """ + + url: str = rest_field() + """The URL of the image. Required.""" + detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field() + """The evaluation quality setting to use, which controls relative prioritization of speed, token + consumption, and + accuracy. Known values are: \"auto\", \"low\", and \"high\".""" + + @overload + def __init__( + self, + *, + url: str, + detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None, + ): ... + + @overload + def __init__(self, mapping: Mapping[str, Any]): + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: # pylint: disable=useless-super-delegation + super().__init__(*args, **kwargs) + + +class TextContentItem(ContentItem, discriminator="text"): + """A structured chat content item containing plain text. + + All required parameters must be populated in order to send to server. + + :ivar type: The discriminated object type: always 'text' for this type. Required. Default value + is "text". + :vartype type: str + :ivar text: The content of the message. Required. + :vartype text: str + """ + + type: Literal["text"] = rest_discriminator(name="type") # type: ignore + """The discriminated object type: always 'text' for this type. Required. Default value is + \"text\".""" + text: str = rest_field() + """The content of the message. Required.""" + + @overload + def __init__( + self, + *, + text: str, + ): ... + + @overload + def __init__(self, mapping: Mapping[str, Any]): + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: # pylint: disable=useless-super-delegation + super().__init__(*args, type="text", **kwargs) + + class ChatResponseMessage(_model_base.Model): """A representation of a chat message as received in a response. @@ -834,7 +981,7 @@ class ModelInfo(_model_base.Model): model_name: str = rest_field() """The name of the AI model. For example: ``Phi21``. Required.""" - model_type: Union[str, "_models._enums.ModelType"] = rest_field() + model_type: Union[str, "_models.ModelType"] = rest_field() """The type of the AI model. A Unique identifier for the profile. Required. Known values are: \"embeddings\", \"image_generation\", \"text_generation\", \"image_embeddings\", \"audio_generation\", and \"chat\".""" @@ -846,7 +993,7 @@ def __init__( self, *, model_name: str, - model_type: Union[str, "_models._enums.ModelType"], + model_type: Union[str, "_models.ModelType"], model_provider_name: str, ): ... @@ -1060,22 +1207,22 @@ class UserMessage(ChatRequestMessage, discriminator="user"): messages. Required. The role that provides input for chat completions. 
:vartype role: str or ~azure.ai.inference.models.USER :ivar content: The contents of the user message, with available input types varying by selected - model. Required. - :vartype content: str + model. Required. Is either a str type or a [ContentItem] type. + :vartype content: str or list[~azure.ai.inference.models.ContentItem] """ role: Literal[ChatRole.USER] = rest_discriminator(name="role") # type: ignore """The chat role associated with this message, which is always 'user' for user messages. Required. The role that provides input for chat completions.""" - content: str = rest_field() + content: Union["str", List["_models.ContentItem"]] = rest_field() """The contents of the user message, with available input types varying by selected model. - Required.""" + Required. Is either a str type or a [ContentItem] type.""" @overload def __init__( self, *, - content: str, + content: Union[str, List["_models.ContentItem"]], ): ... @overload diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md index b5628ef7c876..9d8e5cfe7aea 100644 --- a/sdk/ai/azure-ai-inference/samples/README.md +++ b/sdk/ai/azure-ai-inference/samples/README.md @@ -92,9 +92,11 @@ similarly for the other samples. |[sample_chat_completions_streaming.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming.py) | One chat completion operation using a synchronous client and streaming response. | |[sample_chat_completions_streaming_with_entra_id_auth.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py) | One chat completion operation using a synchronous client and streaming response, using Entra ID authentication. This sample also shows setting the `azureml-model-deployment` HTTP request header, which may be required for some Managed Compute endpoints. | |[sample_chat_completions.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py) | One chat completion operation using a synchronous client. | +|[sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) | One chat completion operation using a synchronous client, which includes sending an input image. | |[sample_chat_completions_with_history.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py) | Two chat completion operations using a synchronous client, with the second completion using chat history from the first. | |[sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py) | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`.
| |[sample_chat_completions_from_input_json.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) | +|[sample_chat_completions_from_input_json_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image. | |[sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) | Shows how to use a tool (function) in chat completions, for an AI model that supports tools | |[sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py) | Shows how to use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. | |[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. | diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample1.png b/sdk/ai/azure-ai-inference/samples/async_samples/sample1.png index ba18b500872f..59d79ff28fc5 100644 Binary files a/sdk/ai/azure-ai-inference/samples/async_samples/sample1.png and b/sdk/ai/azure-ai-inference/samples/async_samples/sample1.png differ diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample2.png b/sdk/ai/azure-ai-inference/samples/async_samples/sample2.png index 59d79ff28fc5..ba18b500872f 100644 Binary files a/sdk/ai/azure-ai-inference/samples/async_samples/sample2.png and b/sdk/ai/azure-ai-inference/samples/async_samples/sample2.png differ diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_async.py index bb530e6f9dc5..9a08cd3562c5 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_async.py @@ -36,7 +36,7 @@ async def sample_chat_completions_async(): exit() # Create a chat completion client for asynchronous operations - async with ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) as client: + async with ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key)) as client: # Do a single chat completion operation response = await client.complete( diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_json_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_json_async.py index 670638fd05cc..4e65683aef99 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_json_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_json_async.py @@ -45,12 +45,18 @@ async def
sample_chat_completions_from_input_json_async(): "role": "system", "content": "You are an AI assistant that helps people find information. Your replies are short, no more than two sentences.", }, - {"role": "user", "content": "What year was construction of the International Space Station mostly done?"}, + { + "role": "user", + "content": "What year was construction of the International Space Station mostly done?", + }, { "role": "assistant", "content": "The main construction of the International Space Station (ISS) was completed between 1998 and 2011. During this period, more than 30 flights by US space shuttles and 40 by Russian rockets were conducted to transport components and modules to the station.", }, - {"role": "user", "content": "And what was the estimated cost to build it?"}, + { + "role": "user", + "content": "And what was the estimated cost to build it?" + }, ] } diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py index 5db3d52848a8..477de6b5b444 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py @@ -26,6 +26,7 @@ """ import asyncio + async def sample_chat_completions_streaming_azure_openai_async(): import os from azure.ai.inference.aio import ChatCompletionsClient @@ -72,7 +73,7 @@ async def sample_chat_completions_streaming_azure_openai_async(): messages=[ SystemMessage(content="You are a helpful assistant."), UserMessage(content="Give me 5 good reasons why I should exercise every day."), - ] + ], ) # Iterate on the response to get chat completion updates, as they arrive from the service diff --git a/sdk/ai/azure-ai-inference/samples/sample1.png b/sdk/ai/azure-ai-inference/samples/sample1.png index ba18b500872f..59d79ff28fc5 100644 Binary files a/sdk/ai/azure-ai-inference/samples/sample1.png and b/sdk/ai/azure-ai-inference/samples/sample1.png differ diff --git a/sdk/ai/azure-ai-inference/samples/sample2.png b/sdk/ai/azure-ai-inference/samples/sample2.png index 59d79ff28fc5..ba18b500872f 100644 Binary files a/sdk/ai/azure-ai-inference/samples/sample2.png and b/sdk/ai/azure-ai-inference/samples/sample2.png differ diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py index 24fefc7f0c84..bd69deae3888 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py @@ -18,6 +18,7 @@ 2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret. """ + def sample_chat_completions(): import os diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py index f3495f7d4904..a354044eb158 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py @@ -53,7 +53,10 @@ def sample_chat_completions_from_input_json(): "role": "assistant", "content": "The main construction of the International Space Station (ISS) was completed between 1998 and 2011. 
During this period, more than 30 flights by US space shuttles and 40 by Russian rockets were conducted to transport components and modules to the station.", }, - {"role": "user", "content": "And what was the estimated cost to build it?"}, + { + "role": "user", + "content": "And what was the estimated cost to build it?" + }, ] } ) diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py new file mode 100644 index 000000000000..93abd4af8b33 --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py @@ -0,0 +1,86 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +DESCRIPTION: + This sample demonstrates how to get a chat completions response from + the service using a synchronous client, and directly providing the + JSON request body (containing input chat messages). The sample + shows how to include an image in the input chat messages. + This sample will only work on AI models that support image input. + +USAGE: + python sample_chat_completions_from_input_json_with_images.py + + Set these two or three environment variables before running the sample: + 1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form + https://<your-deployment-name>.<your-azure-region>.inference.ai.azure.com + where `your-deployment-name` is your unique AI Model deployment name, and + `your-azure-region` is the Azure region where your model is deployed. + 2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret. + 3) CHAT_COMPLETIONS_DEPLOYMENT_NAME - Optional. The value for the HTTP + request header `azureml-model-deployment`. +""" +# mypy: disable-error-code="union-attr" +# pyright: reportAttributeAccessIssue=false + + +def sample_chat_completions_from_input_json_with_images(): + import os + from azure.ai.inference import ChatCompletionsClient + from azure.core.credentials import AzureKeyCredential + + try: + endpoint = os.environ["CHAT_COMPLETIONS_ENDPOINT"] + key = os.environ["CHAT_COMPLETIONS_KEY"] + except KeyError: + print("Missing environment variable 'CHAT_COMPLETIONS_ENDPOINT' or 'CHAT_COMPLETIONS_KEY'") + print("Set them before running this sample.") + exit() + + try: + model_deployment = os.environ["CHAT_COMPLETIONS_DEPLOYMENT_NAME"] + except KeyError: + print("Could not read optional environment variable `CHAT_COMPLETIONS_DEPLOYMENT_NAME`.") + print("HTTP request header `azureml-model-deployment` will not be set.") + model_deployment = None + + client = ChatCompletionsClient( + endpoint=endpoint, + credential=AzureKeyCredential(key), + headers={"azureml-model-deployment": model_deployment} + ) + + response = client.complete( + { + "messages": [ + { + "role": "system", + "content": "You are an AI assistant that describes images in detail", + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this image?"
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/ai/azure-ai-inference/samples/sample1.png", + "detail": "high", + } + }, + ], + }, + ] + } + ) + + print(response.choices[0].message.content) + + +if __name__ == "__main__": + sample_chat_completions_from_input_json_with_images() diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py index aac9a6e290d4..3adcf051dc47 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py @@ -51,7 +51,7 @@ def sample_chat_completions_streaming_with_entra_id_auth(): client = ChatCompletionsClient( endpoint=endpoint, credential=DefaultAzureCredential(exclude_interactive_browser_credential=False), - headers={"azureml-model-deployment": model_deployment} + headers={"azureml-model-deployment": model_deployment}, ) response = client.complete( diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py new file mode 100644 index 000000000000..9e57692d8eec --- /dev/null +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py @@ -0,0 +1,77 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +""" +DESCRIPTION: + This sample demonstrates how to get a chat completions response from + the service using a synchronous client. The sample + shows how to include an image in the input chat messages. + This sample will only work on AI models that support image input. + +USAGE: + python sample_chat_completions_with_images.py + + Set these two or three environment variables before running the sample: + 1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form + https://<your-deployment-name>.<your-azure-region>.inference.ai.azure.com + where `your-deployment-name` is your unique AI Model deployment name, and + `your-azure-region` is the Azure region where your model is deployed. + 2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret. + 3) CHAT_COMPLETIONS_DEPLOYMENT_NAME - Optional. The value for the HTTP + request header `azureml-model-deployment`.
+""" + + +def sample_chat_completions_with_images(): + import os + from azure.ai.inference import ChatCompletionsClient + from azure.ai.inference.models import ( + SystemMessage, UserMessage, TextContentItem, + ImageContentItem, ImageUrl, ImageDetailLevel + ) + from azure.core.credentials import AzureKeyCredential + + try: + endpoint = os.environ["CHAT_COMPLETIONS_ENDPOINT"] + key = os.environ["CHAT_COMPLETIONS_KEY"] + except KeyError: + print("Missing environment variable 'CHAT_COMPLETIONS_ENDPOINT' or 'CHAT_COMPLETIONS_KEY'") + print("Set them before running this sample.") + exit() + + try: + model_deployment = os.environ["CHAT_COMPLETIONS_DEPLOYMENT_NAME"] + except KeyError: + print("Could not read optional environment variable `CHAT_COMPLETIONS_DEPLOYMENT_NAME`.") + print("HTTP request header `azureml-model-deployment` will not be set.") + model_deployment = None + + client = ChatCompletionsClient( + endpoint=endpoint, + credential=AzureKeyCredential(key), + headers={"azureml-model-deployment": model_deployment}, + ) + + response = client.complete( + messages=[ + SystemMessage(content="You are an AI assistant that describes images in details."), + UserMessage( + content=[ + TextContentItem(text="What's in this image?"), + ImageContentItem( + image_url=ImageUrl( + url="https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/ai/azure-ai-inference/samples/sample1.png", + detail=ImageDetailLevel.HIGH, + ), + ), + ], + ), + ], + ) + + print(response.choices[0].message.content) + + +if __name__ == "__main__": + sample_chat_completions_with_images() diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml index e107572a4177..25d60882cc16 100644 --- a/sdk/ai/azure-ai-inference/tsp-location.yaml +++ b/sdk/ai/azure-ai-inference/tsp-location.yaml @@ -1,4 +1,4 @@ directory: specification/ai/ModelClient -commit: 907e4e19cf76132ea281e060fedcfee0eb671e92 +commit: 36d9e575521d4aa5cc7ae3978e32dbfa3959ca4e repo: Azure/azure-rest-api-specs additionalDirectories: