diff --git a/sdk/openai/azure-openai/dev_requirements.txt b/sdk/openai/azure-openai/dev_requirements.txt
index 4db1476d66e1..02917563be53 100644
--- a/sdk/openai/azure-openai/dev_requirements.txt
+++ b/sdk/openai/azure-openai/dev_requirements.txt
@@ -4,3 +4,4 @@
 numpy
 aiohttp
 requests
+httpx
diff --git a/sdk/openai/azure-openai/tests/conftest.py b/sdk/openai/azure-openai/tests/conftest.py
index e837ee30a51e..bb3eb7b5d91e 100644
--- a/sdk/openai/azure-openai/tests/conftest.py
+++ b/sdk/openai/azure-openai/tests/conftest.py
@@ -55,7 +55,7 @@
 ENV_AZURE_OPENAI_SEARCH_KEY = "AZURE_OPENAI_SEARCH_KEY"
 ENV_AZURE_OPENAI_SEARCH_INDEX = "AZURE_OPENAI_SEARCH_INDEX"
 
-ENV_AZURE_OPENAI_API_VERSION = "2023-12-01-preview"
+ENV_AZURE_OPENAI_API_VERSION = "2024-02-15-preview"
 ENV_AZURE_OPENAI_COMPLETIONS_NAME = "gpt-35-turbo-instruct"
 ENV_AZURE_OPENAI_CHAT_COMPLETIONS_NAME = "gpt-35-turbo-16k"
 ENV_AZURE_OPENAI_EMBEDDINGS_NAME = "text-embedding-ada-002"
@@ -357,32 +357,3 @@ def wrapper(*args, **kwargs):
 
     return wrapper
 
-
-def setup_adapter(deployment_id):
-
-    class CustomAdapter(requests.adapters.HTTPAdapter):
-
-        def send(self, request, **kwargs):
-            request.url = f"{openai.api_base}/openai/deployments/{deployment_id}/extensions/chat/completions?api-version={openai.api_version}"
-            return super().send(request, **kwargs)
-
-    session = requests.Session()
-
-    session.mount(
-        prefix=f"{openai.api_base}/openai/deployments/{deployment_id}",
-        adapter=CustomAdapter()
-    )
-
-    openai.requestssession = session
-
-
-def setup_adapter_async(deployment_id):
-
-    class CustomAdapterAsync(aiohttp.ClientRequest):
-
-        async def send(self, conn) -> aiohttp.ClientResponse:
-            self.url = yarl.URL(f"{openai.api_base}/openai/deployments/{deployment_id}/extensions/chat/completions?api-version={openai.api_version}")
-            return await super().send(conn)
-    
-    session = aiohttp.ClientSession(request_class=CustomAdapterAsync)
-    openai.aiosession.set(session)
diff --git a/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0.py b/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0.py
index 3083e9d5d79e..9347efa9c817 100644
--- a/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0.py
+++ b/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0.py
@@ -6,7 +6,7 @@
 import pytest
 import openai
 from devtools_testutils import AzureRecordedTestCase
-from conftest import configure_v0, AZURE, OPENAI, ALL, AZURE_AD, setup_adapter
+from conftest import configure_v0, AZURE, OPENAI, ALL, AZURE_AD
 
 
 class TestChatCompletions(AzureRecordedTestCase):
@@ -696,11 +696,11 @@ def test_chat_completion_byod(self, set_vars, azure_openai_creds, api_type):
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "How is Azure machine learning different than Azure OpenAI?"}
         ]
-        setup_adapter(azure_openai_creds["chat_completions_name"])
+
         completion = openai.ChatCompletion.create(
             messages=messages,
             deployment_id=azure_openai_creds["chat_completions_name"],
-            dataSources=[
+            data_sources=[
                 {
                     "type": "AzureCognitiveSearch",
                     "parameters": {
@@ -720,9 +720,8 @@ def test_chat_completion_byod(self, set_vars, azure_openai_creds, api_type):
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
-        assert completion.choices[0].message.context.messages[0].role == "tool"
-        assert completion.choices[0].message.context.messages[0].content
-        openai.requestssession = None
+        assert completion.choices[0].message.context.citations
+        assert completion.choices[0].message.context.intent
 
     @pytest.mark.parametrize("api_type", [AZURE])
     @configure_v0
@@ -731,11 +730,11 @@ def test_streamed_chat_completions_byod(self, set_vars, azure_openai_creds, api_
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "How is Azure machine learning different than Azure OpenAI?"}
         ]
-        setup_adapter(azure_openai_creds["chat_completions_name"])
+
         response = openai.ChatCompletion.create(
             messages=messages,
             deployment_id=azure_openai_creds["chat_completions_name"],
-            dataSources=[
+            data_sources=[
                 {
                     "type": "AzureCognitiveSearch",
                     "parameters": {
@@ -756,11 +755,9 @@ def test_streamed_chat_completions_byod(self, set_vars, azure_openai_creds, api_
                 assert c.index is not None
                 assert c.delta is not None
                 if c.delta.get("context"):
-                    assert c.delta.context.messages[0].role == "tool"
-                    assert c.delta.context.messages[0].content.find("citations") != -1
+                    assert c.delta.context.intent
+                    assert c.delta.context.citations
                 if c.delta.get("role"):
                     assert c.delta.role == "assistant"
                 if c.delta.get("content"):
                     assert c.delta.content is not None
-
-        openai.requestssession = None
diff --git a/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0_async.py b/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0_async.py
index e2a190611cd1..17664321c844 100644
--- a/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0_async.py
+++ b/sdk/openai/azure-openai/tests/v0_tests/test_chat_completions_v0_async.py
@@ -6,7 +6,7 @@
 import pytest
 import openai
 from devtools_testutils import AzureRecordedTestCase
-from conftest import configure_v0_async, ALL, AZURE, OPENAI, AZURE_AD, setup_adapter_async
+from conftest import configure_v0_async, ALL, AZURE, OPENAI, AZURE_AD
 
 
 class TestChatCompletionsAsync(AzureRecordedTestCase):
@@ -711,11 +711,11 @@ async def test_chat_completion_byod(self, set_vars, azure_openai_creds, api_type
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "How is Azure machine learning different than Azure OpenAI?"}
         ]
-        setup_adapter_async(azure_openai_creds["chat_completions_name"])
+
         completion = await openai.ChatCompletion.acreate(
             messages=messages,
             deployment_id=azure_openai_creds["chat_completions_name"],
-            dataSources=[
+            data_sources=[
                 {
                     "type": "AzureCognitiveSearch",
                     "parameters": {
@@ -735,9 +735,8 @@ async def test_chat_completion_byod(self, set_vars, azure_openai_creds, api_type
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
-        assert completion.choices[0].message.context.messages[0].role == "tool"
-        assert completion.choices[0].message.context.messages[0].content
-        openai.aiosession.set(None)
+        assert completion.choices[0].message.context.citations
+        assert completion.choices[0].message.context.intent
 
     @pytest.mark.asyncio
     @pytest.mark.parametrize("api_type", [AZURE])
@@ -747,11 +746,11 @@ async def test_streamed_chat_completions_byod(self, set_vars, azure_openai_creds
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "How is Azure machine learning different than Azure OpenAI?"}
         ]
-        setup_adapter_async(azure_openai_creds["chat_completions_name"])
+
         response = await openai.ChatCompletion.acreate(
             messages=messages,
             deployment_id=azure_openai_creds["chat_completions_name"],
-            dataSources=[
+            data_sources=[
                 {
                     "type": "AzureCognitiveSearch",
                     "parameters": {
@@ -772,11 +771,9 @@ async def test_streamed_chat_completions_byod(self, set_vars, azure_openai_creds
                 assert c.index is not None
                 assert c.delta is not None
                 if c.delta.get("context"):
-                    assert c.delta.context.messages[0].role == "tool"
-                    assert c.delta.context.messages[0].content.find("citations") != -1
+                    assert c.delta.context.citations
+                    assert c.delta.context.intent
                 if c.delta.get("role"):
                     assert c.delta.role == "assistant"
                 if c.delta.get("content"):
                     assert c.delta.content is not None
-
-        openai.aiosession.set(None)
diff --git a/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions.py b/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions.py
index 29796d74915b..e6c4d8bcf4a2 100644
--- a/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions.py
+++ b/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions.py
@@ -653,10 +653,9 @@ def test_chat_completion_byod(self, client, azure_openai_creds, api_type, **kwar
         ]
 
         completion = client.chat.completions.create(
-            model=f"{azure_openai_creds['chat_completions_name']}/extensions",
             messages=messages,
             extra_body={
-                "dataSources":[
+                "data_sources":[
                     {
                         "type": "AzureCognitiveSearch",
                         "parameters": {
@@ -666,7 +665,8 @@ def test_chat_completion_byod(self, client, azure_openai_creds, api_type, **kwar
                         }
                     }
                 ],
-            }
+            },
+            **kwargs
         )
         assert completion.id
         assert completion.object == "extensions.chat.completion"
@@ -677,8 +677,8 @@ def test_chat_completion_byod(self, client, azure_openai_creds, api_type, **kwar
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
-        assert completion.choices[0].message.model_extra["context"]["messages"][0]["role"] == "tool"
-        assert completion.choices[0].message.model_extra["context"]["messages"][0]["content"]
+        assert completion.choices[0].message.context["citations"]
+        assert completion.choices[0].message.context["intent"]
 
     @configure
     @pytest.mark.parametrize("api_type", [AZURE])
@@ -689,10 +689,9 @@ def test_streamed_chat_completions_byod(self, client, azure_openai_creds, api_ty
         ]
 
         response = client.chat.completions.create(
-            model=f"{azure_openai_creds['chat_completions_name']}/extensions",
             messages=messages,
             extra_body={
-                "dataSources":[
+                "data_sources":[
                     {
                         "type": "AzureCognitiveSearch",
                         "parameters": {
@@ -703,7 +702,8 @@ def test_streamed_chat_completions_byod(self, client, azure_openai_creds, api_ty
                     }
                 ],
             },
-            stream=True
+            stream=True,
+            **kwargs
         )
         for chunk in response:
             assert chunk.id
@@ -713,9 +713,9 @@ def test_streamed_chat_completions_byod(self, client, azure_openai_creds, api_ty
             for c in chunk.choices:
                 assert c.index is not None
                 assert c.delta is not None
-                if c.delta.model_extra.get("context"):
-                    assert c.delta.model_extra["context"]["messages"][0]["role"] == "tool"
-                    assert c.delta.model_extra["context"]["messages"][0]["content"]
+                if hasattr(c.delta, "context"):
+                    assert c.delta.context["citations"]
+                    assert c.delta.context["intent"]
                 if c.delta.role:
                     assert c.delta.role == "assistant"
                 if c.delta.content:
@@ -1050,6 +1050,26 @@ def test_chat_completion_vision(self, client, azure_openai_creds, api_type, **kw
                 }
             ],
         )
+        assert completion.object == "chat.completion"
+        assert len(completion.choices) == 1
+        assert completion.choices[0].index is not None
+        assert completion.choices[0].message.content is not None
+        assert completion.choices[0].message.role
+
+    @configure
+    @pytest.mark.parametrize("api_type", [OPENAI])
+    def test_chat_completion_logprobs(self, client, azure_openai_creds, api_type, **kwargs):
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Who won the world series in 2020?"}
+        ]
+
+        completion = client.chat.completions.create(
+            messages=messages,
+            logprobs=True,
+            top_logprobs=3,
+            **kwargs
+        )
         assert completion.id
         assert completion.object == "chat.completion"
         assert completion.model
@@ -1058,6 +1078,12 @@ def test_chat_completion_vision(self, client, azure_openai_creds, api_type, **kw
         assert completion.usage.prompt_tokens is not None
         assert completion.usage.total_tokens == completion.usage.completion_tokens + completion.usage.prompt_tokens
         assert len(completion.choices) == 1
+        assert completion.choices[0].finish_reason
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
+        assert completion.choices[0].logprobs.content
+        for logprob in completion.choices[0].logprobs.content:
+            assert logprob.token
+            assert logprob.logprob is not None  # 0.0 is a valid log probability (p == 1); don't test truthiness
+            assert logprob.bytes
diff --git a/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions_async.py b/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions_async.py
index d7b6269f6bd4..d8515714275a 100644
--- a/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions_async.py
+++ b/sdk/openai/azure-openai/tests/v1_tests/test_chat_completions_async.py
@@ -667,10 +667,9 @@ async def test_chat_completion_byod(self, client_async, azure_openai_creds, api_
         ]
 
         completion = await client_async.chat.completions.create(
-            model=f"{azure_openai_creds['chat_completions_name']}/extensions",
             messages=messages,
             extra_body={
-                "dataSources":[
+                "data_sources":[
                     {
                         "type": "AzureCognitiveSearch",
                         "parameters": {
@@ -681,6 +680,7 @@ async def test_chat_completion_byod(self, client_async, azure_openai_creds, api_
                     }
                 ],
             },
+            **kwargs
         )
         assert completion.id
         assert completion.object == "extensions.chat.completion"
@@ -691,8 +691,8 @@ async def test_chat_completion_byod(self, client_async, azure_openai_creds, api_
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
-        assert completion.choices[0].message.model_extra["context"]["messages"][0]["role"] == "tool"
-        assert completion.choices[0].message.model_extra["context"]["messages"][0]["content"]
+        assert completion.choices[0].message.context["citations"]
+        assert completion.choices[0].message.context["intent"]
 
     @configure_async
     @pytest.mark.asyncio
@@ -704,10 +704,9 @@ async def test_streamed_chat_completions_byod(self, client_async, azure_openai_c
         ]
 
         response = await client_async.chat.completions.create(
-            model=f"{azure_openai_creds['chat_completions_name']}/extensions",
             messages=messages,
             extra_body={
-                "dataSources":[
+                "data_sources":[
                     {
                         "type": "AzureCognitiveSearch",
                         "parameters": {
@@ -718,7 +717,8 @@ async def test_streamed_chat_completions_byod(self, client_async, azure_openai_c
                     }
                 ],
             },
-            stream=True
+            stream=True,
+            **kwargs
         )
         async for chunk in response:
             assert chunk.id
@@ -728,9 +728,9 @@ async def test_streamed_chat_completions_byod(self, client_async, azure_openai_c
             for c in chunk.choices:
                 assert c.index is not None
                 assert c.delta is not None
-                if c.delta.model_extra.get("context"):
-                    assert c.delta.model_extra["context"]["messages"][0]["role"] == "tool"
-                    assert c.delta.model_extra["context"]["messages"][0]["content"]
+                if hasattr(c.delta, "context"):
+                    assert c.delta.context["citations"]
+                    assert c.delta.context["intent"]
                 if c.delta.role:
                     assert c.delta.role == "assistant"
                 if c.delta.content:
@@ -1072,6 +1072,27 @@ async def test_chat_completion_vision(self, client_async, azure_openai_creds, ap
                 }
             ],
         )
+        assert completion.object == "chat.completion"
+        assert len(completion.choices) == 1
+        assert completion.choices[0].index is not None
+        assert completion.choices[0].message.content is not None
+        assert completion.choices[0].message.role
+
+    @configure_async
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("api_type", [OPENAI])
+    async def test_chat_completion_logprobs(self, client_async, azure_openai_creds, api_type, **kwargs):
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Who won the world series in 2020?"}
+        ]
+
+        completion = await client_async.chat.completions.create(
+            messages=messages,
+            logprobs=True,
+            top_logprobs=3,
+            **kwargs
+        )
         assert completion.id
         assert completion.object == "chat.completion"
         assert completion.model
@@ -1080,6 +1101,12 @@ async def test_chat_completion_vision(self, client_async, azure_openai_creds, ap
         assert completion.usage.prompt_tokens is not None
         assert completion.usage.total_tokens == completion.usage.completion_tokens + completion.usage.prompt_tokens
         assert len(completion.choices) == 1
+        assert completion.choices[0].finish_reason
         assert completion.choices[0].index is not None
         assert completion.choices[0].message.content is not None
         assert completion.choices[0].message.role
+        assert completion.choices[0].logprobs.content
+        for logprob in completion.choices[0].logprobs.content:
+            assert logprob.token
+            assert logprob.logprob is not None  # 0.0 is a valid log probability (p == 1); don't test truthiness
+            assert logprob.bytes
diff --git a/sdk/openai/azure-openai/tests/v1_tests/test_client.py b/sdk/openai/azure-openai/tests/v1_tests/test_client.py
index 7a6a966ad6f3..c63043911b44 100644
--- a/sdk/openai/azure-openai/tests/v1_tests/test_client.py
+++ b/sdk/openai/azure-openai/tests/v1_tests/test_client.py
@@ -6,6 +6,7 @@
 import os
 import pytest
 import openai
+import httpx
 from devtools_testutils import AzureRecordedTestCase
 from azure.identity import DefaultAzureCredential
 from conftest import (
@@ -15,7 +16,8 @@
     ENV_AZURE_OPENAI_API_VERSION,
     ENV_AZURE_OPENAI_CHAT_COMPLETIONS_NAME,
     configure,
-    reload
+    reload,
+    ENV_OPENAI_TEST_MODE,
 )
 
 
@@ -235,3 +237,42 @@ def test_client_env_vars_token(self, client, azure_openai_creds, api_type, **kwa
                 del os.environ['AZURE_OPENAI_ENDPOINT']
                 del os.environ['AZURE_OPENAI_AD_TOKEN']
                 del os.environ['OPENAI_API_VERSION']
+
+    @pytest.mark.parametrize(
+        "headers,timeout",
+        [
+            ({"retry-after-ms": "2000"}, 2.0),
+            ({"retry-after-ms": "2", "retry-after": "1"}, 0.002),
+            ({"Retry-After-Ms": "2", "Retry-After": "1"}, 0.002),
+            ({"retry-after-ms": "invalid"}, ...),
+            ({}, ...),
+            (None, ...),
+        ],
+    )
+    def test_parse_retry_after_ms_header(self, headers, timeout, **kwargs):
+        """Check the retry delay derived from the ``retry-after-ms`` response header.
+
+        ``timeout`` is the expected delay in seconds. ``...`` (Ellipsis) marks the
+        cases with a missing or unparsable header, where only a truthy delay
+        is asserted. The client is built with a placeholder key and only its
+        ``_calculate_retry_timeout`` method is called.
+        """
+        if os.getenv(ENV_OPENAI_TEST_MODE) != "v1":
+            pytest.skip("Skipping - tests set to run against v1.")
+
+        client = openai.AzureOpenAI(
+            azure_endpoint=os.getenv(ENV_AZURE_OPENAI_ENDPOINT),
+            api_key="key",
+            api_version=ENV_AZURE_OPENAI_API_VERSION,
+        )
+        response_headers = httpx.Headers(headers)
+        options = openai._models.FinalRequestOptions(method="post", url="/completions")
+        retry_timeout = client._calculate_retry_timeout(
+            remaining_retries=2,
+            options=options,
+            response_headers=response_headers
+        )
+        if timeout is ...:
+            assert retry_timeout  # uses the default implementation
+        else:
+            assert retry_timeout == timeout  # uses retry-after-ms
diff --git a/sdk/openai/azure-openai/tests/v1_tests/test_client_async.py b/sdk/openai/azure-openai/tests/v1_tests/test_client_async.py
index 58e8ee4e2139..f56c70867f9d 100644
--- a/sdk/openai/azure-openai/tests/v1_tests/test_client_async.py
+++ b/sdk/openai/azure-openai/tests/v1_tests/test_client_async.py
@@ -6,6 +6,7 @@
 import os
 import pytest
 import openai
+import httpx
 from devtools_testutils import AzureRecordedTestCase
 from azure.identity.aio import DefaultAzureCredential
 from conftest import (
@@ -16,6 +17,7 @@
     ENV_AZURE_OPENAI_CHAT_COMPLETIONS_NAME,
     configure_async,
     reload,
+    ENV_OPENAI_TEST_MODE,
 )
 
 
@@ -250,3 +252,42 @@ async def test_client_env_vars_token(self, client_async, azure_openai_creds, api
                 del os.environ['AZURE_OPENAI_ENDPOINT']
                 del os.environ['AZURE_OPENAI_AD_TOKEN']
                 del os.environ['OPENAI_API_VERSION']
+
+    @pytest.mark.parametrize(
+        "headers,timeout",
+        [
+            ({"retry-after-ms": "2000"}, 2.0),
+            ({"retry-after-ms": "2", "retry-after": "1"}, 0.002),
+            ({"Retry-After-Ms": "2", "Retry-After": "1"}, 0.002),
+            ({"retry-after-ms": "invalid"}, ...),
+            ({}, ...),
+            (None, ...),
+        ],
+    )
+    def test_parse_retry_after_ms_header(self, headers, timeout, **kwargs):
+        """Check the retry delay derived from the ``retry-after-ms`` response header.
+
+        ``timeout`` is the expected delay in seconds. ``...`` (Ellipsis) marks the
+        cases with a missing or unparsable header, where only a truthy delay
+        is asserted. The async client is built with a placeholder key and only
+        its ``_calculate_retry_timeout`` method is called, without awaiting.
+        """
+        if os.getenv(ENV_OPENAI_TEST_MODE) != "v1":
+            pytest.skip("Skipping - tests set to run against v1.")
+
+        client = openai.AsyncAzureOpenAI(
+            azure_endpoint=os.getenv(ENV_AZURE_OPENAI_ENDPOINT),
+            api_key="key",
+            api_version=ENV_AZURE_OPENAI_API_VERSION,
+        )
+        response_headers = httpx.Headers(headers)
+        options = openai._models.FinalRequestOptions(method="post", url="/completions")
+        retry_timeout = client._calculate_retry_timeout(
+            remaining_retries=2,
+            options=options,
+            response_headers=response_headers
+        )
+        if timeout is ...:
+            assert retry_timeout  # uses the default implementation
+        else:
+            assert retry_timeout == timeout  # uses retry-after-ms