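Update Azure OpenAI (AOAI) tests: add Azure text-to-speech (TTS) coverage and bump the API version to 2024-01-01-preview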

Azure · kristapratico · Feb 23, 2024 · Jan 10, 2024 · Jan 20, 2024 · Feb 6, 2024
commit d08d8e6780e2de802473ef8147034edbaa56869c
@@ -33,6 +33,8 @@
 WHISPER_AZURE_AD = "whisper_azuread"
 WHISPER_ALL = ["whisper_azure", "whisper_azuread", "openai"]
 TTS_OPENAI = "tts_openai"
+TTS_AZURE = "tts_azure"
+TTS_AZURE_AD = "tts_azuread"
 DALLE_AZURE = "dalle_azure"
 DALLE_AZURE_AD = "dalle_azuread"
 DALLE_ALL = ["dalle_azure", "dalle_azuread", "openai"]
@@ -56,13 +58,14 @@
 ENV_AZURE_OPENAI_SEARCH_KEY = "AZURE_OPENAI_SEARCH_KEY"
 ENV_AZURE_OPENAI_SEARCH_INDEX = "AZURE_OPENAI_SEARCH_INDEX"
 
-ENV_AZURE_OPENAI_API_VERSION = "2023-12-01-preview"
+ENV_AZURE_OPENAI_API_VERSION = "2024-01-01-preview"
 ENV_AZURE_OPENAI_COMPLETIONS_NAME = "gpt-35-turbo-instruct"
 ENV_AZURE_OPENAI_CHAT_COMPLETIONS_NAME = "gpt-35-turbo-16k"
 ENV_AZURE_OPENAI_EMBEDDINGS_NAME = "text-embedding-ada-002"
 ENV_AZURE_OPENAI_AUDIO_NAME = "whisper"
 ENV_AZURE_OPENAI_DALLE_NAME = "dall-e-3"
 ENV_AZURE_OPENAI_CHAT_COMPLETIONS_GPT4_NAME = "gpt-4-1106-preview"
+ENV_AZURE_OPENAI_TTS_NAME = "tts"
 
 ENV_OPENAI_KEY = "OPENAI_KEY"
 ENV_OPENAI_COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct"
@@ -143,13 +146,13 @@ def client(api_type):
         client = openai.OpenAI(
             api_key=os.getenv(ENV_OPENAI_KEY)
         )
-    elif api_type == "whisper_azure":
+    elif api_type in ["whisper_azure", "tts_azure"]:
         client = openai.AzureOpenAI(
             azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
             api_key=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_KEY),
             api_version=ENV_AZURE_OPENAI_API_VERSION,
         )
-    elif api_type == "whisper_azuread":
+    elif api_type in ["whisper_azuread", "tts_azuread"]:
         client = openai.AzureOpenAI(
             azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
             azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
@@ -190,13 +193,13 @@ def client_async(api_type):
         client = openai.AsyncOpenAI(
             api_key=os.getenv(ENV_OPENAI_KEY)
         )
-    elif api_type == "whisper_azure":
+    elif api_type in ["whisper_azure", "tts_azure"]:
         client = openai.AsyncAzureOpenAI(
             azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
             api_key=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_KEY),
             api_version=ENV_AZURE_OPENAI_API_VERSION,
         )
-    elif api_type == "whisper_azuread":
+    elif api_type in ["whisper_azuread", "tts_azuread"]:
         client = openai.AsyncAzureOpenAI(
             azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
             azure_ad_token_provider=get_bearer_token_provider_async(AsyncDefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
@@ -226,6 +229,8 @@ def build_kwargs(args, api_type):
             return {"model": ENV_OPENAI_AUDIO_MODEL}
         elif api_type == "tts_openai":
             return {"model": ENV_OPENAI_TTS_MODEL}
+        elif api_type in ["tts_azure", "tts_azuread"]:
+            return {"model": ENV_AZURE_OPENAI_TTS_NAME}
     if test_feature.startswith("test_chat_completions") \
         or test_feature.startswith(("test_client", "test_models")):
         if api_type in ["azure", "azuread"]:

@@ -8,7 +8,7 @@
 import pathlib
 import uuid
 from devtools_testutils import AzureRecordedTestCase
-from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure, TTS_OPENAI
+from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure, TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD
 
 audio_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/hello.m4a"))
 audio_long_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/wikipediaOcelot.wav"))
@@ -215,7 +215,7 @@ def test_translate_options(self, client, azure_openai_creds, api_type, **kwargs)
         assert result.text == "Hello"
 
     @configure
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD])
     def test_tts(self, client, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
@@ -225,27 +225,27 @@ def test_tts(self, client, azure_openai_creds, api_type, **kwargs):
                 input="The quick brown fox jumped over the lazy dog.",
                 **kwargs,
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            response.write_to_file(speech_file_path)
         finally:
             os.remove(speech_file_path)
 
     @configure
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
-    def test_tts_hd(self, client, azure_openai_creds, api_type, **kwargs):
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
+    def test_tts_hd_streaming(self, client, azure_openai_creds, api_type, **kwargs):
+
+        with client.audio.speech.with_streaming_response.create(
+            voice="echo",
+            input="The quick brown fox jumped over the lazy dog.",
+            model="tts-1-hd"
+        ) as response:
+            response.read()
 
-        speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
-        try:
-            response = client.audio.speech.create(
-                voice="echo",
-                input="The quick brown fox jumped over the lazy dog.",
-                model="tts-1-hd"
-            )
-            response.stream_to_file(speech_file_path)
-        finally:
-            os.remove(speech_file_path)
 
     @configure
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
     def test_tts_response_format(self, client, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.flac"
@@ -256,12 +256,15 @@ def test_tts_response_format(self, client, azure_openai_creds, api_type, **kwarg
                 response_format="flac",
                 **kwargs
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            response.stream_to_file(speech_file_path)  # deprecated
         finally:
             os.remove(speech_file_path)
 
     @configure
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
     def test_tts_speed(self, client, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
@@ -272,6 +275,9 @@ def test_tts_speed(self, client, azure_openai_creds, api_type, **kwargs):
                 speed=3.0,
                 **kwargs
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            response.write_to_file(speech_file_path)
         finally:
             os.remove(speech_file_path)
@@ -8,7 +8,7 @@
 import pathlib
 import uuid
 from devtools_testutils import AzureRecordedTestCase
-from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure_async, TTS_OPENAI
+from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure_async, TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD
 
 audio_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/hello.m4a"))
 audio_long_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/wikipediaOcelot.wav"))
@@ -231,7 +231,7 @@ async def test_translate_options(self, client_async, azure_openai_creds, api_typ
 
     @configure_async
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD])
     async def test_tts(self, client_async, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
@@ -241,29 +241,28 @@ async def test_tts(self, client_async, azure_openai_creds, api_type, **kwargs):
                 input="The quick brown fox jumped over the lazy dog.",
                 **kwargs,
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            response.write_to_file(speech_file_path)
         finally:
             os.remove(speech_file_path)
 
     @configure_async
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
     async def test_tts_hd(self, client_async, azure_openai_creds, api_type, **kwargs):
 
-        speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
-        try:
-            response = await client_async.audio.speech.create(
-                voice="echo",
-                input="The quick brown fox jumped over the lazy dog.",
-                model="tts-1-hd"
-            )
-            response.stream_to_file(speech_file_path)
-        finally:
-            os.remove(speech_file_path)
+        async with client_async.audio.speech.with_streaming_response.create(
+            voice="echo",
+            input="The quick brown fox jumped over the lazy dog.",
+            model="tts-1-hd"
+        ) as response:
+            await response.read()
 
     @configure_async
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
     async def test_tts_response_format(self, client_async, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.flac"
@@ -274,13 +273,16 @@ async def test_tts_response_format(self, client_async, azure_openai_creds, api_t
                 response_format="flac",
                 **kwargs
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            await response.astream_to_file(speech_file_path)  # deprecated
         finally:
             os.remove(speech_file_path)
 
     @configure_async
     @pytest.mark.asyncio
-    @pytest.mark.parametrize("api_type", [TTS_OPENAI])
+    @pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
     async def test_tts_speed(self, client_async, azure_openai_creds, api_type, **kwargs):
 
         speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
@@ -291,6 +293,9 @@ async def test_tts_speed(self, client_async, azure_openai_creds, api_type, **kwa
                 speed=3.0,
                 **kwargs
             )
-            response.stream_to_file(speech_file_path)
+            assert response.encoding
+            assert response.content
+            assert response.text
+            response.write_to_file(speech_file_path)
         finally:
             os.remove(speech_file_path)