Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
db6b7e3
Initial speech to text abstractions
rogerbarreto Mar 18, 2025
4bdb7b9
Address some feedback (still more things to address)
stephentoub Mar 18, 2025
14c37c2
Merge branch 'main' of https://github.com/rogerbarreto/extensions int…
rogerbarreto Mar 29, 2025
fad8017
Resolve conflict
rogerbarreto Mar 29, 2025
3448daa
Ensure UT are working before further changes
rogerbarreto Mar 29, 2025
ef93211
Update method names Transcribe / Response to GetText
rogerbarreto Mar 29, 2025
43d610c
Update Test Names to new Method names
rogerbarreto Mar 29, 2025
ff4ae4a
Change interface from IList<IAsyncEnumerable> to one stream item at a…
rogerbarreto Mar 29, 2025
0831000
Update XmlDocs with correct definition, ensure correct naming
rogerbarreto Mar 29, 2025
8c893a9
Dropping the Choice / Message concept, flattening the Message with th…
rogerbarreto Mar 29, 2025
3d91982
Remove CultureInfo complexity from language properties
rogerbarreto Mar 30, 2025
009eeca
Adding Prompt property to options + UT
rogerbarreto Mar 30, 2025
305e7e4
Revert global.json changes
rogerbarreto Mar 30, 2025
1feac6d
Add missing experimental
rogerbarreto Mar 30, 2025
956097d
Fix UT
rogerbarreto Mar 30, 2025
0830a51
Address PR comments
rogerbarreto Mar 31, 2025
72407f2
Fix unit tests
rogerbarreto Mar 31, 2025
3c7e4ae
Fix UT
rogerbarreto Apr 1, 2025
8763c8c
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto Apr 1, 2025
8d473cb
Merge branch 'audio-transcription-abstraction' of https://github.com/…
rogerbarreto Apr 1, 2025
c6c016e
Address PR comments
rogerbarreto Apr 1, 2025
b3d7819
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto Apr 2, 2025
ca1338b
Remove async wrapping
rogerbarreto Apr 2, 2025
d3a14c9
Adjusting concat / text fields
rogerbarreto Apr 2, 2025
263f0e0
Start time and end time added to update + UT covering
rogerbarreto Apr 2, 2025
dd5ec14
AsISpeechToText renaming
rogerbarreto Apr 2, 2025
9eabb98
Remove OpenAIClient ctor + small fixes
rogerbarreto Apr 2, 2025
78e4ebb
Removing rawrepresentation impl from Update -> Response
rogerbarreto Apr 2, 2025
46acd1c
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto Apr 2, 2025
8bf3389
Add missing AsISpeechToText UT
rogerbarreto Apr 2, 2025
c5c6e89
Add GetService UT
rogerbarreto Apr 2, 2025
977a0e5
Warning fix
rogerbarreto Apr 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Address PR comments
  • Loading branch information
rogerbarreto committed Mar 31, 2025
commit 0830a514428856a0d199094efa67cf11ef4ddd0f
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;
Expand Down Expand Up @@ -46,7 +47,10 @@ public static async Task<SpeechToTextResponse> GetTextAsync(
_ = Throw.IfNull(client);
_ = Throw.IfNull(audioSpeechContent);

using var audioSpeechStream = new MemoryStream(audioSpeechContent.Data.ToArray());
using var audioSpeechStream = MemoryMarshal.TryGetArray(audioSpeechContent.Data, out var array) ?
new MemoryStream(array.Array!, array.Offset, array.Count) :
new MemoryStream(audioSpeechContent.Data.ToArray());

return await client.GetTextAsync(audioSpeechStream, options, cancellationToken).ConfigureAwait(false);
}

Expand All @@ -65,7 +69,10 @@ public static async IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTex
_ = Throw.IfNull(client);
_ = Throw.IfNull(audioSpeechContent);

using var audioSpeechStream = new MemoryStream(audioSpeechContent.Data.ToArray());
using var audioSpeechStream = MemoryMarshal.TryGetArray(audioSpeechContent.Data, out var array) ?
new MemoryStream(array.Array!, array.Offset, array.Count) :
new MemoryStream(audioSpeechContent.Data.ToArray());

await foreach (var update in client.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken).ConfigureAwait(false))
{
yield return update;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ public class SpeechToTextClientMetadata
/// appropriate name defined in the OpenTelemetry Semantic Conventions for Generative AI systems.
/// </param>
/// <param name="providerUri">The URL for accessing the speech to text provider, if applicable.</param>
/// <param name="modelId">The ID of the speech to text model used, if applicable.</param>
public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri = null, string? modelId = null)
/// <param name="defaultModelId">The ID of the speech to text model used by default, if applicable.</param>
public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri = null, string? defaultModelId = null)
{
ModelId = modelId;
DefaultModelId = defaultModelId;
ProviderName = providerName;
ProviderUri = providerUri;
}
Expand All @@ -34,10 +34,10 @@ public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri
/// <summary>Gets the URL for accessing the speech to text provider.</summary>
public Uri? ProviderUri { get; }

/// <summary>Gets the ID of the model used by this speech to text provider.</summary>
/// <summary>Gets the ID of the default model used by this speech to text client.</summary>
/// <remarks>
/// This value can be null if either the model ID is unknown or there are multiple possible models associated with this instance.
/// An individual request may override this value via <see cref="SpeechToTextOptions.ModelId"/>.
/// </remarks>
public string? ModelId { get; }
public string? DefaultModelId { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ namespace Microsoft.Extensions.AI;
[Experimental("MEAI001")]
public class SpeechToTextOptions
{
/// <summary>Gets or sets the ID for the speech to text.</summary>
/// <remarks>Long running jobs may use this ID for status polling.</remarks>
public string? ResponseId { get; set; }

/// <summary>Gets or sets the model ID for the speech to text.</summary>
public string? ModelId { get; set; }

Expand All @@ -22,9 +18,6 @@ public class SpeechToTextOptions
/// <summary>Gets or sets the language for the target generated text.</summary>
public string? TextLanguage { get; set; }

/// <summary>Gets or sets the prompt to be used for the speech to text request.</summary>
public string? Prompt { get; set; }

/// <summary>Gets or sets the sample rate of the speech input audio.</summary>
public int? SpeechSampleRate { get; set; }

Expand All @@ -37,12 +30,10 @@ public virtual SpeechToTextOptions Clone()
{
SpeechToTextOptions options = new()
{
ResponseId = ResponseId,
ModelId = ModelId,
SpeechLanguage = SpeechLanguage,
TextLanguage = TextLanguage,
SpeechSampleRate = SpeechSampleRate,
Prompt = Prompt,
AdditionalProperties = AdditionalProperties?.Clone(),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;

using System.Text.Json.Serialization;
using Microsoft.Shared.Diagnostics;

#pragma warning disable EA0011 // Consider removing unnecessary conditional access operators

namespace Microsoft.Extensions.AI;

/// <summary>Represents the result of a speech to text request.</summary>
Expand Down Expand Up @@ -62,29 +62,12 @@ public SpeechToTextResponse(string? content)
/// <summary>Gets or sets any additional properties associated with the speech to text completion.</summary>
public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }

/// <summary>
/// Gets or sets the text of the first <see cref="TextContent"/> instance in <see cref="Contents" />.
/// </summary>
/// <summary>Gets the text of this speech to text response.</summary>
/// <remarks>
/// If there is no <see cref="TextContent"/> instance in <see cref="Contents" />, then the getter returns <see langword="null" />,
/// and the setter adds a new <see cref="TextContent"/> instance with the provided value.
/// This property concatenates the text of all <see cref="TextContent"/> objects in <see cref="Contents"/>.
/// </remarks>
[JsonIgnore]
public string? Text
{
get => Contents.OfType<TextContent>().FirstOrDefault()?.Text;
set
{
if (Contents.OfType<TextContent>().FirstOrDefault() is { } textContent)
{
textContent.Text = value;
}
else if (value is not null)
{
Contents.Add(new TextContent(value));
}
}
}
public string Text => Contents?.ConcatText() ?? string.Empty;

/// <inheritdoc />
public override string ToString() => Contents.ConcatText();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text.Json.Serialization;
using Microsoft.Shared.Diagnostics;

#pragma warning disable EA0011 // Consider removing unnecessary conditional access operators

namespace Microsoft.Extensions.AI;

/// <summary>
Expand Down Expand Up @@ -81,29 +82,12 @@ public SpeechToTextResponseUpdate(string? content)
/// <summary>Gets or sets additional properties for the update.</summary>
public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }

/// <summary>
/// Gets or sets the text of the first <see cref="TextContent"/> instance in <see cref="Contents" />.
/// </summary>
/// <summary>Gets the text of this speech to text response.</summary>
/// <remarks>
/// If there is no <see cref="TextContent"/> instance in <see cref="Contents" />, then the getter returns <see langword="null" />,
/// and the setter adds a new <see cref="TextContent"/> instance with the provided value.
/// This property concatenates the text of all <see cref="TextContent"/> objects in <see cref="Contents"/>.
/// </remarks>
[JsonIgnore]
public string? Text
{
get => Contents.OfType<TextContent>().FirstOrDefault()?.Text;
set
{
if (Contents.OfType<TextContent>().FirstOrDefault() is { } textContent)
{
textContent.Text = value;
}
else if (value is not null)
{
Contents.Add(new TextContent(value));
}
}
}
public string Text => Contents?.ConcatText() ?? string.Empty;

/// <summary>Gets or sets the generated content items.</summary>
[AllowNull]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@ public static class SpeechToTextResponseUpdateExtensions
{
/// <summary>Combines <see cref="SpeechToTextResponseUpdate"/> instances into a single <see cref="SpeechToTextResponse"/>.</summary>
/// <param name="updates">The updates to be combined.</param>
/// <param name="coalesceContent">
/// <see langword="true"/> to attempt to coalesce contiguous <see cref="AIContent"/> items, where applicable,
/// into a single <see cref="AIContent"/>. When <see langword="false"/>, the original content items are used.
/// The default is <see langword="true"/>.
/// </param>
/// <returns>The combined <see cref="SpeechToTextResponse"/>.</returns>
public static SpeechToTextResponse ToSpeechToTextResponse(
this IEnumerable<SpeechToTextResponseUpdate> updates, bool coalesceContent = true)
this IEnumerable<SpeechToTextResponseUpdate> updates)
{
_ = Throw.IfNull(updates);

Expand All @@ -41,10 +36,7 @@ public static SpeechToTextResponse ToSpeechToTextResponse(
ProcessUpdate(update, contents, ref responseId, ref modelId, ref rawRepresentation, ref additionalProperties);
}

if (coalesceContent)
{
ChatResponseExtensions.CoalesceTextContent(contents);
}
ChatResponseExtensions.CoalesceTextContent(contents);

response.Contents = contents;
response.ResponseId = responseId;
Expand All @@ -57,22 +49,17 @@ public static SpeechToTextResponse ToSpeechToTextResponse(

/// <summary>Combines <see cref="SpeechToTextResponseUpdate"/> instances into a single <see cref="SpeechToTextResponse"/>.</summary>
/// <param name="updates">The updates to be combined.</param>
/// <param name="coalesceContent">
/// <see langword="true"/> to attempt to coalesce contiguous <see cref="AIContent"/> items, where applicable,
/// into a single <see cref="AIContent"/>. When <see langword="false"/>, the original content items are used.
/// The default is <see langword="true"/>.
/// </param>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
/// <returns>The combined <see cref="SpeechToTextResponse"/>.</returns>
public static Task<SpeechToTextResponse> ToSpeechToTextResponseAsync(
this IAsyncEnumerable<SpeechToTextResponseUpdate> updates, bool coalesceContent = true, CancellationToken cancellationToken = default)
this IAsyncEnumerable<SpeechToTextResponseUpdate> updates, CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(updates);

return ToResponseAsync(updates, coalesceContent, cancellationToken);
return ToResponseAsync(updates, cancellationToken);

static async Task<SpeechToTextResponse> ToResponseAsync(
IAsyncEnumerable<SpeechToTextResponseUpdate> updates, bool coalesceContent, CancellationToken cancellationToken)
IAsyncEnumerable<SpeechToTextResponseUpdate> updates, CancellationToken cancellationToken)
{
SpeechToTextResponse response = new();
List<AIContent> contents = [];
Expand All @@ -86,10 +73,7 @@ static async Task<SpeechToTextResponse> ToResponseAsync(
ProcessUpdate(update, contents, ref responseId, ref modelId, ref rawRepresentation, ref additionalProperties);
}

if (coalesceContent)
{
ChatResponseExtensions.CoalesceTextContent(contents);
}
ChatResponseExtensions.CoalesceTextContent(contents);

response.Contents = contents;
response.ResponseId = responseId;
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@

using System;
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.Diagnostics;
using OpenAI;
using OpenAI.Audio;
using OpenAI.Chat;
using OpenAI.Embeddings;
using OpenAI.Responses;
Expand Down Expand Up @@ -37,21 +35,6 @@ public static IChatClient AsIChatClient(this ChatClient chatClient) =>
public static IChatClient AsIChatClient(this OpenAIResponseClient responseClient) =>
new OpenAIResponseChatClient(responseClient);

/// <summary>Gets an <see cref="ISpeechToTextClient"/> for use with this <see cref="OpenAIClient"/>.</summary>
/// <param name="openAIClient">The client.</param>
/// <param name="modelId">The model.</param>
/// <returns>An <see cref="ISpeechToTextClient"/> that can be used to transcribe audio via the <see cref="OpenAIClient"/>.</returns>
[Experimental("MEAI001")]
public static ISpeechToTextClient AsSpeechToTextClient(this OpenAIClient openAIClient, string modelId) =>
new OpenAISpeechToTextClient(openAIClient, modelId);

/// <summary>Gets an <see cref="ISpeechToTextClient"/> for use with this <see cref="AudioClient"/>.</summary>
/// <param name="audioClient">The client.</param>
/// <returns>An <see cref="ISpeechToTextClient"/> that can be used to transcribe audio via the <see cref="AudioClient"/>.</returns>
[Experimental("MEAI001")]
public static ISpeechToTextClient AsSpeechToTextClient(this AudioClient audioClient) =>
new OpenAISpeechToTextClient(audioClient);

/// <summary>Gets an <see cref="IEmbeddingGenerator{String, Single}"/> for use with this <see cref="OpenAIClient"/>.</summary>
/// <param name="openAIClient">The client.</param>
/// <param name="modelId">The model to use.</param>
Expand Down
Loading
Loading