Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ff760d9
[multimodal] support qwen2audio example
yuekaizhang Aug 28, 2025
83768fa
fix doc
yuekaizhang Aug 28, 2025
fe1df16
add disagg serving
yuekaizhang Aug 28, 2025
1b10ff1
add audio_url support
yuekaizhang Aug 29, 2025
f08e365
rebase with latest changes
yuekaizhang Sep 9, 2025
8fbe586
update audio url
yuekaizhang Sep 22, 2025
ddd75b6
update audio lib
yuekaizhang Sep 22, 2025
d579da2
fix pydantic import error
yuekaizhang Sep 22, 2025
1b32cc3
fix lint
yuekaizhang Sep 22, 2025
7616ed4
remove rebuild
yuekaizhang Sep 26, 2025
063131e
Merge branch 'main' into pr
krishung5 Sep 29, 2025
e6de1c9
Merge branch 'main' into pr
krishung5 Oct 1, 2025
7af96a7
Increase timeout for audio test
krishung5 Oct 1, 2025
318a3d0
Merge branch 'main' into pr
krishung5 Oct 1, 2025
56b4d34
Merge remote-tracking branch 'upstream/main' into pr
krishung5 Oct 1, 2025
6a3815d
Merge remote-tracking branch 'upstream/main' into pr
krishung5 Oct 2, 2025
c2a7b49
Merge remote-tracking branch 'upstream/main' into pr
krishung5 Oct 6, 2025
d434df2
Merge remote-tracking branch 'upstream/main' into pr
krishung5 Oct 6, 2025
be9ad22
Download audio deps
krishung5 Oct 6, 2025
dc2328c
remove graph
krishung5 Oct 6, 2025
09d7b32
Merge branch 'main' into pr
krishung5 Oct 7, 2025
052b6a7
Merge branch 'main' into pr
krishung5 Oct 8, 2025
53bcda3
Merge branch 'main' into pr
krishung5 Oct 15, 2025
83d48c4
Fix links
krishung5 Oct 15, 2025
9683914
Merge branch 'main' into pr
krishung5 Oct 16, 2025
17a8151
Merge branch 'main' into pr
krishung5 Oct 17, 2025
697d539
Merge branch 'main' into pr
krishung5 Oct 20, 2025
c02e867
fix audio url case
yuekaizhang Nov 3, 2025
95a4909
add lru cache
yuekaizhang Nov 4, 2025
5ab7e9c
Merge branch 'main' into pr
krishung5 Nov 8, 2025
00484b8
Merge branch 'main' into pr
krishung5 Nov 12, 2025
50d3684
Remove static from dynamo_worker
krishung5 Nov 12, 2025
bcf5e9e
Merge branch 'main' into pr
krishung5 Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add lru cache
Signed-off-by: Yuekai Zhang <[email protected]>
  • Loading branch information
yuekaizhang committed Nov 4, 2025
commit 95a4909b05c1890c6d31cd00bdabfec707489f38
73 changes: 33 additions & 40 deletions examples/multimodal/utils/audio_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.

import asyncio
import functools
import logging
from io import BytesIO
from typing import Tuple
Expand All @@ -31,54 +32,46 @@ class AudioLoader:

def __init__(self, cache_size: int = CACHE_SIZE_MAXIMUM):
self._http_timeout = 30.0
self._http_client = httpx.AsyncClient(timeout=self._http_timeout)
self._audio_cache: dict[str, Tuple[np.ndarray, float]] = {}
self._cache_queue: asyncio.Queue[str] = asyncio.Queue(maxsize=cache_size)
# functools.lru_cache is not directly compatible with async methods.
# We create a synchronous method for fetching and processing audio,
# and then apply lru_cache to it. This cached synchronous method
# is then called from our async method using asyncio.to_thread.
self._load_and_process_audio_cached = functools.lru_cache(maxsize=cache_size)(
self._load_and_process_audio
)

def _load_and_process_audio(
self, audio_url: str, sampling_rate: int
) -> Tuple[np.ndarray, float]:
"""
Synchronously loads and processes audio from a URL.
This method is memoized using lru_cache.
"""
with httpx.Client(timeout=self._http_timeout) as client:
response = client.get(audio_url)
response.raise_for_status()

if not response.content:
raise ValueError("Empty response content from audio URL")

audio_data_stream = BytesIO(response.content)
audio_data, sr = librosa.load(audio_data_stream, sr=sampling_rate)
return audio_data, sr

async def load_audio(
self, audio_url: str, sampling_rate: int = 16000
) -> Tuple[np.ndarray, float]:
parsed_url = urlparse(audio_url)

# For HTTP(S) URLs, check cache first
if parsed_url.scheme in ("http", "https"):
if audio_url in self._audio_cache:
logger.debug(f"Audio found in cache for URL: {audio_url}")
return self._audio_cache[audio_url]
if parsed_url.scheme not in ("http", "https"):
raise ValueError(f"Invalid audio source scheme: {parsed_url.scheme}")

try:
if parsed_url.scheme in ("http", "https"):
if not self._http_client:
raise RuntimeError("HTTP client not initialized")

response = await self._http_client.get(audio_url)
response.raise_for_status()

if not response.content:
raise ValueError("Empty response content from audio URL")

audio_data_stream = BytesIO(response.content)
else:
raise ValueError(f"Invalid audio source scheme: {parsed_url.scheme}")

# librosa.load is sync, so offload to a thread to avoid blocking the event loop
def _load_audio():
return librosa.load(audio_data_stream, sr=16000)

audio_data, sr = await asyncio.to_thread(_load_audio)

# Cache HTTP(S) URLs
if parsed_url.scheme in ("http", "https"):
# Cache the audio for future use, and evict the oldest audio if the cache is full
if self._cache_queue.full():
oldest_audio_url = await self._cache_queue.get()
del self._audio_cache[oldest_audio_url]

self._audio_cache[audio_url] = (audio_data, sr)
await self._cache_queue.put(audio_url)

return audio_data, sr

# Offload the synchronous, cached function to a separate thread
# to avoid blocking the asyncio event loop.
return await asyncio.to_thread(
self._load_and_process_audio_cached, audio_url, sampling_rate
)
except httpx.HTTPError as e:
logger.error(f"HTTP error loading audio: {e}")
raise
Expand Down
Loading