Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Cleaning up comments + Logs
Signed-off-by: Krishnan Prashanth <[email protected]>
  • Loading branch information
KrishnanPrash committed Nov 13, 2025
commit e5c495b099fa26dc5f6d1aa3710e6819f688162c
16 changes: 7 additions & 9 deletions components/src/dynamo/vllm/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Dict, Final

+import PIL
import torch
-from PIL import Image
from vllm.inputs import TokensPrompt
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.exceptions import EngineDeadError
Expand Down Expand Up @@ -126,7 +126,7 @@ def cleanup(self):

async def _read_decoded_image_via_nixl(
self, decoded_meta: Dict[str, Any]
-) -> Image.Image:
+) -> PIL.Image.Image:
"""Read decoded image via NIXL RDMA and convert to PIL.Image."""
# Lazy-init connector
if self._connector is None:
Expand Down Expand Up @@ -170,7 +170,7 @@ async def _read_decoded_image_via_nixl(
modes = {1: "L", 3: "RGB", 4: "RGBA"}
if modes[shape[2]] == "L":
arr = arr.squeeze(-1)
-return Image.fromarray(arr, modes[shape[2]])
+return PIL.Image.fromarray(arr, modes[shape[2]])

async def _extract_multimodal_data(
self, request: Dict[str, Any]
Expand All @@ -179,8 +179,8 @@ async def _extract_multimodal_data(
Extract and decode multimodal data from PreprocessedRequest.

Supports two variants:
-1. Url: Frontend passes URL, backend decodes (fallback, slower)
-2. Decoded: Frontend decoded, NIXL RDMA transfer (optimal, faster)
+1. Url: Frontend passes URL, backend decodes
+2. Decoded: Frontend decoded, NIXL RDMA transfer
"""
if "multi_modal_data" not in request or request["multi_modal_data"] is None:
return None
Expand All @@ -192,21 +192,19 @@ async def _extract_multimodal_data(
images = []
for item in mm_map.get(IMAGE_URL_KEY, []):
if isinstance(item, dict) and DECODED_VARIANT_KEY in item:
-# Fast path: Frontend decoded, NIXL RDMA transfer (PR #3988)
decoded_meta = item[DECODED_VARIANT_KEY]
image = await self._read_decoded_image_via_nixl(decoded_meta)
images.append(image)
logger.info(
-f"Using DECODED path: Loaded image via NIXL RDMA "
+f"Using DECODED path: Loaded image via NIXL RDMA "
f"(shape={decoded_meta.get('shape')}, dtype={decoded_meta.get('dtype')})"
)
elif isinstance(item, dict) and URL_VARIANT_KEY in item:
-# Fallback path: Decode URL in Python backend (current behavior)
url = item[URL_VARIANT_KEY]
image = await self.image_loader.load_image(url)
images.append(image)
logger.info(
-f"Using URL path: Loaded image from URL (type={url.split(':')[0]})"
+f"Using URL path: Loaded image from URL (type={url.split(':')[0]})"
)

if images:
Expand Down
3 changes: 0 additions & 3 deletions components/src/dynamo/vllm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,6 @@ async def register_vllm_model(
data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
runtime_config.data_parallel_size = data_parallel_size

-# Enable frontend RDMA decoding with default settings
-# MediaDecoder defaults: 128MB limit, sensible image size limits
-# MediaFetcher defaults: 30s timeout, secure (no direct IP/port)
await register_llm(
model_input,
model_type,
Expand Down
Loading