Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit (hold Shift + click to select a range)
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
update variable name + add --frontend-decoding flag
Signed-off-by: Krishnan Prashanth <[email protected]>
  • Loading branch information
KrishnanPrash committed Nov 24, 2025
commit 7f41e85847e00d4752eb3c4c01341e69646d00b9
11 changes: 11 additions & 0 deletions components/src/dynamo/vllm/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Config:
multimodal_decode_worker: bool = False
multimodal_encode_prefill_worker: bool = False
mm_prompt_template: str = "USER: <image>\n<prompt> ASSISTANT:"
frontend_decoding: bool = False
# dump config to file
dump_config_to: Optional[str] = None

Expand Down Expand Up @@ -175,6 +176,16 @@ def parse_args() -> Config:
"'USER: <image> please describe the image ASSISTANT:'."
),
)
parser.add_argument(
"--frontend-decoding",
action="store_true",
help=(
"EXPERIMENTAL: Enable frontend decoding of multimodal images. "
"When enabled, images are decoded in the Rust frontend and transferred to the backend via NIXL RDMA. "
"Requires building Dynamo's Rust components with '--features media-nixl'. "
"Without this flag, images are decoded in the Python backend (default behavior)."
),
)
parser.add_argument(
"--store-kv",
type=str,
Expand Down
26 changes: 22 additions & 4 deletions components/src/dynamo/vllm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

from dynamo.common.config_dump import dump_config
from dynamo.llm import (
MediaDecoder,
MediaFetcher,
ModelInput,
ModelRuntimeConfig,
ModelType,
Expand Down Expand Up @@ -311,6 +309,26 @@ async def register_vllm_model(
data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
runtime_config.data_parallel_size = data_parallel_size

# Conditionally enable frontend decoding if --frontend-decoding flag is set
media_decoder = None
media_fetcher = None
if config.frontend_decoding:
try:
from dynamo.llm import MediaDecoder, MediaFetcher

media_decoder = MediaDecoder()
media_fetcher = MediaFetcher()
logger.info(
"Frontend decoding enabled: images will be decoded in Rust frontend "
"and transferred via NIXL RDMA"
)
except ImportError as e:
raise RuntimeError(
"Frontend decoding (--frontend-decoding) requires building Dynamo's "
"Rust components with '--features media-nixl'. "
f"Import failed: {e}"
) from e

await register_llm(
model_input,
model_type,
Expand All @@ -321,8 +339,8 @@ async def register_vllm_model(
migration_limit=migration_limit,
runtime_config=runtime_config,
custom_template_path=config.custom_jinja_template,
media_decoder=MediaDecoder(),
media_fetcher=MediaFetcher(),
media_decoder=media_decoder,
media_fetcher=media_fetcher,
)


Expand Down
7 changes: 4 additions & 3 deletions lib/llm/src/preprocessor/media/rdma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,10 @@ pub fn get_nixl_metadata(agent: &NixlAgent, _storage: &SystemStorage) -> Result<

// Compress metadata before base64 encoding (matches Python nixl_connect behavior)
// Backend expects: b64:<base64_of_compressed_bytes>
let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(6));
encoder.write_all(&nixl_md)?;
let compressed = encoder.finish()?;
// Note: Python nixl_connect automatically decompresses the payload when it sees the "b64:" prefix
let mut zlib_encoder = ZlibEncoder::new(Vec::new(), Compression::new(6));
zlib_encoder.write_all(&nixl_md)?;
let compressed = zlib_encoder.finish()?;

let b64_encoded = general_purpose::STANDARD.encode(&compressed);
Ok(format!("b64:{}", b64_encoded))
Expand Down
Loading