Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: switch to media-nixl feature flag
Signed-off-by: Alexandre Milesi <[email protected]>
  • Loading branch information
milesial committed Nov 10, 2025
commit b0221bb915e3da9042c758deaaf5f90d35cc9340
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

110 changes: 2 additions & 108 deletions lib/bindings/python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions lib/llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ testing-etcd = []
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec"]
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
media-nixl = ["dep:dynamo-memory", "dep:nixl-sys"]
media-nixl = ["dep:nixl-sys", "dep:dynamo-memory"]

[[bench]]
name = "tokenizer"
Expand Down Expand Up @@ -98,7 +98,7 @@ dialoguer = { version = "0.11", default-features = false, features = [

# block_manager
aligned-vec = { version = "0.6.4", optional = true }
nixl-sys = { git = "https://github.com/ai-dynamo/nixl", rev = "00bac00", optional = true }
nixl-sys = { version = "0.7", optional = true }
cudarc = { workspace = true, optional = true }
nix = { version = "0.26", optional = true }

Expand Down
1 change: 1 addition & 0 deletions lib/llm/src/mocker/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<LLMEngineOutput>, Error>
input: SingleIn<PreprocessedRequest>,
) -> Result<ManyOut<LLMEngineOutput>, Error> {
let (request, ctx) = input.into_parts();
println!("request: {request:?}");

// Extract dp_rank from request field (defaults to 0 if not set)
let dp_rank = request.dp_rank.unwrap_or(0);
Expand Down
26 changes: 17 additions & 9 deletions lib/llm/src/preprocessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ use std::{collections::HashMap, pin::Pin, sync::Arc};
use tracing;

use crate::model_card::{ModelDeploymentCard, ModelInfo};
use crate::preprocessor::media::MediaLoader;
#[cfg(feature = "media-nixl")]
use crate::preprocessor::media::{MediaDecoder, MediaLoader, MediaFetcher};
use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::protocols::common::preprocessor::{
MultimodalData, MultimodalDataMap, PreprocessedRequestBuilder,
Expand Down Expand Up @@ -114,6 +115,7 @@ pub struct OpenAIPreprocessor {
/// Per-model runtime configuration propagated to response generator (e.g., reasoning/tool parser)
runtime_config: crate::local_model::runtime_config::ModelRuntimeConfig,
tool_call_parser: Option<String>,
#[cfg(feature = "media-nixl")]
media_loader: Option<MediaLoader>,
}

Expand Down Expand Up @@ -143,14 +145,16 @@ impl OpenAIPreprocessor {

// // Initialize runtime config from the ModelDeploymentCard
let runtime_config = mdc.runtime_config.clone();
let media_loader = None; // TODO: enable with decoder config from MDC
#[cfg(feature = "media-nixl")]
let media_loader = Some(MediaLoader::new(MediaDecoder::default(), MediaFetcher::default())?);
Ok(Arc::new(Self {
formatter,
tokenizer,
model_info,
mdcsum,
runtime_config,
tool_call_parser,
#[cfg(feature = "media-nixl")]
media_loader,
}))
}
Expand Down Expand Up @@ -279,7 +283,8 @@ impl OpenAIPreprocessor {
let messages = request.messages();
let message_count = messages.len().unwrap_or(0);
let mut media_map: MultimodalDataMap = HashMap::new();
let mut fetch_tasks = Vec::new();
#[cfg(feature = "media-nixl")]
let mut fetch_tasks: Vec<(String, ChatCompletionRequestUserMessageContentPart)> = Vec::new();

for idx in 0..message_count {
let msg = messages
Expand Down Expand Up @@ -312,19 +317,22 @@ impl OpenAIPreprocessor {
_ => continue,
};

#[cfg(feature = "media-nixl")]
if self.media_loader.is_some() {
fetch_tasks.push((type_str, content_part.clone()));
} else {
// No loader, just pass the URL through
media_map
.entry(type_str)
.or_default()
.push(MultimodalData::Url(url));
continue;
}

// Fallback: just pass the URL through
media_map
.entry(type_str)
.or_default()
.push(MultimodalData::Url(url));
}
}

// Execute all fetch tasks
#[cfg(feature = "media-nixl")]
if !fetch_tasks.is_empty() {
let loader = self.media_loader.as_ref().unwrap();
let results = futures::future::join_all(
Expand Down
5 changes: 4 additions & 1 deletion lib/llm/src/preprocessor/media.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ mod rdma;
pub use common::EncodedMediaData;
pub use decoders::{Decoder, ImageDecoder, MediaDecoder};
pub use loader::{MediaFetcher, MediaLoader};
pub use rdma::{DecodedMediaData, RdmaMediaDataDescriptor, get_nixl_agent, get_nixl_metadata};

pub use rdma::{DecodedMediaData, RdmaMediaDataDescriptor};
#[cfg(feature = "media-nixl")]
pub use rdma::{get_nixl_agent, get_nixl_metadata};
Loading
Loading