Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
chore: cleanups
Signed-off-by: Alexandre Milesi <[email protected]>
  • Loading branch information
milesial committed Nov 10, 2025
commit f145c1ca755458facd9d1d38449cbb03f1c08df7
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/bindings/python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ dialoguer = { version = "0.11", default-features = false, features = [

# block_manager
aligned-vec = { version = "0.6.4", optional = true }
nixl-sys = { git = "https://github.com/ai-dynamo/nixl", rev = "ae3f8af", optional = true }
nixl-sys = { git = "https://github.com/ai-dynamo/nixl", rev = "00bac00", optional = true }
cudarc = { workspace = true, optional = true }
nix = { version = "0.26", optional = true }

Expand Down
29 changes: 29 additions & 0 deletions lib/llm/src/preprocessor/media/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Media decoding in the frontend


This component performs media download, base64 decoding, media decoding, and NIXL registration. It is currently used in the OpenAI preprocessor to transform multimodal inputs (`image_url`, `video_url`, `audio_url`) into fully decoded data (pixel values, etc.) that the backends can access via NIXL.



## TODOs

### Modalities

- [x] Image decoding
- [ ] Video decoding
- [ ] Audio decoding

### Performance

- [x] Image SW decoding
- [ ] Video HW decoding (NVDEC)
- [ ] JPEG HW decoding (nvJPEG)
- [ ] Sparse video sampling (seek-forward)
- [ ] Memory slab pre-allocation/registration

### Memory management
- [ ] Memory spilling to lower storage tiers
- [ ] Free memory early on client notification

### Observability
- [ ] Observability on performance, memory usage and input distributions
1 change: 0 additions & 1 deletion lib/llm/src/preprocessor/media/decoders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,5 @@ pub struct MediaDecoder {

#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
pub enum DecodedMediaMetadata {
#[allow(dead_code)] // used in followup MR
Image(ImageMetadata),
}
6 changes: 3 additions & 3 deletions lib/llm/src/preprocessor/media/decoders/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ pub enum ImageLayout {

#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
pub struct ImageMetadata {
#[allow(dead_code)] // used in followup MR
pub(crate) format: Option<ImageFormat>,
#[allow(dead_code)] // used in followup MR
pub(crate) color_type: ColorType,
pub(crate) layout: ImageLayout,
}

Expand All @@ -66,7 +65,7 @@ impl Decoder for ImageDecoder {
let img = reader.decode()?;
let n_channels = img.color().channel_count();

let (data, _color_type) = match n_channels {
let (data, color_type) = match n_channels {
1 => (img.to_luma8().into_raw(), ColorType::L8),
2 => (img.to_luma_alpha8().into_raw(), ColorType::La8),
3 => (img.to_rgb8().into_raw(), ColorType::Rgb8),
Expand All @@ -80,6 +79,7 @@ impl Decoder for ImageDecoder {
let mut decoded: DecodedMediaData = array.try_into()?;
decoded.tensor_info.metadata = Some(DecodedMediaMetadata::Image(ImageMetadata {
format,
color_type,
layout: ImageLayout::HWC,
}));
Ok(decoded)
Expand Down
4 changes: 3 additions & 1 deletion lib/llm/src/preprocessor/media/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,16 @@ mod tests {
);

let result = loader.fetch_and_decode_media_part(&content_part).await;

let descriptor = match result {
Ok(descriptor) => descriptor,
Err(e) if e.to_string().contains("NIXL agent is not available") => {
eprintln!("Skipping test: NIXL agent not available");
println!("test test_fetch_and_decode ... ignored (NIXL agent not available)");
return;
}
Err(e) => panic!("Failed to fetch and decode image: {}", e),
};
mock.assert_async().await;
assert_eq!(descriptor.tensor_info.dtype, DataType::UINT8);

// Verify image dimensions: 1,999px × 1,125px (width × height)
Expand Down
2 changes: 1 addition & 1 deletion lib/memory/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dynamo-config = { workspace = true }

anyhow = { workspace = true }
cudarc = { workspace = true }
nixl-sys = { git = "https://github.com/ai-dynamo/nixl", rev = "ae3f8af" }
nixl-sys = { git = "https://github.com/ai-dynamo/nixl", rev = "00bac00" }
serde = { workspace = true}
thiserror = { workspace = true }
tracing = { workspace = true }
Expand Down