Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,11 @@ generated-values.yaml
.build/
**/.devcontainer/.env
TensorRT-LLM


# START Ruler Generated Files
/.cursor/instructions.md
/.cursor/instructions.md.bak
/CLAUDE.md
/CLAUDE.md.bak
# END Ruler Generated Files
1 change: 0 additions & 1 deletion dynamo.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"settings": {
"rust-analyzer.linkedProjects": [
"Cargo.toml",
"launch/dynamo-run/Cargo.toml",
"lib/bindings/python/Cargo.toml"
],
"rust-analyzer.procMacro.enable": true,
Expand Down
2 changes: 1 addition & 1 deletion lib/bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ name = "_core"
crate-type = ["cdylib", "rlib"]

[features]
default = []
default = ["block-manager"]
block-manager = ["dynamo-llm/block-manager", "dep:dlpark", "dep:cudarc"]

[dependencies]
Expand Down
2 changes: 2 additions & 0 deletions lib/bindings/python/rust/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<llm::entrypoint::KvRouterConfig>()?;
m.add_class::<llm::kv::WorkerMetricsPublisher>()?;
m.add_class::<llm::model_card::ModelDeploymentCard>()?;
m.add_class::<llm::vllm_scheduler::RustSchedulerState>()?;
m.add_class::<llm::local_model::ModelRuntimeConfig>()?;
m.add_class::<llm::preprocessor::OAIChatPreprocessor>()?;
m.add_class::<llm::backend::Backend>()?;
Expand Down Expand Up @@ -120,6 +121,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {

engine::add_to_module(m)?;
parsers::add_to_module(m)?;
llm::scheduler_connector::register_module(m)?;

#[cfg(feature = "block-manager")]
llm::block_manager::add_to_module(m)?;
Expand Down
2 changes: 2 additions & 0 deletions lib/bindings/python/rust/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub mod local_model;
pub mod model_card;
pub mod nats;
pub mod preprocessor;
pub mod scheduler_connector;
pub mod vllm_scheduler;

#[cfg(feature = "block-manager")]
pub mod block_manager;
10 changes: 10 additions & 0 deletions lib/bindings/python/rust/llm/block_manager/vllm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::to_pyerr;
mod block_list;
mod connector;
mod request;
// mod scheduler; // TODO: Fix PyO3 bindings
mod slot;

pub use block_list::{BlockListType, BlockState, BlockStates, KvbmBlockList};
Expand All @@ -53,6 +54,15 @@ fn _vllm_integration(m: &Bound<'_, PyModule>) -> PyResult<()> {
// TODO: use TRTLLM own integration module
m.add_class::<connector::trtllm_worker::PyTrtllmKvConnectorWorker>()?;
m.add_class::<connector::trtllm_leader::PyTrtllmKvConnectorLeader>()?;

// Add scheduler recorder and conversion functions
// TODO: Fix PyO3 bindings for these types
// m.add_class::<scheduler::recorder_bindings::PySchedulerRecorder>()?;
// m.add_function(wrap_pyfunction!(scheduler::scheduler_types::convert_scheduler_output, m)?)?;
// m.add_function(wrap_pyfunction!(scheduler::scheduler_types::convert_model_runner_output, m)?)?;
// m.add_function(wrap_pyfunction!(scheduler::scheduler_types::convert_engine_core_outputs, m)?)?;
// m.add_function(wrap_pyfunction!(scheduler::recorder_bindings::load_scheduler_trace, m)?)?;

Ok(())
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod recorder_bindings;
pub mod scheduler_types;

use pyo3::prelude::*;
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Python bindings for the SchedulerRecorder

use dynamo_llm::integrations::vllm::recorder::SchedulerRecorder as RustRecorder;
use dynamo_llm::integrations::vllm::types::*;
use pyo3::prelude::*;
use std::path::PathBuf;

/// Python-accessible SchedulerRecorder.
///
/// Thin wrapper that owns a Rust-side recorder (`RustRecorder`, i.e.
/// `dynamo_llm::integrations::vllm::recorder::SchedulerRecorder`) and is
/// exported to Python under the class name `SchedulerRecorder`.
#[pyclass(name = "SchedulerRecorder")]
pub struct PySchedulerRecorder {
// The wrapped Rust recorder; every Python-facing method delegates to it.
inner: RustRecorder,
}

#[pymethods]
impl PySchedulerRecorder {
    /// Create a new SchedulerRecorder.
    ///
    /// `model` and `vllm_version` are forwarded unchanged to the wrapped Rust
    /// recorder's constructor.
    #[new]
    #[pyo3(signature = (model, vllm_version))]
    fn new(model: String, vllm_version: String) -> Self {
        let inner = RustRecorder::new(model, vllm_version);
        Self { inner }
    }

    /// Record a scheduler output (already converted to Rust).
    ///
    /// NOTE(review): as a `#[pymethods]` argument, `SchedulerOutput` presumably
    /// has to be extractable from a Python object — confirm it implements the
    /// required PyO3 conversion traits.
    fn record_schedule_output(&mut self, output: SchedulerOutput) -> PyResult<()> {
        self.inner.record_schedule_output(output);
        Ok(())
    }

    /// Record a model runner output (already converted to Rust).
    fn record_model_runner_output(&mut self, output: ModelRunnerOutput) -> PyResult<()> {
        self.inner.record_model_runner_output(output);
        Ok(())
    }

    /// Record engine core outputs (already converted to Rust).
    fn record_engine_core_outputs(&mut self, outputs: EngineCoreOutputs) -> PyResult<()> {
        self.inner.record_engine_core_outputs(outputs);
        Ok(())
    }

    /// Move to the next iteration.
    fn next_iteration(&mut self) -> PyResult<()> {
        self.inner.next_iteration();
        Ok(())
    }

    /// Get the current iteration number as reported by the wrapped recorder.
    fn current_iteration(&self) -> u64 {
        self.inner.current_iteration()
    }

    /// Save the recording to a JSON file at `path`.
    ///
    /// # Errors
    ///
    /// Any error returned by the wrapped recorder is surfaced to Python as an
    /// `IOError` carrying the error's display text.
    fn save_to_file(&mut self, path: String) -> PyResult<()> {
        let target = PathBuf::from(path);
        self.inner
            .save_to_file(&target)
            .map_err(|err| pyo3::exceptions::PyIOError::new_err(err.to_string()))
    }

    /// Clear all recordings.
    fn clear(&mut self) -> PyResult<()> {
        self.inner.clear();
        Ok(())
    }

    /// Get the number of recorded iterations, i.e. the length of the trace's
    /// `iterations` collection.
    fn num_iterations(&self) -> usize {
        self.inner.get_trace().iterations.len()
    }
}

/// Load a recording from a JSON file.
///
/// Delegates to the Rust recorder's `load_from_file` with `path` converted to
/// a `PathBuf`.
///
/// # Errors
///
/// Any error returned by the loader is surfaced to Python as an `IOError`
/// carrying the error's display text.
#[pyfunction]
pub fn load_scheduler_trace(path: String) -> PyResult<SchedulerTrace> {
    let source = PathBuf::from(path);
    RustRecorder::load_from_file(&source)
        .map_err(|err| pyo3::exceptions::PyIOError::new_err(err.to_string()))
}
Loading
Loading