Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fully resetting the slot state if we are in a prefilling state on an unexpected call to get_num_new_matched_tokens
  • Loading branch information
ryanolson committed Aug 7, 2025
commit 002da46ad46c1d1ed01fbd57707c2673f22d331e
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ impl Leader for KvConnectorLeader {
.lock()
.map_err(|e| anyhow::anyhow!("Failed to lock slot: {}", e))?;

if slot.state() == SlotState::Prefilling {
tracing::warn!("slot is in the Prefilled state; this seems like we need to reset the slot and start over");
slot.reset();
}

// early exit if we cannot match full block
if (slot.sequence().total_tokens() - num_computed_tokens) < self.block_size {
return Ok((0, false));
Expand Down Expand Up @@ -319,7 +324,7 @@ impl Leader for KvConnectorLeader {

// todo: we probably need to reset the slot state and reload it from `cache_req`; however, we do not
// know if it will take another pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
slot.reset_after_preemption()?;
slot.reset_after_preemption();

// note, we can not trigger onboarding here -- perhaps we are supposed to or perhaps will get another
// pass at `get_num_new_matched_tokens` or `update_state_after_alloc`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ pub trait Slot: std::fmt::Debug {
fn record_cached_disk_tokens(&mut self, num_tokens: usize);

/// Reset the slot after preemption.
fn reset_after_preemption(&mut self) -> Result<(), SlotError>;
fn reset_after_preemption(&mut self);

/// Reset the slot.
fn reset(&mut self);
}

pub trait ExternallyManagedDeviceSlot: Slot {
Expand Down Expand Up @@ -347,7 +350,7 @@ impl Slot for VllmConnectorSlot {
self.state
}

fn reset_after_preemption(&mut self) -> Result<(), SlotError> {
fn reset_after_preemption(&mut self) {
assert!(self.staging_from_disk.is_none());
assert!(self.staging_from_host.is_none());
assert!(self.pending_operations.is_none());
Expand All @@ -360,7 +363,11 @@ impl Slot for VllmConnectorSlot {
self.tokens_cached_from_device = 0;
self.tokens_cached_from_host = 0;
self.tokens_cached_from_disk = 0;
Ok(())
}

/// Fully reset the slot.
///
/// Runs `reset_after_preemption` and then sets the slot state back to
/// `SlotState::Initialized`.
///
/// # Panics
///
/// `reset_after_preemption` asserts that `staging_from_disk`, `staging_from_host`
/// and `pending_operations` are all `None`, so this panics if any of them is still set.
// NOTE(review): the leader calls this when it finds the slot in `SlotState::Prefilling`;
// presumably no staging/pending operations can be outstanding in that state — TODO confirm,
// otherwise the assertions above turn the recovery path into a panic.
fn reset(&mut self) {
self.reset_after_preemption();
// Return the slot state to `Initialized`.
self.state = SlotState::Initialized;
}

fn record_cached_device_tokens(&mut self, num_tokens: usize) {
Expand Down
3 changes: 3 additions & 0 deletions lib/llm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ pub mod types;
#[cfg(feature = "block-manager")]
pub mod block_manager;

#[cfg(feature = "block-manager")]
pub mod integrations;

/// Reads a JSON file, extracts a specific field, and deserializes it into type T.
///
/// # Arguments
Expand Down