Skip to content

Commit 2e7da20

Browse files
authored
[fix] Release slots with spec decode + disagg (#5975)
Signed-off-by: Iman Tabrizian <itabrizian@nvidia.com>
Signed-off-by: Iman Tabrizian <10105175+Tabrizian@users.noreply.github.com>
1 parent 332a65b commit 2e7da20

File tree

1 file changed

+8 -4 lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -869,8 +869,7 @@ def _executor_loop(self):
 869  869
 870  870    self._pad_attention_dp_dummy_request()
 871  871
 872       - if self.draft_model_engine is not None or is_ngram or hasattr(
 873       - self, 'drafter') and self.drafter is not None:
      872  + if self.draft_model_engine is not None or is_ngram:
 874  873    self._prepare_draft_requests(self.active_requests)
 875  874
 876  875    scheduled_batch, fitting_disagg_gen_init_requests, num_fitting_reqs = self._schedule(
@@ -1595,8 +1594,13 @@ def _send_disagg_ctx_cache(self, scheduled_ctx_requests):
 1595  1594    if req.is_context_only_request and (req.is_context_finished or
 1596  1595    req.is_finished_due_to_length):
 1597  1596    self.kv_cache_transceiver.respond_and_send_async(req)
 1598        - self.resource_manager.resource_managers[
 1599        - ResourceManagerType.SEQ_SLOT_MANAGER].free_resources(req)
       1597  + for resource_mgr_type in (
       1598  + ResourceManagerType.SEQ_SLOT_MANAGER,
       1599  + ResourceManagerType.SPEC_RESOURCE_MANAGER):
       1600  + if resource_mgr_type in self.resource_manager.resource_managers and self.resource_manager.resource_managers[
       1601  + resource_mgr_type] is not None:
       1602  + self.resource_manager.resource_managers[
       1603  + resource_mgr_type].free_resources(req)
 1600  1604
 1601  1605    self.kv_cache_transceiver.check_context_transfer_status(0)
 1602  1606

0 commit comments

Comments (0)