Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
quit silently
  • Loading branch information
tedzhouhk committed Jul 31, 2025
commit 510cc778a53a14b1b901fda70d8acaa65330d3c3
6 changes: 3 additions & 3 deletions components/backends/vllm/src/dynamo/vllm/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ async def generate(self, request):
async for tok in self.generate_tokens(prompt, sampling_params, request_id):
yield tok
except EngineShutdownError:
- # Re-raise EngineShutdownError to propagate the shutdown signal
- raise
+ # here we silently quit so that router can migrate the request
+ return


class PrefillWorkerHandler(BaseWorkerHandler):
Expand Down Expand Up @@ -197,5 +197,5 @@ async def generate(self, request):
kv_transfer_params=res.kv_transfer_params,
).model_dump_json()
except asyncio.CancelledError:
- # Convert CancelledError to EngineShutdownError when the engine is shut down
+ # raise the error because we cannot migrate prefill requests
raise EngineShutdownError("Engine was shut down during token generation") from None