Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions components/src/dynamo/vllm/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ def parse_args() -> Config:
args = parser.parse_args()
engine_args = AsyncEngineArgs.from_cli_args(args)

if hasattr(engine_args, "stream_interval") and engine_args.stream_interval != 1:
logger.warning(
"--stream-interval is currently not respected in Dynamo. "
"Dynamo uses its own post-processing implementation on the frontend, "
"bypassing vLLM's OutputProcessor buffering. "
)

if engine_args.enable_prefix_caching is None:
logger.debug(
"--enable-prefix-caching or --no-enable-prefix-caching not specified. Defaulting to True (vLLM v1 default behavior)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@
57: "Clocks Event: Power limit exceeded",
# Common Graphics XIDs (often seen in test environments)
13: "Graphics Engine Exception",
31: "GPU stopped responding", # Can be both MMU or timeout context
# XID 31 ("GPU stopped responding") is intentionally omitted here: it is already
# defined above as "MMU Error", which can also indicate a GPU timeout.
45: "Preemptive Cleanup, due to previous errors",
69: "Graphics Exception: Class Error",
}
Expand Down
Loading