diff --git a/components/src/dynamo/vllm/args.py b/components/src/dynamo/vllm/args.py index 4c0d0a0988..f3b9f7a574 100644 --- a/components/src/dynamo/vllm/args.py +++ b/components/src/dynamo/vllm/args.py @@ -198,6 +198,13 @@ def parse_args() -> Config: args = parser.parse_args() engine_args = AsyncEngineArgs.from_cli_args(args) + if hasattr(engine_args, "stream_interval") and engine_args.stream_interval != 1: + logger.warning( + "--stream-interval is currently not respected in Dynamo. " + "Dynamo uses its own post-processing implementation on the frontend, " + "bypassing vLLM's OutputProcessor buffering. " + ) + if engine_args.enable_prefix_caching is None: logger.debug( "--enable-prefix-caching or --no-enable-prefix-caching not specified. Defaulting to True (vLLM v1 default behavior)"