diff --git a/fastchat/serve/vllm_worker.py b/fastchat/serve/vllm_worker.py index 2fe8e6304..b1dc54f3d 100644 --- a/fastchat/serve/vllm_worker.py +++ b/fastchat/serve/vllm_worker.py @@ -210,7 +210,7 @@ async def api_model_details(request: Request): args.model = args.model_path if args.num_gpus > 1: args.tensor_parallel_size = args.num_gpus - if args.quantizaiton: + if args.quantization: args.quantization = args.quantization engine_args = AsyncEngineArgs.from_cli_args(args)