diff --git a/fastchat/serve/vllm_worker.py b/fastchat/serve/vllm_worker.py
index 9da06bfdc..0af680bb5 100644
--- a/fastchat/serve/vllm_worker.py
+++ b/fastchat/serve/vllm_worker.py
@@ -55,6 +55,10 @@ def __init__(
             f"Loading the model {self.model_names} on worker {worker_id}, worker type: vLLM worker..."
         )
         self.tokenizer = llm_engine.engine.tokenizer
+        # This is to support vllm >= 0.2.7 where TokenizerGroup was introduced
+        # and llm_engine.engine.tokenizer was no longer a raw tokenizer
+        if hasattr(self.tokenizer, "tokenizer"):
+            self.tokenizer = llm_engine.engine.tokenizer.tokenizer
         self.context_len = get_context_length(llm_engine.engine.model_config.hf_config)
 
         if not no_register:
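
For context, a minimal sketch of the duck-typed unwrap this patch performs: from vLLM 0.2.7 onward, llm_engine.engine.tokenizer is a TokenizerGroup that exposes the raw Hugging Face tokenizer through its .tokenizer attribute, so probing for that attribute with hasattr works across both old and new vLLM versions without pinning a version. The Dummy* classes below are hypothetical stand-ins, used only to exercise the check; only the hasattr probe mirrors the patch itself.

class DummyRawTokenizer:
    """Hypothetical stand-in for a raw Hugging Face tokenizer (vLLM < 0.2.7)."""

class DummyTokenizerGroup:
    """Hypothetical stand-in for vLLM's TokenizerGroup (vLLM >= 0.2.7),
    which wraps the raw tokenizer in a .tokenizer attribute."""
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

def unwrap_tokenizer(tokenizer):
    # Older vLLM hands back the raw tokenizer directly; newer vLLM hands
    # back a TokenizerGroup whose .tokenizer attribute holds the raw one.
    if hasattr(tokenizer, "tokenizer"):
        return tokenizer.tokenizer
    return tokenizer

raw = DummyRawTokenizer()
assert unwrap_tokenizer(raw) is raw                       # pre-0.2.7 path
assert unwrap_tokenizer(DummyTokenizerGroup(raw)) is raw  # 0.2.7+ path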