Merged
fix the problem of not using the decoding method corresponding to the base model in peft mode
Jingsong-Yan committed Dec 27, 2023
commit 46d559eb9cb2f73afaaf10c9e88c0936f9642a12
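
Background: get_generate_stream_function dispatches to a model-specific streaming generator by substring-matching the model's class name. When a PEFT adapter is loaded, the wrapper class (e.g. PeftModelForCausalLM) masks the base model's class, so a ChatGLM or Falcon base model silently falls back to the generic generate_stream instead of its native decoding method. Below is a minimal sketch of the failure and the fix; the classes are stand-ins for the real peft and transformers types, since only the class names matter for the dispatch:

# Illustrative stand-ins for peft.PeftModelForCausalLM wrapping a ChatGLM model.
class ChatGLMForConditionalGeneration:  # stand-in for the base model class
    pass

class LoraModel:  # stand-in for peft's wrapper chain
    def __init__(self, model):
        self.model = model  # the underlying transformers model

class PeftModelForCausalLM:  # stand-in for the outer PEFT wrapper
    def __init__(self, base):
        self.base_model = LoraModel(base)

model = PeftModelForCausalLM(ChatGLMForConditionalGeneration())

model_type = str(type(model)).lower()
print("chatglm" in model_type)  # False: the PEFT wrapper masks the base class

# The fix: look through the wrapper before substring-matching.
if "peft" in model_type:
    model_type = str(type(model.base_model.model)).lower()
print("chatglm" in model_type)  # True: dispatch now selects the ChatGLM path

Note that the committed lookup, str(type(model.base_model.model)), is not lowercased. ChatGLM still matches because Hugging Face module paths such as modeling_chatglm contain the lowercase name, but checks that depend on lowercasing the class name itself (e.g. "rwforcausallm" for Falcon's RWForCausalLM) would not; the sketch above lowercases for consistency.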

fastchat/model/model_adapter.py: 11 changes (10 additions, 1 deletion)

@@ -370,10 +370,14 @@ def get_generate_stream_function(model: torch.nn.Module, model_path: str):
     from fastchat.serve.inference import generate_stream
 
     model_type = str(type(model)).lower()
+    is_peft = "peft" in model_type
+    if is_peft:
+        model.set_adapter(model_path)
+        model_type = str(type(model.base_model.model))
+
     is_chatglm = "chatglm" in model_type
     is_falcon = "rwforcausallm" in model_type
     is_codet5p = "codet5p" in model_type
-    is_peft = "peft" in model_type
     is_exllama = "exllama" in model_type
     is_xft = "xft" in model_type
 
@@ -418,6 +422,11 @@ def generate_stream_peft(
     else:
         return generate_stream
 
+def get_result(model, tokenizer, prompt):
+    for response, history in model.stream_chat(tokenizer, prompt, [], past_key_values=None, return_past_key_values=False, max_length=8192, top_p=0.8, temperature=0.95):
+        pass
+    return response
+
 
 def add_model_args(parser):
     parser.add_argument(
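
The new get_result helper drains ChatGLM's stream_chat generator and returns only the final, complete response, discarding the intermediate partial responses and the chat history. A minimal usage sketch, assuming a ChatGLM2-style checkpoint whose remote code provides the stream_chat method (the model id and device setup are illustrative, not part of the commit):

# Usage sketch (illustrative). stream_chat yields (partial_response, history)
# pairs; get_result iterates to exhaustion and keeps the last full response.
from transformers import AutoModel, AutoTokenizer

from fastchat.model.model_adapter import get_result  # helper added in this commit

path = "THUDM/chatglm2-6b"  # illustrative checkpoint exposing stream_chat
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModel.from_pretrained(path, trust_remote_code=True).half().cuda().eval()

print(get_result(model, tokenizer, "What does PEFT stand for?"))

Because the helper swallows everything but the last yielded response, it suits non-streaming callers only; streaming callers should consume stream_chat directly.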