This repository was archived by the owner on Mar 17, 2025. It is now read-only.
Merged
Changes from 1 commit
Commits (146)
2264580
Remove hardcode flash-attn disable setting (#2342)
Trangle Sep 1, 2023
24a8755
Document turning off proxy_buffering when api is streaming (#2337)
nathanstitt Sep 1, 2023
b039a66
Simplify huggingface api example (#2355)
merrymercy Sep 4, 2023
ea045e6
Update sponsor logos (#2367)
merrymercy Sep 5, 2023
85bec47
if LOGDIR is empty, then don't try output log to local file (#2357)
leiwen83 Sep 5, 2023
f99663c
add best_of and use_beam_search for completions interface (#2348)
leiwen83 Sep 6, 2023
3cf04c2
Extract upvote/downvote from log files (#2369)
merrymercy Sep 6, 2023
94f4dd6
Revert "add best_of and use_beam_search for completions interface" (#…
merrymercy Sep 6, 2023
dc3dd12
Improve doc (#2371)
merrymercy Sep 6, 2023
a5e6abf
add best_of and use_beam_search for completions interface (#2372)
leiwen83 Sep 7, 2023
1d703b2
update monkey patch for llama2 (#2379)
merrymercy Sep 7, 2023
56744d1
Make E5 adapter more restrict to reduce mismatch (#2381)
merrymercy Sep 7, 2023
6af0a7c
Update UI and sponsers (#2387)
merrymercy Sep 8, 2023
9b3147e
Use fsdp api for save save (#2390)
merrymercy Sep 10, 2023
a6167db
Release v0.2.27
merrymercy Sep 10, 2023
7dcdafe
Spicyboros + airoboros 2.2 template update. (#2392)
jondurbin Sep 11, 2023
b921f16
bugfix of openai_api_server for fastchat.serve.vllm_worker (#2398)
Rayrtfr Sep 11, 2023
13f40b3
Revert "bugfix of openai_api_server for fastchat.serve.vllm_worker" (…
merrymercy Sep 11, 2023
77aa4df
Revert "add best_of and use_beam_search for completions interface" (#…
merrymercy Sep 11, 2023
11b05bb
Release a v0.2.28 with bug fixes and more test cases
merrymercy Sep 11, 2023
a8088ba
Fix model_worker error (#2404)
wangxiyuan Sep 12, 2023
b49d789
Added google/flan models and fixed AutoModelForSeq2SeqLM when loading…
wangzhen263 Sep 12, 2023
7dfcf1a
Rename twitter to X (#2406)
karshPrime Sep 12, 2023
aa153d5
Update huggingface_api.py (#2409)
merrymercy Sep 12, 2023
3149253
Add support for baichuan2 models (#2408)
Sep 13, 2023
2e0e60b
Fixed character overlap issue when api streaming output (#2431)
Somezak1 Sep 18, 2023
c7e3e67
Support custom conversation template in multi_model_worker (#2434)
hi-jin Sep 18, 2023
c685951
Add Ascend NPU support (#2422)
zhangsibo1129 Sep 18, 2023
54a8353
Add raw conversation template (#2417) (#2418)
tobiabir Sep 18, 2023
1119c51
Improve docs & UI (#2436)
merrymercy Sep 18, 2023
658736f
Fix Salesforce xgen inference (#2350)
jaywonchung Sep 18, 2023
d26d9e7
Add support for Phind-CodeLlama models (#2415) (#2416)
tobiabir Sep 18, 2023
0a5f503
Add falcon 180B chat conversation template (#2384)
Btlmd Sep 18, 2023
318d070
Improve docs (#2438)
merrymercy Sep 18, 2023
9cf3c8b
add dtype and seed (#2430)
Ying1123 Sep 18, 2023
24acac1
Data cleaning scripts for dataset release (#2440)
merrymercy Sep 18, 2023
30a6ffc
merge google/flan based adapters: T5Adapter, CodeT5pAdapter, FlanAdap…
wangzhen263 Sep 18, 2023
16be5cf
Fix docs
merrymercy Sep 18, 2023
e4758da
Update UI (#2446)
merrymercy Sep 18, 2023
68f1fac
Add Optional SSL Support to controller.py (#2448)
brandonbiggs Sep 19, 2023
db8e271
Format & Improve docs
merrymercy Sep 19, 2023
c4c195c
Release v0.2.29 (#2450)
merrymercy Sep 20, 2023
a040cdc
Show terms of use as an JS alert (#2461)
merrymercy Sep 22, 2023
bcb8076
vllm worker awq quantization update (#2463)
dongxiaolong Sep 22, 2023
2855bf9
Fix falcon chat template (#2464)
merrymercy Sep 22, 2023
f8f302f
Fix chunk handling when partial chunks are returned (#2485)
siddartha-RE Sep 29, 2023
15a094e
Update openai_api_server.py to add an SSL option (#2484)
brandonbiggs Sep 29, 2023
7aace7d
Update vllm_worker.py (#2482)
shuishu Sep 29, 2023
faca3a3
fix typo quantization (#2469)
asaiacai Sep 29, 2023
8e8a604
fix vllm quanziation args
merrymercy Sep 29, 2023
77b3df1
Update README.md (#2492)
merrymercy Sep 29, 2023
f5c90f6
Huggingface api worker (#2456)
hnyls2002 Sep 29, 2023
f70de6b
Update links to lmsys-chat-1m (#2497)
merrymercy Sep 30, 2023
c478bbf
Update train code to support the new tokenizer (#2498)
Ying1123 Sep 30, 2023
bc22411
Third Party UI Example (#2499)
enochlev Sep 30, 2023
6b4fc64
Add metharme (pygmalion) conversation template (#2500)
AlpinDale Oct 1, 2023
46e5207
Optimize for proper flash attn causal handling (#2503)
siddartha-RE Oct 2, 2023
f5eee7d
Add Mistral AI instruction template (#2483)
lerela Oct 2, 2023
759dfbe
Update monitor & plots (#2506)
merrymercy Oct 2, 2023
f9fcc9d
Release v0.2.30 (#2507)
merrymercy Oct 2, 2023
e64ee0e
Fix for single turn dataset (#2509)
toslunar Oct 3, 2023
c3ad73a
replace os.getenv with os.path.expanduser because the first one doesn…
khalil-Hennara Oct 4, 2023
5573aae
Fix arena (#2522)
merrymercy Oct 6, 2023
dad34ea
Update Dockerfile (#2524)
Oct 9, 2023
9d27d68
add Llama2ChangAdapter (#2510)
lcw99 Oct 9, 2023
466da28
Add ExllamaV2 Inference Framework Support. (#2455)
leonxia1018 Oct 9, 2023
5dbc4f3
Improve docs (#2534)
merrymercy Oct 9, 2023
e448a0f
Fix warnings for new gradio versions (#2538)
merrymercy Oct 10, 2023
125f374
revert the gradio change; now works for 3.40
merrymercy Oct 10, 2023
0c37d98
Improve chat templates (#2539)
merrymercy Oct 10, 2023
cd7d048
Add Zephyr 7B Alpha (#2535)
lewtun Oct 11, 2023
f5a4911
Improve Support for Mistral-Instruct (#2547)
Steve-Tech Oct 12, 2023
f683fd1
correct max_tokens by context_length instead of raise exception (#2544)
liunux4odoo Oct 12, 2023
7b0ca39
Revert "Improve Support for Mistral-Instruct" (#2552)
merrymercy Oct 12, 2023
9f7afed
Fix Mistral template (#2529)
normster Oct 12, 2023
f19d449
Add additional Informations from the vllm worker (#2550)
SebastianBodza Oct 12, 2023
631d62f
Make FastChat work with LMSYS-Chat-1M Code (#2551)
CodingWithTim Oct 12, 2023
7ebc29c
Create `tags` attribute to fix `MarkupError` in rich CLI (#2553)
Steve-Tech Oct 13, 2023
8531cf6
move BaseModelWorker outside serve.model_worker to make it independen…
liunux4odoo Oct 13, 2023
ff3cb92
Misc style and bug fixes (#2559)
merrymercy Oct 13, 2023
e1a1f50
Fix README.md (#2561)
infwinston Oct 14, 2023
9db2143
release v0.2.31 (#2563)
merrymercy Oct 14, 2023
cb71875
resolves #2542 modify dockerfile to upgrade cuda to 12.2.0 and pydant…
alexdelapaz Oct 15, 2023
ee0d4d2
Add airoboros_v3 chat template (llama-2 format) (#2564)
jondurbin Oct 15, 2023
06092dd
Add Xwin-LM V0.1, V0.2 support (#2566)
REIGN12 Oct 15, 2023
ff66426
Fixed model_worker generate_gate may blocked main thread (#2540) (#2…
lvxuan263 Oct 16, 2023
7fbf5b1
feat: add claude-v2 (#2571)
congchan Oct 17, 2023
29de51f
Update vigogne template (#2580)
bofenghuang Oct 18, 2023
f79151b
Fix issue #2568: --device mps led to TypeError: forward() got an unex…
Phil-U-U Oct 18, 2023
f06b202
Add Mistral-7B-OpenOrca conversation_temmplate (#2585)
waynespa Oct 20, 2023
8e90d5c
docs: bit misspell comments model adapter default template name conve…
guspan-tanadi Oct 21, 2023
6a149bb
Update Mistral template (#2581)
Gk-rohan Oct 21, 2023
f752996
Fix <s> in mistral template
merrymercy Oct 21, 2023
d61d43e
Update README.md (vicuna-v1.3 -> vicuna-1.5) (#2592)
infwinston Oct 21, 2023
582f48b
Update README.md to highlight chatbot arena (#2596)
infwinston Oct 24, 2023
220257a
Add Lemur model (#2584)
ugolotti Oct 24, 2023
ab169f6
add trust_remote_code=True in BaseModelAdapter (#2583)
edisonwd Oct 24, 2023
cbf2853
Openai interface add use beam search and best of 2 (#2442)
leiwen83 Oct 24, 2023
09e4357
Update qwen and add pygmalion (#2607)
Trangle Oct 28, 2023
7a31d3b
feat: Support model AquilaChat2 (#2616)
fangyinc Nov 1, 2023
d5e4b27
Added settings vllm (#2599)
SebastianBodza Nov 1, 2023
af4dfe3
[Logprobs] Support logprobs=1 (#2612)
comaniac Nov 1, 2023
dd84d16
release v0.2.32
merrymercy Nov 1, 2023
40b235d
fix: Fix for OpenOrcaAdapter to return correct conversation template …
vjsrinath Nov 2, 2023
3d9430a
Make fastchat.serve.model_worker to take debug argument (#2628)
uinone Nov 2, 2023
fdefb5f
openchat 3.5 model support (#2638)
imoneoi Nov 3, 2023
d5a078b
xFastTransformer framework support (#2615)
a3213105 Nov 3, 2023
e8a839a
feat: support custom models vllm serving (#2635)
congchan Nov 5, 2023
86f044b
kill only fastchat process (#2641)
scenaristeur Nov 6, 2023
5d453e4
Update server_arch.png
merrymercy Nov 6, 2023
77932a1
Use conv.update_last_message api in mt-bench answer generation (#2647)
merrymercy Nov 7, 2023
32c41de
Improve Azure OpenAI interface (#2651)
infwinston Nov 7, 2023
f2810e5
Add required_temp support in jsonl format to support flexible tempera…
CodingWithTim Nov 8, 2023
ab01027
Pin openai version < 1 (#2658)
infwinston Nov 8, 2023
18f5692
Remove exclude_unset parameter (#2654)
snapshotpl Nov 9, 2023
2ab0026
Revert "Remove exclude_unset parameter" (#2666)
merrymercy Nov 9, 2023
09033af
added support for CodeGeex(2) (#2645)
peterwilli Nov 9, 2023
e46d97a
add chatglm3 conv template support in conversation.py (#2622)
ZeyuTeng96 Nov 10, 2023
e0b351a
UI and model change (#2672)
infwinston Nov 12, 2023
1901125
train_flant5: fix typo (#2673)
Force1ess Nov 12, 2023
a19866b
Fix gpt template (#2674)
infwinston Nov 12, 2023
a333a55
Update README.md (#2679)
merrymercy Nov 13, 2023
aeec0e0
feat: support template's stop_str as list (#2678)
congchan Nov 13, 2023
9cfeb15
Update exllama_v2.md (#2680)
jm23jeffmorgan Nov 15, 2023
a1324de
save model under deepspeed (#2689)
MrZhengXin Nov 18, 2023
fdf7b2c
Adding SSL support for model workers and huggingface worker (#2687)
lnguyen Nov 18, 2023
e53c73f
Check the max_new_tokens <= 0 in openai api server (#2688)
zeyugao Nov 19, 2023
8bd422b
Add Microsoft/Orca-2-7b and update model support docs (#2714)
BabyChouSr Nov 22, 2023
849a815
fix tokenizer of chatglm2 (#2711)
wangshuai09 Nov 22, 2023
af8d877
Template for using Deepseek code models (#2705)
AmaleshV Nov 22, 2023
85c797e
add support for Chinese-LLaMA-Alpaca (#2700)
zollty Nov 22, 2023
99d19ac
Make --load-8bit flag work with weights in safetensors format (#2698)
xuguodong1999 Nov 22, 2023
0bbeddc
Format code and minor bug fix (#2716)
merrymercy Nov 22, 2023
0a5ad3e
Bump version to v0.2.33 (#2717)
merrymercy Nov 22, 2023
3389cc3
fix tokenizer.pad_token attribute error (#2710)
wangshuai09 Nov 22, 2023
ff25295
support stable-vicuna model (#2696)
hi-jin Nov 23, 2023
6ac7d76
Exllama cache 8bit (#2719)
mjkaye Nov 23, 2023
1f21efb
Add Yi support (#2723)
infwinston Nov 23, 2023
a754c48
Add Hermes 2.5 [fixed] (#2725)
152334H Nov 23, 2023
c199c8d
Fix Hermes2Adapter (#2727)
lewtun Nov 26, 2023
cfba5f4
Fix YiAdapter (#2730)
Jingsong-Yan Nov 26, 2023
96aed4c
add trust_remote_code argument (#2715)
wangshuai09 Nov 26, 2023
3352306
Add revision arg to MT Bench answer generation (#2728)
lewtun Nov 26, 2023
76fbdef
Fix MPS backend 'index out of range' error (#2737)
suquark Nov 26, 2023
686ab04
add starling support (#2738)
infwinston Nov 27, 2023
a7ed47f
Merge remote-tracking branch 'upstream/main' into merge_1126
renning22 Nov 27, 2023
Fix Salesforce xgen inference (lm-sys#2350)
jaywonchung authored Sep 18, 2023
commit 658736fc45356e574ee62e991603307ffa4c8f55
7 changes: 3 additions & 4 deletions fastchat/conversation.py
```diff
--- a/fastchat/conversation.py
+++ b/fastchat/conversation.py
@@ -765,11 +765,10 @@ def get_conv_template(name: str) -> Conversation:
     Conversation(
         name="xgen",
         system_message="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
-        roles=("### Human: ", "###"),
-        sep_style=SeparatorStyle.NO_COLON_SINGLE,
+        roles=("### Human", "### Assistant"),
+        sep_style=SeparatorStyle.ADD_COLON_SINGLE,
         sep="\n",
-        stop_token_ids=[50256, 0, 1, 2],
-        stop_str="<|endoftext|>",
+        stop_token_ids=[50256],
     )
 )
```

Expand Down
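To see what the corrected xgen template produces, here is a minimal sketch of the colon-single rendering style — not FastChat's actual implementation; `render_xgen` and `SYSTEM` are illustrative names, and the behavior assumed is that each turn renders as `role: message` plus a single separator, with the final assistant role left open with a bare colon:

```python
# Sketch of the add-colon-single prompt layout used by the fixed xgen
# template. Each completed turn becomes "{role}: {text}{sep}"; a turn
# whose text is None is left open as "{role}:" for the model to fill.
SYSTEM = (
    "A chat between a curious human and an artificial intelligence "
    "assistant. The assistant gives helpful, detailed, and polite "
    "answers to the human's questions.\n\n"
)


def render_xgen(messages, sep="\n"):
    """messages: list of (role, text) pairs; text may be None for the
    assistant turn the model is asked to complete."""
    out = SYSTEM
    for role, text in messages:
        if text is None:
            out += role + ":"  # open the assistant turn
        else:
            out += role + ": " + text + sep
    return out


prompt = render_xgen([
    ("### Human", "Hello!"),
    ("### Assistant", None),
])
print(prompt)
```

With the old `roles=("### Human: ", "###")` and `NO_COLON_SINGLE`, the assistant turn opened with a bare `###`, which did not match what the xgen model was trained on; the corrected pair of explicit role names plus colon separator restores the expected format.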
3 changes: 2 additions & 1 deletion fastchat/serve/inference.py
```diff
--- a/fastchat/serve/inference.py
+++ b/fastchat/serve/inference.py
@@ -80,7 +80,8 @@ def generate_stream(
     echo = bool(params.get("echo", True))
     stop_str = params.get("stop", None)
     stop_token_ids = params.get("stop_token_ids", None) or []
-    stop_token_ids.append(tokenizer.eos_token_id)
+    if tokenizer.eos_token_id not in stop_token_ids:
+        stop_token_ids.append(tokenizer.eos_token_id)

     logits_processor = prepare_logits_processor(
         temperature, repetition_penalty, top_p, top_k
```
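The effect of this membership guard can be isolated in a few lines — a standalone sketch, where `collect_stop_ids` is an illustrative helper (the real logic lives inline in `generate_stream`) and `50256` stands in for a tokenizer's `eos_token_id`:

```python
# Without the "not in" check, a caller who already lists the eos id in
# stop_token_ids (as the fixed xgen template now does with 50256) would
# get it appended a second time.
def collect_stop_ids(params, eos_token_id):
    stop_token_ids = params.get("stop_token_ids", None) or []
    if eos_token_id not in stop_token_ids:  # the fix: skip duplicates
        stop_token_ids.append(eos_token_id)
    return stop_token_ids


# Caller already supplies eos: no duplicate entry.
print(collect_stop_ids({"stop_token_ids": [50256]}, 50256))  # [50256]
# Caller omits eos: it is appended, as before the fix.
print(collect_stop_ids({}, 50256))  # [50256]
```

Note that the guard only deduplicates the eos id itself; caller-provided extra stop ids are preserved unchanged.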