diff --git a/fastchat/conversation.py b/fastchat/conversation.py index a01b9d50c..7bd836a28 100644 --- a/fastchat/conversation.py +++ b/fastchat/conversation.py @@ -535,7 +535,7 @@ def get_conv_template(name: str) -> Conversation: roles=("### Instruction:", "### Response:"), sep="\n", stop_str="<|EOT|>", - sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE + sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE, ) ) diff --git a/fastchat/serve/inference.py b/fastchat/serve/inference.py index 8a6636372..6d155aab7 100644 --- a/fastchat/serve/inference.py +++ b/fastchat/serve/inference.py @@ -118,6 +118,7 @@ def generate_stream( token_logprobs = [None] # The first token has no logprobs. sent_interrupt = False finish_reason = None + stopped = False for i in range(max_new_tokens): if i == 0: # prefill if model.config.is_encoder_decoder: