Merged
2 changes: 2 additions & 0 deletions docs/model_support.md
@@ -54,6 +54,8 @@
- [OpenLemur/lemur-70b-chat-v1](https://huggingface.co/OpenLemur/lemur-70b-chat-v1)
- [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b)
- [Microsoft/Orca-2-7b](https://huggingface.co/microsoft/Orca-2-7b)
- [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat)
- [deepseek-ai/deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct)
- Any [EleutherAI](https://huggingface.co/EleutherAI) pythia model such as [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
- Any [Peft](https://github.com/huggingface/peft) adapter trained on top of a
model above. To activate, must have `peft` in the model path. Note: If
26 changes: 25 additions & 1 deletion fastchat/conversation.py
@@ -29,6 +29,7 @@ class SeparatorStyle(IntEnum):
    ROBIN = auto()
    FALCON_CHAT = auto()
    CHATGLM3 = auto()
    DEEPSEEK_CHAT = auto()


@dataclasses.dataclass
@@ -224,6 +225,15 @@ def get_prompt(self) -> str:
                    ret += role + ":"

            return ret
        elif self.sep_style == SeparatorStyle.DEEPSEEK_CHAT:
            seps = [self.sep, self.sep2]
            ret = system_prompt
            for i, (role, message) in enumerate(self.messages):
                if message:
                    ret += role + ": " + message + seps[i % 2]
                else:
                    ret += role + ":"
            return ret
Comment on lines +228 to +236
Member:
just to confirm, there's no existing template suitable for deepseek?

@BabyChouSr (Collaborator, Author), Dec 1, 2023:
The closest I could find is SeparatorStyle.ADD_COLON_TWO; the only issue is that it emits system_prompt + seps[0] instead of just system_prompt, which leads to an extra \n\n if we use that conversation style.

The difficulty comes from:

  1. We need to prepend a <|begin_of_sentence|> token before the first user message; I think the easiest place to put it is the system_message.
  2. There is an extra <|end_of_sentence|> after the Assistant's message, which requires us to have two seps.

TL;DR: I couldn't find another suitable template.
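For illustration (not part of the PR or this thread), a minimal sketch of what the new DEEPSEEK_CHAT branch renders for one exchange, using the sep values registered for "deepseek-chat" later in this diff:

# Minimal sketch; mirrors the DEEPSEEK_CHAT branch added above.
system_prompt = "<|begin▁of▁sentence|>"
seps = ["\n\n", "<|end▁of▁sentence|>"]  # [sep, sep2]
messages = [("User", "Hello"), ("Assistant", "Hi!")]

ret = system_prompt
for i, (role, message) in enumerate(messages):
    ret += role + ": " + message + seps[i % 2]
# ret == "<|begin▁of▁sentence|>User: Hello\n\nAssistant: Hi!<|end▁of▁sentence|>"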

Member:
Got it, thanks! Moving forward, we should consider migrating to HF's chat template so we can leave this responsibility to model developers.
https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/blob/79648bef7658bb824e4630740f6e1484c1b0620b/tokenizer_config.json#L34
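As a hedged sketch of that suggested migration (assuming transformers >= 4.34, which provides apply_chat_template), the prompt could be rendered from the tokenizer's own template instead of a hand-written SeparatorStyle:

from transformers import AutoTokenizer

# The tokenizer's tokenizer_config.json carries the chat template.
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-llm-67b-chat")
messages = [{"role": "user", "content": "Hello"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)  # format is decided by the model developers, not FastChat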

        else:
            raise ValueError(f"Invalid style: {self.sep_style}")

@@ -530,7 +540,7 @@ def get_conv_template(name: str) -> Conversation:
# Deepseek code default template
register_conv_template(
    Conversation(
-        name="deepseek",
+        name="deepseek-coder",
        system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
        roles=("### Instruction:", "### Response:"),
        sep="\n",
Member:
so deepseek-coder's sep_style is different from deepseek?

        sep_style=SeparatorStyle.DEEPSEEK_CHAT,

@BabyChouSr (Collaborator, Author):
Yup, its style is different according to this: https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct/blob/main/tokenizer_config.json

Deepseek-coder seems to do:

'### Instruction:\n' + message['content'] + '\n'
'### Response:\n' + message['content'] + '\n<|EOT|>\n'

So there's an extra '\n' after the instruction or response.
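A short usage sketch (assuming this PR is applied; the message text is illustrative) showing how the registered template renders a one-turn coder prompt:

from fastchat.conversation import get_conv_template

conv = get_conv_template("deepseek-coder")
conv.append_message(conv.roles[0], "Write hello world in Python.")
conv.append_message(conv.roles[1], None)  # leave the reply open for generation
print(conv.get_prompt())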

@@ -1265,6 +1275,20 @@ def get_conv_template(name: str) -> Conversation:
    )
)


# Deepseek-chat template
# reference: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/blob/main/tokenizer_config.json
register_conv_template(
    Conversation(
        name="deepseek-chat",
        system_message="<|begin▁of▁sentence|>",  # must add a bos token before first message
        roles=("User", "Assistant"),
        sep_style=SeparatorStyle.DEEPSEEK_CHAT,
        sep="\n\n",
        sep2="<|end▁of▁sentence|>",
        stop_str="<|end▁of▁sentence|>",
    )
)

if __name__ == "__main__":
    from fastchat.conversation import get_conv_template

24 changes: 24 additions & 0 deletions fastchat/model/model_adapter.py
@@ -1934,6 +1934,28 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
        return get_conv_template("Yi-34b-chat")


class DeepseekCoderAdapter(BaseModelAdapter):
    """The model adapter for deepseek-ai's coder models"""

    def match(self, model_path: str):
        return "deepseek-coder" in model_path.lower()

    def get_default_conv_template(self, model_path: str) -> Conversation:
        return get_conv_template("deepseek-coder")


class DeepseekChatAdapter(BaseModelAdapter):
    """The model adapter for deepseek-ai's chat models"""

    # Note: this model requires tokenizers >= 0.13.3 because the tokenizer class is LlamaTokenizerFast.

    def match(self, model_path: str):
        return "deepseek-llm" in model_path.lower() and "chat" in model_path.lower()

    def get_default_conv_template(self, model_path: str) -> Conversation:
        return get_conv_template("deepseek-chat")

# Note: the registration order matters.
# The one registered earlier has a higher matching priority.
register_model_adapter(PeftModelAdapter)
@@ -2005,6 +2027,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
register_model_adapter(PygmalionAdapter)
register_model_adapter(MicrosoftOrcaAdapter)
register_model_adapter(YiAdapter)
register_model_adapter(DeepseekCoderAdapter)
register_model_adapter(DeepseekChatAdapter)

# After all adapters, try the default base adapter.
register_model_adapter(BaseModelAdapter)
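
A brief usage sketch (assuming this PR is applied): adapters match by substring against the model path, with earlier registrations taking priority, so the deepseek model paths resolve to the new templates:

from fastchat.model.model_adapter import get_conversation_template

# Substring match on the model path selects the adapter.
conv = get_conversation_template("deepseek-ai/deepseek-llm-67b-chat")
print(conv.name)  # expected: "deepseek-chat"

conv = get_conversation_template("deepseek-ai/deepseek-coder-33b-instruct")
print(conv.name)  # expected: "deepseek-coder"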