Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
0d47c43
gguf: add GGUFReader.read_field(field) method + read template example
Apr 27, 2024
0d1d46e
grammars: add troubleshooting section to readme
Apr 8, 2024
63d1324
server.py: hacky code
Mar 25, 2024
ffc7436
agents: scripts to run scripts as sandboxed fastapi servers
Mar 26, 2024
d5d9993
server.py: default tools work!
Mar 26, 2024
8afd4de
server.py: make tools work w/ mixtral-8x7b-instruct
Mar 27, 2024
aa9605c
server.py: kinda api-compliant output, disabled grammar
Mar 27, 2024
a406293
server.py: reenable grammar, accommodate mistral's escaped underscores
Mar 27, 2024
63a384d
server.py: raise n_predict
Mar 28, 2024
5f3de16
server.py: pass all request options, comments in ts sigs, render tool…
Mar 28, 2024
59b4114
server.py: refactor chat handlers
Mar 29, 2024
253b68d
server.py: crude reactor
Mar 29, 2024
e874565
agent: split code from openai example
Mar 29, 2024
b63f91a
Update agent.py
Mar 29, 2024
c340e8c
Update example_weather_tools.py
Mar 29, 2024
ce2fb01
agent: add --allow_parallel_calls
Mar 29, 2024
ea34bd3
agent/openai: nits
Mar 29, 2024
80c7930
openai: fix message merging for mixtral (parallel calls)
Mar 29, 2024
9ab493f
Update prompting.py
Mar 29, 2024
e0c8af4
agent: --style
Mar 29, 2024
b4e292e
Create requirements.txt
Mar 29, 2024
d1d8602
agent: disable parallel by default
Mar 29, 2024
eb9a552
agent: nits
Mar 29, 2024
3da30ed
agent: fix functionary tool_calls templating
Mar 29, 2024
ff6563a
Delete test.sh
Mar 29, 2024
dd11bb6
agent: format still broken
Mar 29, 2024
22b980f
agent: update readme
Mar 29, 2024
61f35e0
agent: prepare to test various templates
Mar 29, 2024
d8a53ea
openai: test features of templates at runtime, to make sure no bits o…
Mar 30, 2024
ad2f4c1
Update test_chat_handlers.py
Mar 30, 2024
3c3eff5
openai: quiet + update prompt output
Mar 30, 2024
6935503
openai: refactor chat handler vs. template
Mar 30, 2024
d9f30f8
Update test_chat_handlers.md
Mar 30, 2024
da2067a
openai: only special-format assistant in thoughtful mode
Mar 30, 2024
09de4eb
openai: actually use thoughtful examples in tests
Mar 30, 2024
19811a4
openai: tests didn't catch output format
Mar 30, 2024
22fe86d
openai tools: TS signatures work well too at a fraction of the eval cost
Mar 30, 2024
6e52a9c
Update test_chat_handlers.md
Apr 8, 2024
701a66d
agent: fix response_format
Apr 9, 2024
b447a74
agent: revert to json schemas (ts not ready for refs)
Apr 9, 2024
85820f4
agent: fix sandbox dockerfile
Apr 9, 2024
6880f1d
agent: support basic openapi tools (incl. from fastify sandbox)
Apr 9, 2024
0532680
agent: nits
Apr 9, 2024
a634e03
agent: cache_prompt=True
Apr 10, 2024
9fe269e
openai: nit
Apr 10, 2024
a61ebeb
agent: hint at math import in python tool
Apr 10, 2024
24e34f1
agent: nit
Apr 10, 2024
1475b1e
agent: fix killing of subprocesses
Apr 10, 2024
6c00378
agent: nits
Apr 10, 2024
082d54d
agent: rename fake weather tools
Apr 10, 2024
f9afb04
agent: python tool: test serializability of variables
Apr 10, 2024
a98f483
agent: python tool: return errors
Apr 10, 2024
ea0c31b
agent: ensure DATA_DIR exists
Apr 10, 2024
89dcc06
agent: mypy type fixes
Apr 10, 2024
0120f7c
agent: fix wait --std-tools
Apr 10, 2024
09c2565
grammars: early exit when no next_candidates to reject
Apr 21, 2024
00c709e
grammars: cache decoded tokens
Apr 21, 2024
8d503ef
grammars: faster llama_grammar_copy
Apr 21, 2024
b4a00ce
Merge branch 'gguf-read' into agent-example
Apr 27, 2024
7675ac6
Merge remote-tracking branch 'origin/master' into agent-example
Apr 30, 2024
312e20b
openai: update after merge
Apr 30, 2024
ca1a640
server: tool call grammar-constraints
May 2, 2024
2b2127c
agent: url params
May 2, 2024
e41b6ce
server: update tool calling, introduce system prompt for json schema
May 2, 2024
a1d64cf
openai: function call arguments must be returned stringified!
May 18, 2024
3f5a25f
Merge remote-tracking branch 'origin/master' into agent-example
May 18, 2024
5ea637e
openai: fix merge
May 21, 2024
6dadcd2
Merge remote-tracking branch 'origin/master' into agent-example
May 21, 2024
c8458fa
openai: make content optional for tool call grammar gen
May 22, 2024
a39e6e0
openai: pretty indent json response
May 22, 2024
793f4ff
agent: support OpenAI: --endpoint https://api.openai.com --auth "Bear…
May 22, 2024
a1c4aac
server: ultra basic tools, tool_choice, tool_calls support
May 22, 2024
298c098
Merge remote-tracking branch 'origin/master' into agent-example
Jun 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
server.py: pass all request options, comments in ts sigs, render tool…
… calls
  • Loading branch information
ochafik committed Apr 27, 2024
commit 5f3de16116db536fe33d0859a79ff96e4d4f9d7e
29 changes: 26 additions & 3 deletions examples/openai/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Any, Dict, Literal, Optional, Union
from pydantic import BaseModel, Json
from pydantic import BaseModel, Json, TypeAdapter

class FunctionCall(BaseModel):
name: str
Expand Down Expand Up @@ -31,10 +31,33 @@ class ResponseFormat(BaseModel):
class ChatCompletionRequest(BaseModel):
model: str
tools: Optional[list[Tool]] = None
messages: list[Message]
messages: list[Message] = None
prompt: Optional[str] = None
response_format: Optional[ResponseFormat] = None
temperature: float = 1.0

stream: bool = False
cache_prompt: Optional[bool] = None
n_predict: Optional[int] = None
top_k: Optional[int] = None
top_p: Optional[float] = None
min_p: Optional[float] = None
tfs_z: Optional[float] = None
typical_p: Optional[float] = None
temperature: float = 1.0
dynatemp_range: Optional[float] = None
dynatemp_exponent: Optional[float] = None
repeat_last_n: Optional[int] = None
repeat_penalty: Optional[float] = None
frequency_penalty: Optional[float] = None
presense_penalty: Optional[float] = None
mirostat: Optional[bool] = None
mirostat_tau: Optional[float] = None
mirostat_eta: Optional[float] = None
penalize_nl: Optional[bool] = None
n_keep: Optional[int] = None
seed: Optional[int] = None
n_probs: Optional[int] = None
min_keep: Optional[int] = None

class Choice(BaseModel):
index: int
Expand Down
74 changes: 47 additions & 27 deletions examples/openai/prompting.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def add_system_prompt(self, messages: list[Message], system_prompt: Message) ->
system_message = next(((i, m) for i, m in enumerate(messages) if m.role == "system"), None)
if system_message is not None:
(i, m) = system_message
return messages[:i] + [Message(role="system", content=m.content + '\n' + system_prompt.content)] + messages[i+1:]
return messages[:i] + [Message(role="system", content=system_prompt.content + '\n' + m.content)] + messages[i+1:]
else:
return [system_prompt] + messages

Expand All @@ -63,22 +63,32 @@ def render(self, messages: list[Message], add_generation_prompt: bool, omit_bos:
assert messages[i+1].role == 'user'
new_messages.append(Message(
role="user",
content=f'[SYS]{messages[i].content}[/SYS]\n{messages[i+1].content}'))
content=f'[SYS]{messages[i].content}[/SYS]\n{messages[i+1].content}'
))
i += 2
elif messages[i].role == 'assistant' and messages[i].tool_calls and messages[i].content:
tc = '\n'.join(f'<tool_call>{json.dumps(tc.model_dump())}</tool_call>' for tc in messages[i].tool_calls)
new_messages.append(Message(
role="assistant",
content=f'{messages[i].content}\n{tc}'
))
i += 1
else:
new_messages.append(messages[i])
i += 1
# print(f'new_messages={json.dumps(new_messages, indent=2)}')
messages = new_messages
# print(f'messages={messages}')

return self.template.render(
result = self.template.render(
messages=messages,
eos_token=self.eos_token,
bos_token='' if omit_bos else self.bos_token,
raise_exception=raise_exception,
add_generation_prompt=add_generation_prompt,
)
sys.stderr.write(f'\n# RENDERED:\n\n{result}\n\n')
return result

# While the API will be usable with a generic tools usage like OpenAI,
# (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
Expand Down Expand Up @@ -120,38 +130,29 @@ def make_tools_prompt(chat_format: ChatFormat, tools: list[Tool], indent=2) -> M
return Message(
role="system",
content='\n'.join([
'''You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.''',
# '''You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.''',
'''You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:''',
'''<tools>''',
*(json.dumps(tool.model_dump(), indent=indent) for tool in tools),
_tools_typescript_signatures(tools),
# _tools_schema_signatures(tools, indent=indent),
'''</tools>''',
'',
'''Use the following json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}''',
'',
# '''Use the following json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}''',
# '',
# '''For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:''',
'''To call each function, give its name and arguments within <tool_call></tool_call> XML tags as follows:''',
'''<tool_call>''',
'''{"arguments": <args-dict>, "name": <function-name>}''',
'''{"name": <function-name>, "arguments": <args-dict>}''',
'''</tool_call>''',
'''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it.''',
# '''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with <tool_call>...</tool_call>.''',
])
)

elif chat_format.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
ts_converter = SchemaToTypeScriptConverter()

return Message(
role="system",
content='\n'.join([
'// Supported function definitions that should be called when necessary.'
'namespace functions {',
*[
'// ' + tool.function.description.replace('\n', '\n// ') + '\n' + ''
'type ' + tool.function.name + ' = (_: ' + ts_converter.visit(tool.function.parameters) + ") => any;\n"
for tool in tools
],
'} // namespace functions',
])
content= '// Supported function definitions that should be called when necessary.\n' +
_tools_typescript_signatures(tools)
)

elif chat_format.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
Expand All @@ -170,6 +171,20 @@ def make_tools_prompt(chat_format: ChatFormat, tools: list[Tool], indent=2) -> M
else:
raise ValueError(f"Unsupported tool call style: {chat_format.tool_style}")

def _tools_typescript_signatures(tools: list[Tool]) -> str:
    """Render tools as TypeScript-style function signatures in a namespace.

    Each tool becomes an optional `// description` comment followed by a
    `type <name> = (_: <params>) => any;` declaration, all wrapped in a
    `namespace functions { ... }` block (the functionary prompt style).

    Fixes vs. previous version:
    - emit a newline after the opening brace so the first tool's comment is
      not glued onto the `namespace functions {` line;
    - tolerate a missing/None description (allowed in user tool schemas);
    - drop the dead `+ ''` concatenation.
    """
    ts_converter = SchemaToTypeScriptConverter()
    sigs = []
    for tool in tools:
        fn = tool.function
        # Description may be None/absent; only emit the comment when present.
        desc = ('// ' + fn.description.replace('\n', '\n// ') + '\n') if fn.description else ''
        sigs.append(desc + 'type ' + fn.name + ' = (_: ' + ts_converter.visit(fn.parameters) + ") => any;\n")
    return 'namespace functions {\n' + '\n'.join(sigs) + '} // namespace functions'

def _tools_schema_signatures(tools: list[Tool], indent=None) -> str:
    """Render each tool as its JSON-schema dump, joined with newlines.

    `indent` is forwarded to `json.dumps` (None keeps each schema compact).
    """
    dumped = [json.dumps(tool.model_dump(), indent=indent) for tool in tools]
    return '\n'.join(dumped)

@typechecked
def _outputs_tool_call_tags(style: ToolsPromptStyle) -> bool:
return style in (
Expand Down Expand Up @@ -199,6 +214,8 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
assert planted_prompt.startswith(empty_prompt), f"Planted prompt does not start with empty prompt: {planted_prompt} vs {empty_prompt}"
[prefix, suffix] = planted_prompt[len(empty_prompt):].split(delimiter)

allow_parallel_calls = False

def strip_suffix(s: str) -> str:
if s.endswith(suffix):
return s[:-len(suffix)]
Expand Down Expand Up @@ -235,17 +252,19 @@ def format_literal(s: str) -> str:

tool_call_rule = converter._add_rule(
'tool_call',
format_literal("<tool_call>") + " (" +
format_literal("<tool_call>") + " space (" +
' | '.join(tool_rules) +
") " + format_literal("</tool_call>"))
") space " + format_literal("</tool_call>"))# + ' space')

# Ideally we'd want a negative lookahead of /<tool\\?_call>/, but it's just too hard to express in GBNF for now.
# So we just over-constrain the content rule to not contain literals dangerously getting close to <tool_call>
content_rule = converter._add_rule('content', '[^<] | "<" [^t<]? | "<t" [^o<]?')
content_rule = converter._add_rule('content', '[^<] | "<" [^t<] | "<t" [^o<]')
# content_rule = converter._add_rule('content', converter.not_literal('<tool_call>'))
converter._add_rule(
'root',
f'{content_rule}* ({tool_call_rule}+ {content_rule}*)?')
# tool_call_rule)
f'{content_rule}* ({tool_call_rule}+ {content_rule}*)?' if allow_parallel_calls \
else f'{content_rule}* {tool_call_rule}?')

# # Constrain the output to be a non-tool-call message (constrained to a JSON schema or not)
# #Β OR a tool-call message respecting the schema of any of the tools
Expand Down Expand Up @@ -285,7 +304,7 @@ def parse(s: str) -> Optional[Message]:
id=gen_callid(),
function=FunctionCall(**fc)))

content = '(...)'.join(content).strip()
content = '\n'.join(content).strip()
return Message(role="assistant", content=content if content else None, tool_calls=tool_calls)

# if '<tool_call>'.startswith(ls) or ls.startswith('<tool_call>'):
Expand Down Expand Up @@ -338,7 +357,8 @@ def parse(s: str) -> Optional[Message]:
converter._add_rule(
'root',
f'{content_without_start_rule} {content_rule}* ({tool_call_rule}+ {content_rule}*)? | '
f'{tool_call_without_start_rule} {tool_call_rule}* {content_rule}*')
f'{tool_call_without_start_rule} {tool_call_rule}* {content_rule}*' if allow_parallel_calls \
else f'{content_without_start_rule} {tool_call_rule}? | {tool_call_without_start_rule}')

# converter._add_rule(
# "root",
Expand Down
32 changes: 23 additions & 9 deletions examples/openai/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ def main(
async def chat_completions(request: Request, chat_request: ChatCompletionRequest):
headers = {
"Content-Type": "application/json",
"Authorization": request.headers.get("Authorization"),
}
if (auth := request.headers.get("Authorization")):
headers["Authorization"] = auth

if chat_request.response_format is not None:
assert chat_request.response_format.type == "json_object", f"Unsupported response format: {chat_request.response_format.type}"
Expand All @@ -75,18 +76,31 @@ async def chat_completions(request: Request, chat_request: ChatCompletionRequest
(grammar, parser) = make_grammar(chat_format, chat_request.tools, response_schema)

# TODO: Test whether the template supports formatting tool_calls
sys.stderr.write(f'\n{grammar}\n\n')

prompt = chat_format.render(messages, add_generation_prompt=True)

sys.stderr.write(f'\n# PROMPT:\n\n{prompt}\n\n')
sys.stderr.write(f'\n# GRAMMAR:\n\n{grammar}\n\n')

data = LlamaCppServerCompletionRequest(
**{
k: v
for k, v in chat_request.model_dump().items()
if k not in (
"prompt",
"tools",
"messages",
"response_format",
)
},
prompt=prompt,
grammar=grammar,
).model_dump()
sys.stderr.write(json.dumps(data, indent=2) + "\n")
async with httpx.AsyncClient() as client:
response = await client.post(
f"{cpp_server_endpoint}/completions",
json=LlamaCppServerCompletionRequest(
prompt=prompt,
stream=chat_request.stream,
n_predict=1000,
grammar=grammar,
).model_dump(),
json=data,
headers=headers,
timeout=None)

Expand All @@ -96,11 +110,11 @@ async def chat_completions(request: Request, chat_request: ChatCompletionRequest
return StreamingResponse(generate_chunks(response), media_type="text/event-stream")
else:
result = response.json()
sys.stderr.write("# RESULT:\n\n" + json.dumps(result, indent=2) + "\n\n")
if 'content' not in result:
# print(json.dumps(result, indent=2))
return JSONResponse(result)

sys.stderr.write(json.dumps(result, indent=2) + "\n")
# print(json.dumps(result.get('content'), indent=2))
message = parser(result["content"])
assert message is not None, f"Failed to parse response:\n{response.text}\n\n"
Expand Down
13 changes: 11 additions & 2 deletions examples/openai/ts_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,21 @@ class SchemaToTypeScriptConverter:
# // where to get weather.
# location: string,
# }) => any;
def _desc_comment(self, schema: dict):
desc = schema.get("description", "").replace("\n", "\n// ") if 'description' in schema else None
return f'// {desc}\n' if desc else ''

def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], additional_properties: Union[bool, Any]):
if additional_properties == True:
additional_properties = {}
elif additional_properties == False:
additional_properties = None

return "{" + ', '.join([
f'{prop_name}{"" if prop_name in required else "?"}: {self.visit(prop_schema)}'
f'{self._desc_comment(prop_schema)}{prop_name}{"" if prop_name in required else "?"}: {self.visit(prop_schema)}'
for prop_name, prop_schema in properties
] + (
[f"[key: string]: {self.visit(additional_properties)}"]
[f"{self._desc_comment(additional_properties) if additional_properties else ''}[key: string]: {self.visit(additional_properties)}"]
if additional_properties is not None else []
)) + "}"

Expand Down