Closed
Changes from 1 commit
73 commits
0d47c43
gguf: add GGUFReader.read_field(field) method + read template example
Apr 27, 2024
0d1d46e
grammars: add troubleshooting section to readme
Apr 8, 2024
63d1324
server.py: hacky code
Mar 25, 2024
ffc7436
agents: scripts to run scripts as sandboxed fastapi servers
Mar 26, 2024
d5d9993
server.py: default tools work!
Mar 26, 2024
8afd4de
server.py: make tools work w/ mixtral-8x7b-instruct
Mar 27, 2024
aa9605c
server.py: kinda api-compliant output, disabled grammar
Mar 27, 2024
a406293
server.py: reenable grammar, accommodate mistral's escaped underscores
Mar 27, 2024
63a384d
server.py: raise n_predict
Mar 28, 2024
5f3de16
server.py: pass all request options, comments in ts sigs, render tool…
Mar 28, 2024
59b4114
server.py: refactor chat handlers
Mar 29, 2024
253b68d
server.py: crude reactor
Mar 29, 2024
e874565
agent: split code from openai example
Mar 29, 2024
b63f91a
Update agent.py
Mar 29, 2024
c340e8c
Update example_weather_tools.py
Mar 29, 2024
ce2fb01
agent: add --allow_parallel_calls
Mar 29, 2024
ea34bd3
agent/openai:nits
Mar 29, 2024
80c7930
openai: fix message merging for mixtral (parallel calls)
Mar 29, 2024
9ab493f
Update prompting.py
Mar 29, 2024
e0c8af4
agent: --style
Mar 29, 2024
b4e292e
Create requirements.txt
Mar 29, 2024
d1d8602
agent: disable parallel by default
Mar 29, 2024
eb9a552
agent: nits
Mar 29, 2024
3da30ed
agent: fix functionary tool_calls templating
Mar 29, 2024
ff6563a
Delete test.sh
Mar 29, 2024
dd11bb6
agent: format still broken
Mar 29, 2024
22b980f
agent: update readme
Mar 29, 2024
61f35e0
agent: prepare to test various templates
Mar 29, 2024
d8a53ea
openai: test features of templates at runtime, to make sure no bits o…
Mar 30, 2024
ad2f4c1
Update test_chat_handlers.py
Mar 30, 2024
3c3eff5
openai: quiet + update prompt output
Mar 30, 2024
6935503
openai: refactor chat handler vs. template
Mar 30, 2024
d9f30f8
Update test_chat_handlers.md
Mar 30, 2024
da2067a
openai: only special-format assistant in thoughtful mode
Mar 30, 2024
09de4eb
openai: actually use thoughtful examples in tests
Mar 30, 2024
19811a4
openai: tests didn't catch output format
Mar 30, 2024
22fe86d
openai tools: TS signatures work well too at a fraction of the eval cost
Mar 30, 2024
6e52a9c
Update test_chat_handlers.md
Apr 8, 2024
701a66d
agent: fix response_format
Apr 9, 2024
b447a74
agent: revert to json schemas (ts not ready for refs)
Apr 9, 2024
85820f4
agent: fix sandbox dockerfile
Apr 9, 2024
6880f1d
agent: support basic openapi tools (incl. from fastify sandbox)
Apr 9, 2024
0532680
agent: nits
Apr 9, 2024
a634e03
agent: cache_prompt=True
Apr 10, 2024
9fe269e
openai: nit
Apr 10, 2024
a61ebeb
agent: hint at math import in python tool
Apr 10, 2024
24e34f1
agent: nit
Apr 10, 2024
1475b1e
agent: fix killing of subprocesses
Apr 10, 2024
6c00378
agent: nits
Apr 10, 2024
082d54d
agent: rename fake weather tools
Apr 10, 2024
f9afb04
agent: python tool: test serializability of variables
Apr 10, 2024
a98f483
agent: python tool: return errors
Apr 10, 2024
ea0c31b
agent: ensure DATA_DIR exists
Apr 10, 2024
89dcc06
agent: mypy type fixes
Apr 10, 2024
0120f7c
agent: fix wait --std-tools
Apr 10, 2024
09c2565
grammars: early exit when no next_candidates to reject
Apr 21, 2024
00c709e
grammars: cache decoded tokens
Apr 21, 2024
8d503ef
grammars: faster llama_grammar_copy
Apr 21, 2024
b4a00ce
Merge branch 'gguf-read' into agent-example
Apr 27, 2024
7675ac6
Merge remote-tracking branch 'origin/master' into agent-example
Apr 30, 2024
312e20b
openai: update after merge
Apr 30, 2024
ca1a640
server: tool call grammar-constraints
May 2, 2024
2b2127c
agent: url params
May 2, 2024
e41b6ce
server: update tool calling, introduce system prompt for json schema
May 2, 2024
a1d64cf
openai: function call arguments must be returned stringified!
May 18, 2024
3f5a25f
Merge remote-tracking branch 'origin/master' into agent-example
May 18, 2024
5ea637e
openai: fix merge
May 21, 2024
6dadcd2
Merge remote-tracking branch 'origin/master' into agent-example
May 21, 2024
c8458fa
openai: make content optional for tool call grammar gen
May 22, 2024
a39e6e0
openai: pretty indent json response
May 22, 2024
793f4ff
agent: support OpenAI: --endpoint https://api.openai.com --auth "Bear…
May 22, 2024
a1c4aac
server: ultra basic tools, tool_choice, tool_calls support
May 22, 2024
298c098
Merge remote-tracking branch 'origin/master' into agent-example
Jun 9, 2024
agent: --style
ochafik committed Apr 27, 2024
commit e0c8af4ba06cbbaef7fa7c3facb9345dfd3a438b
18 changes: 17 additions & 1 deletion examples/agent/README.md
@@ -8,7 +8,6 @@ python -m examples.agent \
--tools examples/agent/tools/example_math_tools.py \
--goal "What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?"
```
<!-- --format float \ -->

<details>
<summary>Show output</summary>
@@ -37,6 +36,23 @@ python -m examples.agent \
<summary>Show output</summary>

```bash
πŸ’­ I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
βš™οΈ get_current_weather(location=San Francisco, format=fahrenheit) -> ...
πŸ’­ I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
βš™οΈ get_n_day_weather_forecast(location=San Francisco, format=fahrenheit, num_days=4) -> ...
πŸ’­ I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
βš™οΈ get_n_day_weather_forecast(location=Glasgow, format=celsius, num_days=4) -> ...
The current weather in San Francisco is sunny and 87.8F. Here is the 4-day weather forecast:

For San Francisco:
- In 1 day: Cloudy, 60.8F
- In 2 days: Sunny, 73.4F
- In 3 days: Cloudy, 62.6F

For Glasgow:
- In 1 day: Cloudy, 16C
- In 2 days: Sunny, 23C
- In 3 days: Cloudy, 17C
```

</details>
9 changes: 6 additions & 3 deletions examples/agent/agent.py
@@ -12,6 +12,7 @@
from examples.agent.tools.std_tools import StandardTools
from examples.openai.api import ChatCompletionRequest, ChatCompletionResponse, Message, Tool, ToolFunction
from examples.agent.utils import collect_functions, load_module
from examples.openai.prompting import ToolsPromptStyle

def _get_params_schema(fn: Callable, verbose):
converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
@@ -130,6 +131,7 @@ def main(
auth: Optional[str] = None,
parallel_calls: Optional[bool] = True,
verbose: bool = False,
style: Optional[ToolsPromptStyle] = None,

model: Annotated[Optional[Path], typer.Option("--model", "-m")] = "models/7B/ggml-model-f16.gguf",
endpoint: Optional[str] = None,
@@ -175,8 +177,8 @@ def main(
"--model", model,
*(['--verbose'] if verbose else []),
*(['--parallel-calls'] if parallel_calls else []),
*(['--context-length={context_length}'] if context_length else []),
*([])
*([f'--context-length={context_length}'] if context_length else []),
*([f'--style={style.value}'] if style else []),
]
server_process = subprocess.Popen(cmd, stdout=sys.stderr)
atexit.register(server_process.kill)
@@ -196,7 +198,7 @@ def main(
if std_tools:
tool_functions.extend(collect_functions(StandardTools))

response_model = None#str
response_model = str
if format:
if format in types:
response_model = types[format]
Expand Down Expand Up @@ -245,6 +247,7 @@ def main(
}]
)
print(result if response_model else f'➑️ {result}')
# exit(0)

if __name__ == '__main__':
typer.run(main)
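The `agent.py` hunk above fixes a missing f-string prefix (the literal `'--context-length={context_length}'` was being passed through unsubstituted) and forwards the new `--style` flag to the server subprocess. A minimal stdlib-only sketch of that argument-building pattern (function and module names are hypothetical stand-ins):

```python
from enum import Enum
from typing import List, Optional

class ToolsPromptStyle(str, Enum):
    # str-valued members, matching the enum change in prompting.py
    TOOLS_SHORT = "short"
    TOOLS_LONG = "long"

def build_server_cmd(style: Optional[ToolsPromptStyle] = None,
                     context_length: Optional[int] = None) -> List[str]:
    # Without the f-prefix, Python would pass the literal string
    # '--context-length={context_length}' instead of the actual value.
    return [
        "python", "-m", "examples.openai.server",
        *([f"--context-length={context_length}"] if context_length else []),
        *([f"--style={style.value}"] if style else []),
    ]

print(build_server_cmd(ToolsPromptStyle.TOOLS_LONG, 4096))
# -> ['python', '-m', 'examples.openai.server', '--context-length=4096', '--style=long']
```

The splat-of-conditional-list idiom (`*([...] if flag else [])`) keeps optional flags out of the command entirely when unset, rather than passing empty strings.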
62 changes: 35 additions & 27 deletions examples/openai/prompting.py
@@ -14,45 +14,45 @@
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.ts_converter import SchemaToTypeScriptConverter

_THOUGHT_KEY = "thought"
# _THOUGHT_KEY = "thought_about_next_step_only"

# While the API will be usable with a generic tools usage like OpenAI,
# (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
# each model may need specific prompting (and/or constrained output,
# especially for models not fine-tuned for tool usage / function calling).
class ToolsPromptStyle(Enum):
class ToolsPromptStyle(str, Enum):
# Short prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
TOOLS_SHORT = 1
TOOLS_SHORT = "short"

# Longer prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
TOOLS_LONG = 2
TOOLS_LONG = "long"

# Bespoke constrained output format that favours thought and reasoning
# while allowing unambiguous parsing of parallel tool calling.
TOOLS_BESPOKE = 3
TOOLS_CONSTRAINED = "thoughtful_steps"

# Large prompt for https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
# <tool_call>...</tool_call> output
# Requires:
# - git clone https://github.com/NousResearch/Hermes-Function-Calling examples/openai/hermes_function_calling
# - Set large context length as their prompts are super long
TOOLS_HERMES_2_PRO = 4
TOOLS_HERMES_2_PRO = "tools_hermes_2_pro"

# Seems to want to escape underscores in tool names and in the <tool\_call>...</tool\_call> tags
TOOLS_MISTRAL = 5
TOOLS_MIXTRAL = "mixtral"

# Short prompt w/ TypeScript definitions for https://github.com/MeetKai/functionary
# https://github.com/MeetKai/functionary/blob/main/functionary/prompt_template/prompt_template_v2.py
# Note: see this prior attempt to support Functionary: https://github.com/ggerganov/llama.cpp/pull/5695
TYPESCRIPT_FUNCTIONARY_V2 = 6
TYPESCRIPT_FUNCTIONARY_V2 = "functionary_v2"

def raise_exception(msg: str):
raise Exception(msg)

class ChatTemplate(BaseModel):
template: str

@property
def tool_style(self) -> 'ToolsPromptStyle':
return self._tool_style
inferred_tool_style: Optional['ToolsPromptStyle'] = None

def __init__(self, template: str, eos_token: str, bos_token: str):
super().__init__(template=template
@@ -65,12 +65,12 @@ def __init__(self, template: str, eos_token: str, bos_token: str):
self._strict_user_assistant_alternation = "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception" in template

if "<|recipient|>' + tool_call['function']['name']" in template:
self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
self.inferred_tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
else:
self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
# self._tool_style = ToolsPromptStyle.TOOLS_LONG
# self._tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
# self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
self.inferred_tool_style = ToolsPromptStyle.TOOLS_CONSTRAINED
# self.inferred_tool_style = ToolsPromptStyle.TOOLS_LONG
# self.inferred_tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
# self.inferred_tool_style = ToolsPromptStyle.TOOLS_MIXTRAL

# TODO: Test whether the template supports formatting tool_calls

@@ -399,7 +399,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
"type": "object",
"properties": {
# "original_goal": {"title": "Original Goal", "type": "string"},
"thought_about_next_step_only": {
_THOUGHT_KEY: {
"title": "Thought about next step",
# "title": "Thought about how the next step brings us closer to achieving the original goal",
"type": "string"
@@ -430,7 +430,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
]
},
},
"required": ["original_goal", "thought_about_next_step_only", "next_step"]
"required": ["original_goal", _THOUGHT_KEY, "next_step"]
# "required": ["next_step"]
}

@@ -505,7 +505,7 @@ def parse(self, s: str) -> Optional[Message]:
elif 'tool_calls' in next_step:
return Message(
role="assistant",
content=data["thought_about_next_step_only"] if "thought_about_next_step_only" in data else None,
content=data.get(_THOUGHT_KEY),
tool_calls=[
ToolCall(id=gen_callid(), function=FunctionCall(**tc))
for tc in next_step['tool_calls']
@@ -539,20 +539,28 @@ def parse(self, s: str) -> Optional[Message]:
# 'This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with <tool_call>...</tool_call>.''',
])

def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool) -> ChatHandler:
def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool, tool_style: Optional[ToolsPromptStyle] = None) -> ChatHandler:
tool_style = tool_style or args.chat_template.inferred_tool_style

if not args.tools:
return NoToolsChatHandler(args)
elif args.chat_template.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:

elif tool_style == ToolsPromptStyle.TOOLS_CONSTRAINED:
return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)

elif tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_SHORT:

elif tool_style == ToolsPromptStyle.TOOLS_SHORT:
return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_LONG:

elif tool_style == ToolsPromptStyle.TOOLS_LONG:
return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_MISTRAL:

elif tool_style == ToolsPromptStyle.TOOLS_MIXTRAL:
return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_BESPOKE:
return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)
elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:

elif tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
return Hermes2ProToolsChatHandler(args)
else:
raise ValueError(f"Unsupported tool call style: {args.chat_template.tool_style}")
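Two ideas from the `prompting.py` changes are worth isolating: making the enum inherit from `str` so members parse directly from their CLI string form, and resolving an explicit override before falling back to the template-inferred style (`tool_style = tool_style or args.chat_template.inferred_tool_style`). A hedged sketch with a reduced enum:

```python
from enum import Enum
from typing import Optional

class ToolsPromptStyle(str, Enum):
    TOOLS_SHORT = "short"
    TOOLS_CONSTRAINED = "thoughtful_steps"

def resolve_style(explicit: Optional[ToolsPromptStyle],
                  inferred: Optional[ToolsPromptStyle]) -> Optional[ToolsPromptStyle]:
    # An explicit --style always wins; otherwise use what the chat
    # template inference produced (which may itself be None).
    return explicit or inferred

# A str-mixin Enum round-trips through its value string, which is what
# lets a CLI layer accept "thoughtful_steps" and hand back the member.
assert ToolsPromptStyle("thoughtful_steps") is ToolsPromptStyle.TOOLS_CONSTRAINED
print(resolve_style(None, ToolsPromptStyle.TOOLS_SHORT).value)
# -> short
```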
6 changes: 4 additions & 2 deletions examples/openai/server.py
@@ -12,7 +12,7 @@
from examples.openai.llama_cpp_server_api import LlamaCppServerCompletionRequest
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.api import ChatCompletionResponse, Choice, Message, ChatCompletionRequest, Usage
from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, get_chat_handler, ChatHandler
from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, ToolsPromptStyle, get_chat_handler, ChatHandler

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
@@ -32,6 +32,7 @@ def main(
host: str = "localhost",
port: int = 8080,
parallel_calls: Optional[bool] = True,
style: Optional[ToolsPromptStyle] = None,
auth: Optional[str] = None,
verbose: bool = False,
context_length: Optional[int] = None,
@@ -92,7 +93,8 @@ async def chat_completions(request: Request, chat_request: ChatCompletionRequest

chat_handler = get_chat_handler(
ChatHandlerArgs(chat_template=chat_template, response_schema=response_schema, tools=chat_request.tools),
parallel_calls=parallel_calls
parallel_calls=parallel_calls,
tool_style=style,
)

messages = chat_request.messages
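The `server.py` hunk threads the optional `--style` option into `get_chat_handler` as `tool_style`, where `None` means "dispatch on the inferred style". A simplified, hypothetical stand-in for that dispatch (handler names reduced to strings for illustration):

```python
from enum import Enum
from typing import Optional

class ToolsPromptStyle(str, Enum):
    TOOLS_LONG = "long"
    TOOLS_CONSTRAINED = "thoughtful_steps"

def get_handler_name(tool_style: Optional[ToolsPromptStyle] = None,
                     inferred: ToolsPromptStyle = ToolsPromptStyle.TOOLS_CONSTRAINED) -> str:
    # None falls back to the template-inferred default, as in the real
    # get_chat_handler; an unknown style raises, mirroring its else branch.
    style = tool_style or inferred
    handlers = {
        ToolsPromptStyle.TOOLS_LONG: "TemplatedToolsChatHandler",
        ToolsPromptStyle.TOOLS_CONSTRAINED: "BespokeToolsChatHandler",
    }
    if style not in handlers:
        raise ValueError(f"Unsupported tool call style: {style}")
    return handlers[style]

print(get_handler_name())
# -> BespokeToolsChatHandler
```

Keeping the override optional at every layer (CLI, request handling, handler factory) means existing callers that never pass `style` keep their previous behavior.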