import { callLLMChatCompletion, LLMMessage } from "./llmUtils";
import { Config } from "./config";
import { countMessageTokens } from "./tokenCounter";

interface ChatWithAiArgs {
  prompt: string;
  userInput: string;
  /** Mutated in place: the user input and assistant reply are appended. */
  fullMessageHistory: LLMMessage[];
  permanentMemory: string[];
  /** Total token budget for the request, including the response. */
  tokenLimit: number;
  debug?: boolean;
}

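/**
 * Build a context from the prompt, the permanent memory, and as much recent
 * history as fits within the token budget, then ask the LLM for a reply.
 * Appends both the user input and the assistant's reply to
 * `fullMessageHistory` before returning the reply.
 */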
export async function chatWithAI({
  prompt,
  userInput,
  fullMessageHistory,
  permanentMemory,
  tokenLimit,
  debug = false,
}: ChatWithAiArgs): Promise<string> {
  try {
    const model = Config.fast_llm_model;
    // Reserve 1000 tokens of the window for the model's response.
    const sendTokenLimit = tokenLimit - 1000;

    const currentContext: LLMMessage[] = [
      { role: "system", content: prompt },
      // Join the entries so the array is not serialized with commas.
      {
        role: "system",
        content: `Permanent memory: ${permanentMemory.join("\n")}`,
      },
    ];

    let nextMessageToAddIndex = fullMessageHistory.length - 1;
    let currentTokensUsed = 0;
    // History messages are spliced in here, after the two system messages.
    const insertionIndex = currentContext.length;

    // Count the system messages and the user's input against the budget up front.
    currentTokensUsed = countMessageTokens(currentContext, model);
    currentTokensUsed += countMessageTokens(
      [{ role: "user", content: userInput }],
      model
    );

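    // Walk the history backwards, prepending the most recent messages that
    // still fit under the send budget.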
    while (nextMessageToAddIndex >= 0) {
      const messageToAdd = fullMessageHistory[nextMessageToAddIndex];
      const tokensToAdd = countMessageTokens([messageToAdd], model);

      if (currentTokensUsed + tokensToAdd > sendTokenLimit) {
        break;
      }

      currentContext.splice(insertionIndex, 0, messageToAdd);
      currentTokensUsed += tokensToAdd;
      nextMessageToAddIndex -= 1;
    }
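    // Messages at or before nextMessageToAddIndex did not fit and are omitted.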

    currentContext.push({ role: "user", content: userInput });
    const tokensRemaining = tokenLimit - currentTokensUsed;

    if (debug) {
      console.log(`Token limit: ${tokenLimit}`);
      console.log(`Send Token Count: ${currentTokensUsed}`);
      console.log(`Tokens remaining for response: ${tokensRemaining}`);
      console.log("------------ CONTEXT SENT TO AI ---------------");
      for (const message of currentContext) {
        // Skip the main prompt itself to keep the debug output readable.
        if (message.role === "system" && message.content === prompt) {
          continue;
        }
        console.log(
          `${message.role.charAt(0).toUpperCase() + message.role.slice(1)}: ${
            message.content
          }`
        );
        console.log();
      }
      console.log("----------- END OF CONTEXT ----------------");
    }

    const assistantReply = await callLLMChatCompletion(
      currentContext,
      model,
      undefined /* temperature */,
      tokensRemaining
    );

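    // Record the exchange so future calls can include it in the context.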
    fullMessageHistory.push({ role: "user", content: userInput });
    fullMessageHistory.push({
      role: "assistant",
      content: assistantReply,
    });

    return assistantReply;
  } catch (error) {
    console.error("Error calling chat", error);
    throw error;
  }
}
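
// Example usage (a minimal sketch: `agentPrompt` and `memory` are
// hypothetical values, and the 4000-token limit is an assumed budget):
//
//   const history: LLMMessage[] = [];
//   const reply = await chatWithAI({
//     prompt: agentPrompt,
//     userInput: "Determine which next command to use.",
//     fullMessageHistory: history,
//     permanentMemory: memory,
//     tokenLimit: 4000,
//   });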