Added self-correcting assistant functionality to diagnose, generate new SQL, and retry failed queries.

disler · disler · commit 6b8e1c89451f · 2023-11-25T12:57:38.000-06:00
diff --git a/api-server/api/index.py b/api-server/api/index.py
@@ -2,10 +2,12 @@
 from flask import Flask, Request, Response, jsonify, request, make_response
 import dotenv
 from modules import db, llm, emb, instruments
+from modules.turbo4 import Turbo4
 
 import os
 
 from modules.models import TurboTool
+from psycopg2 import Error as PostgresError
 
 app = Flask(__name__)
 
@@ -34,17 +36,97 @@ def make_cors_response():
     return response
 
 
+# ---------------- Self Correcting Assistant ----------------
+
+
+def self_correcting_assistant(
+    db: db.PostgresManager,
+    agent_instruments: instruments.AgentInstruments,
+    tools: TurboTool,
+    error: PostgresError,
+):
+    # reset db - to unblock transactions
+    db.roll_back()
+
+    all_table_definitions = db.get_table_definitions_for_prompt()
+
+    print(f"Loaded all table definitions")
+
+    # ------ File prep
+
+    file_path = agent_instruments.self_correcting_table_def_file
+
+    # write all_table_definitions to file
+    with open(file_path, "w") as f:
+        f.write(all_table_definitions)
+
+    files_to_upload = [file_path]
+
+    sql_query = open(agent_instruments.sql_query_file).read()
+
+    # ------ Prompts
+
+    output_file_path = agent_instruments.run_sql_results_file
+
+    diagnosis_prompt = f"Given the table_definitions.sql file, the following SQL_ERROR, and the SQL_QUERY, describe the most likely cause of the error. Think step by step.\n\nSQL_ERROR: {error}\n\nSQL_QUERY: {sql_query}"
+
+    generation_prompt = (
+        f"Based on your diagnosis, generate a new SQL query that will run successfully."
+    )
+
+    run_sql_prompt = "Use the run_sql function to run the SQL you've just generated."
+
+    assistant_name = "SQL Self Correction"
+
+    turbo4_assistant = Turbo4().get_or_create_assistant(assistant_name)
+
+    print(f"Generated Assistant: {assistant_name}")
+
+    file_ids = turbo4_assistant.upsert_files(files_to_upload)
+
+    print(f"Uploaded files: {file_ids}")
+
+    print(f"Running Self Correction Assistant...")
+
+    (
+        turbo4_assistant.set_instructions(
+            "You're an elite SQL developer. You generate the most concise and performant SQL queries. You review failed queries and generate new SQL queries to fix them."
+        )
+        .enable_retrieval()
+        .equip_tools(tools)
+        .make_thread()
+        # 1/3 STEP PATTERN: diagnose
+        .add_message(diagnosis_prompt, file_ids=file_ids)
+        .run_thread()
+        .spy_on_assistant(agent_instruments.make_agent_chat_file(assistant_name))
+        # 2/3 STEP PATTERN: generate
+        .add_message(generation_prompt)
+        .run_thread()
+        .spy_on_assistant(agent_instruments.make_agent_chat_file(assistant_name))
+        # 3/3 STEP PATTERN: execute
+        .add_message(run_sql_prompt)
+        .run_thread(toolbox=[tools[0].name])
+        .spy_on_assistant(agent_instruments.make_agent_chat_file(assistant_name))
+        # clean up, logging, reporting, cost
+        .run_validation(agent_instruments.validate_file_exists(output_file_path))
+        .spy_on_assistant(agent_instruments.make_agent_chat_file(assistant_name))
+        .get_costs_and_tokens(agent_instruments.make_agent_cost_file(assistant_name))
+    )
+
+    pass
+
+
 # ---------------- Primary Endpoint ----------------
 
 
 @app.route("/prompt", methods=["POST", "OPTIONS"])
 def prompt():
-    response = make_cors_response()
-
     # Set CORS headers for the main request
+    response = make_cors_response()
     if request.method == "OPTIONS":
         return response
 
+    # Get access to db, state, and functions
     with instruments.PostgresAgentInstruments(DB_URL, "prompt-endpoint") as (
         agent_instruments,
         db,
@@ -59,7 +141,10 @@ def prompt():
         )
 
         if len(similar_tables) == 0:
-            return jsonify({"error": "No similar tables found."})
+            print(f"No similar tables found for prompt: {base_prompt}")
+            response.status_code = 400
+            response.data = "No similar tables found."
+            return response
 
         print("similar_tables", similar_tables)
 
@@ -73,7 +158,7 @@ def prompt():
             similar_tables,
         )
 
-        # ---------------- Run Data Team - Generate SQL & Results ----------------
+        # ---------------- Run 2 Agent Team - Generate SQL & Results ----------------
 
         tools = [
             TurboTool("run_sql", llm.run_sql_tool_config, agent_instruments.run_sql),
@@ -84,14 +169,24 @@ def prompt():
             model="gpt-4-1106-preview",
             instructions="You're an elite SQL developer. You generate the most concise and performant SQL queries.",
         )
-        llm.prompt_func(
-            "Use the run_sql function to run the SQL you've just generated: "
-            + sql_response,
-            model="gpt-4-1106-preview",
-            instructions="You're an elite SQL developer. You generate the most concise and performant SQL queries.",
-            turbo_tools=tools,
-        )
-        agent_instruments.validate_run_sql()
+        try:
+            llm.prompt_func(
+                "Use the run_sql function to run the SQL you've just generated: "
+                + sql_response,
+                model="gpt-4-1106-preview",
+                instructions="You're an elite SQL developer. You generate the most concise and performant SQL queries.",
+                turbo_tools=tools,
+            )
+            agent_instruments.validate_run_sql()
+        except PostgresError as e:
+            print(
+                f"Received PostgresError -> Running Self Correction Team To Resolve: {e}"
+            )
+
+            # ---------------- Run Self Correction Team - Diagnosis, Generate New SQL, Retry ----------------
+            self_correcting_assistant(db, agent_instruments, tools, e)
+
+            print(f"Self Correction Team Complete.")
 
         # ---------------- Read result files and respond ----------------
 
diff --git a/api-server/api/modules/db.py b/api-server/api/modules/db.py
@@ -162,3 +162,6 @@ def get_related_tables(self, table_list, n=2):
         related_tables_list = list(set(related_tables_list))
 
         return related_tables_list
+
+    def roll_back(self):
+        self.conn.rollback()
diff --git a/api-server/api/modules/instruments.py b/api-server/api/modules/instruments.py
@@ -1,3 +1,4 @@
+import json
 from modules.db import PostgresManager
 from modules import file
 import os
@@ -112,12 +113,20 @@ def run_sql_results_file(self):
     def sql_query_file(self):
         return self.get_file_path("sql_query.sql")
 
+    @property
+    def self_correcting_table_def_file(self):
+        return self.get_file_path("table_definitions.sql")
+
     # -------------------------- Agent Functions -------------------------- #
 
     def run_sql(self, sql: str) -> str:
         """
         Run a SQL query against the postgres database
         """
+
+        with open(self.sql_query_file, "w") as f:
+            f.write(sql)
+
         results_as_json = self.db.run_sql(sql)
 
         fname = self.run_sql_results_file
@@ -126,9 +135,6 @@ def run_sql(self, sql: str) -> str:
         with open(fname, "w") as f:
             f.write(results_as_json)
 
-        with open(self.sql_query_file, "w") as f:
-            f.write(sql)
-
         return "Successfully delivered results to json file"
 
     def validate_run_sql(self):
@@ -171,3 +177,10 @@ def validate_innovation_files(self):
                     return False, f"File {fname} is empty"
 
         return True, ""
+
+    def validate_file_exists(self, file: str):
+        def file_exists():
+            if not os.path.exists(file):
+                raise Exception(f"File {file} does not exist")
+
+        return file_exists
diff --git a/api-server/api/modules/models.py b/api-server/api/modules/models.py
@@ -1,4 +1,5 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+import time
 from typing import Callable
 
 
@@ -7,3 +8,11 @@ class TurboTool:
     name: str
     config: dict
     function: Callable
+
+
+@dataclass
+class Chat:
+    from_name: str
+    to_name: str
+    message: str
+    created: int = field(default_factory=time.time)