Skip to content

Commit 766b79a

Browse files
authored
Merge pull request #75 from bagofwords1/feedback-dashboard
## Version 0.0.306 (January 26, 2026)
- **New Interactive Dashboards**: Dashboards are now generated as executable React/HTML code, enabling rich interactivity, custom styling, and dynamic visualizations
- **Visual Feedback**: Upload screenshots or images with your prompts to show the AI exactly what you want—perfect for requesting design tweaks or pointing out issues
- Dashboard validation now includes automatic screenshot capture, allowing the AI to visually verify the output before finalizing
- Added vision model support for OpenAI, Anthropic, and Google Gemini LLM providers
2 parents b8d85fe + 3e7c29e commit 766b79a

File tree

27 files changed

+1032
-93
lines changed

27 files changed

+1032
-93
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Release Notes
22

3+
## Version 0.0.306 (January 26, 2026)
4+
- **New Interactive Dashboards**: Dashboards are now generated as executable React/HTML code, enabling rich interactivity, custom styling, and dynamic visualizations
5+
- **Visual Feedback**: Upload screenshots or images with your prompts to show the AI exactly what you want—perfect for requesting design tweaks or pointing out issues
6+
- Dashboard validation now includes automatic screenshot capture, allowing the AI to visually verify the output before finalizing
7+
- Added vision model support for OpenAI, Anthropic, and Google Gemini LLM providers
8+
39
## Version 0.0.305 (January 24, 2026)
410
- **Rebuilt Dashboards**: Now fully AI-generated as executable code (React/HTML) with iterative refinement based on conversation history
511
- Fixed @ mention detection in prompt input (no longer triggers inside existing mentions)

Dockerfile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ ENV PATH="/opt/venv/bin:$PATH"
3030
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
3131
python3 -m pip install --no-cache-dir --prefer-binary -r requirements_versioned.txt
3232

33+
# Install Playwright browser (chromium only to save space)
34+
RUN playwright install chromium --with-deps
35+
3336
FROM ubuntu:24.04 AS frontend-builder
3437

3538
ENV DEBIAN_FRONTEND=noninteractive
@@ -93,12 +96,21 @@ COPY --from=backend-builder --chown=app:app /opt/venv /opt/venv
9396
ENV PATH="/opt/venv/bin:$PATH"
9497
COPY --from=backend-builder --chown=app:app /app/backend /app/backend
9598

99+
# Copy Playwright browser binaries from builder
100+
COPY --from=backend-builder --chown=app:app /root/.cache/ms-playwright /home/app/.cache/ms-playwright
101+
102+
# Install Playwright system dependencies (runtime libs only, no browser download)
103+
RUN playwright install-deps chromium
104+
96105
# Copy demo data sources (SQLite/DuckDB files for demo databases)
97106
COPY --chown=app:app ./backend/demo-datasources /app/backend/demo-datasources
98107

99108
# Copy only the built Nuxt output to keep the image small
100109
COPY --from=frontend-builder --chown=app:app /app/frontend/.output /app/frontend/.output
101110

111+
# Copy sandbox HTML for artifact validation (used by headless browser)
112+
COPY --from=frontend-builder --chown=app:app /app/frontend/public/artifact-sandbox.html /app/frontend/public/artifact-sandbox.html
113+
102114
# Copy runtime configs and scripts
103115
COPY --chown=app:app ./backend/requirements_versioned.txt /app/backend/
104116

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.305
1+
0.0.306
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""add supports_vision to llm_models
2+
3+
Revision ID: p1q2r3s4t5u6
4+
Revises: o0p1q2r3s4t5
5+
Create Date: 2025-01-25 12:00:00.000000
6+
7+
Adds supports_vision column to llm_models table to indicate whether a model accepts image inputs.
8+
"""
9+
from typing import Sequence, Union
10+
from sqlalchemy import false, true
11+
12+
from alembic import op
13+
import sqlalchemy as sa
14+
15+
16+
# revision identifiers, used by Alembic.
17+
revision: str = 'p1q2r3s4t5u6'
18+
down_revision: Union[str, None] = 'o0p1q2r3s4t5'
19+
branch_labels: Union[str, Sequence[str], None] = None
20+
depends_on: Union[str, Sequence[str], None] = None
21+
22+
23+
def upgrade() -> None:
24+
with op.batch_alter_table('llm_models', schema=None) as batch_op:
25+
batch_op.add_column(sa.Column('supports_vision', sa.Boolean(), nullable=False, server_default=true()))
26+
27+
28+
def downgrade() -> None:
29+
with op.batch_alter_table('llm_models', schema=None) as batch_op:
30+
batch_op.drop_column('supports_vision')

backend/app.db

Whitespace-only changes.

backend/app/ai/agent_v2.py

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from app.core.telemetry import telemetry
3636
from app.ai.utils.token_counter import count_tokens
3737
from app.services.instruction_usage_service import InstructionUsageService
38+
from app.ai.llm.types import ImageInput
3839

3940
INDEX_LIMIT = 1000 # Number of tables to include in the index
4041

@@ -69,11 +70,15 @@ def __init__(self, db=None, organization=None, organization_settings=None, repor
6970
# Handle case where data_sources or files might be None
7071
self.data_sources = getattr(report, 'data_sources', []) or []
7172
self.clients = clients
72-
self.files = getattr(report, 'files', []) or []
73+
all_files = getattr(report, 'files', []) or []
74+
# Split files: images go to LLM vision, everything else goes through existing flow
75+
self.image_files = [f for f in all_files if (getattr(f, 'content_type', '') or '').startswith('image/')]
76+
self.analysis_files = [f for f in all_files if not (getattr(f, 'content_type', '') or '').startswith('image/')]
7377
else:
7478
self.data_sources = []
7579
self.clients = {}
76-
self.files = []
80+
self.image_files = []
81+
self.analysis_files = []
7782

7883
self.sigkill_event = asyncio.Event()
7984
websocket_manager.add_handler(self._handle_completion_update)
@@ -152,6 +157,26 @@ def __init__(self, db=None, organization=None, organization_settings=None, repor
152157
# Initialize SuggestInstructions agent for post-analysis suggestions
153158
self.suggest_instructions = SuggestInstructions(model=self.small_model)
154159

160+
async def _load_images_as_input(self) -> list[ImageInput]:
161+
"""Load image files as base64-encoded ImageInput objects for vision models."""
162+
import base64
163+
import aiofiles
164+
165+
images: list[ImageInput] = []
166+
for f in self.image_files:
167+
try:
168+
file_path = getattr(f, 'path', None)
169+
if not file_path:
170+
continue
171+
async with aiofiles.open(file_path, 'rb') as file:
172+
content = await file.read()
173+
data = base64.b64encode(content).decode('utf-8')
174+
media_type = getattr(f, 'content_type', 'image/png') or 'image/png'
175+
images.append(ImageInput(data=data, media_type=media_type, source_type='base64'))
176+
except Exception as e:
177+
logger.warning(f"Failed to load image file {getattr(f, 'id', 'unknown')}: {e}")
178+
return images
179+
155180
async def estimate_prompt_tokens(self) -> dict:
156181
"""Approximate the total planner prompt tokens without executing tools."""
157182
try:
@@ -691,6 +716,23 @@ async def main_execution(self):
691716
# Entities context (catalog entities relevant to this turn)
692717
entities_context = (view.warm.entities.render() if getattr(view.warm, "entities", None) else "")
693718

719+
# Load user-uploaded images for vision models (only on first loop iteration)
720+
user_images = await self._load_images_as_input() if loop_index == 0 else []
721+
722+
# Extract images from observation (tool screenshots, etc.)
723+
observation_images: list[ImageInput] = []
724+
if observation and isinstance(observation, dict) and observation.get("images"):
725+
for img in observation["images"]:
726+
if isinstance(img, dict) and img.get("data"):
727+
observation_images.append(ImageInput(
728+
data=img["data"],
729+
media_type=img.get("media_type", "image/png"),
730+
source_type=img.get("source_type", "base64"),
731+
))
732+
733+
# Combine user images + observation images
734+
all_images = user_images + observation_images
735+
694736
planner_input = PlannerInput(
695737
organization_name=self.organization.name,
696738
organization_ai_analyst_name=self.ai_analyst_name,
@@ -710,7 +752,8 @@ async def main_execution(self):
710752
past_observations=self.context_hub.observation_builder.tool_observations,
711753
external_platform=getattr(self.head_completion, "external_platform", None),
712754
tool_catalog=self.planner.tool_catalog,
713-
mode=self.mode
755+
mode=self.mode,
756+
images=all_images if all_images else None,
714757
)
715758
# Kick off early scoring in background without blocking the loop (isolated DB session)
716759
asyncio.create_task(self._run_early_scoring_background(planner_input))
@@ -1122,7 +1165,7 @@ async def _next_seq():
11221165
"context_view": view,
11231166
"context_hub": self.context_hub,
11241167
"ds_clients": self.clients,
1125-
"excel_files": self.files,
1168+
"excel_files": self.analysis_files,
11261169
"training_build_id": self.training_build_id, # For training mode instruction creation
11271170
"agent_execution_id": str(self.current_execution.id) if self.current_execution else None,
11281171
"mode": self.mode, # Current agent mode (chat/training/deep) for tool access control
@@ -1344,7 +1387,7 @@ async def emit(ev: dict):
13441387
"status": "success" if observation and not observation.get("error") else "error",
13451388
"result_summary": observation.get("summary", "") if observation else "",
13461389
# Include query_id for hydration in frontend previews when available
1347-
"result_json": ({**safe_result_json, "query_id": (str(self.current_query.id) if getattr(self, "current_query", None) else None)} if isinstance(safe_result_json, dict) else safe_result_json),
1390+
"result_json": ({**safe_result_json, "query_id": (str(self.current_query.id) if getattr(self, "current_query", None) else None), "created_visualization_ids": created_visualization_ids} if isinstance(safe_result_json, dict) else safe_result_json),
13481391
"duration_ms": tool_execution.duration_ms,
13491392
"created_widget_id": created_widget_id,
13501393
"created_step_id": created_step_id,

backend/app/ai/agents/planner/planner_v2.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ async def execute(
5858
# Stream LLM tokens and build decision snapshots
5959
async for chunk in self.llm.inference_stream(
6060
prompt,
61+
images=planner_input.images,
6162
usage_scope="planner",
6263
usage_scope_ref_id=None,
6364
):

backend/app/ai/agents/planner/prompt_builder.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ def build_prompt(planner_input: PlannerInput) -> str:
5656
# Determine mode label for prompt
5757
mode_label = "Deep Analytics" if planner_input.mode == "deep" else "Chat"
5858

59+
# Build images context - images can be user-uploaded or from tool observations (screenshots)
60+
images_context = ""
61+
if planner_input.images:
62+
images_context = f"<images>{len(planner_input.images)} image(s) attached to this request. These may include user-uploaded images or tool observation screenshots (see last_observation for context). Analyze them as part of your response when relevant.</images>"
63+
5964
prompt= f"""
6065
SYSTEM
6166
Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}; timezone: {datetime.now().astimezone().tzinfo}
@@ -116,8 +121,11 @@ def build_prompt(planner_input: PlannerInput) -> str:
116121
- Do not include sample/fabricated data in final_answer.
117122
- If the user asks (explicitly or implicitly) to create/show/list/visualize/compute a metric/table/chart, prefer the create_data tool.
118123
- A widget should represent a SINGLE piece of data or analysis (a single metric, a single table, a single chart, etc).
119-
- If the user asks for a dashboard/report/etc, create all the widgets first, then call the create_artifact tool once all queries were created.
124+
- If the user asks for a dashboard/report/etc, create all the required widgets first, then call the create_artifact tool once all queries were created.
120125
- If the user asks to build a dashboard/report/layout (or to design/arrange/present widgets), and all widgets are already created, call the create_artifact tool immediately.
126+
- When calling create_artifact, choose the appropriate mode:
127+
- Use mode="page" (default) for dashboards, reports, and interactive data displays
128+
- Use mode="slides" for presentations, slide decks, or when the user mentions PowerPoint/PPTX export
121129
- If the user is asking for a subjective metric or uses a semantic metric that is not well defined (in instructions or schema or context), output your clarifying questions in assistant_message and call the clarify tool.
122130
- If the user is asking about something that can be answered from provided context (schemas/resources/history) and your confidence is high (≥0.8) AND the user is not asking to create/visualize/persist an artifact, you may use the answer_question tool. Prefer a short reasoning_message (or null). It streams the final user-facing answer.
123131
- Prefer using data sources, tables, files, and entities explicitly listed in <mentions>. Treat them as high-confidence anchors for this turn. If you select an unmentioned source, briefly explain why.
@@ -185,6 +193,7 @@ def build_prompt(planner_input: PlannerInput) -> str:
185193
186194
INPUT ENVELOPE
187195
<user_prompt>{planner_input.user_message}</user_prompt>
196+
{images_context}
188197
<context>
189198
<platform>{planner_input.external_platform}</platform>
190199
{planner_input.instructions}
@@ -262,6 +271,11 @@ def _build_training_prompt(planner_input: PlannerInput) -> str:
262271
research_tools_json = json.dumps(research_tools, ensure_ascii=False)
263272
action_tools_json = json.dumps(action_tools, ensure_ascii=False)
264273

274+
# Build images context - images can be user-uploaded or from tool observations (screenshots)
275+
images_context = ""
276+
if planner_input.images:
277+
images_context = f"<images>{len(planner_input.images)} image(s) attached to this request. These may include user-uploaded images or tool observation screenshots (see last_observation for context). Analyze them as part of your response when relevant.</images>"
278+
265279
prompt = f"""
266280
SYSTEM
267281
Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}; timezone: {datetime.now().astimezone().tzinfo}
@@ -474,6 +488,7 @@ def _build_training_prompt(planner_input: PlannerInput) -> str:
474488
475489
INPUT ENVELOPE
476490
<user_prompt>{planner_input.user_message}</user_prompt>
491+
{images_context}
477492
<context>
478493
<platform>{planner_input.external_platform}</platform>
479494
{planner_input.instructions}

backend/app/ai/context/builders/message_context_builder.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,35 @@ async def build_context(
194194
digest_parts.append(f"chart: {dm_type}")
195195
except Exception:
196196
pass
197+
# Surface visualization_id if available (added by orchestrator)
198+
try:
199+
viz_ids = rj.get('created_visualization_ids') or []
200+
if viz_ids:
201+
digest_parts.append(f"viz_id: {viz_ids[0]}")
202+
except Exception:
203+
pass
197204
if sample_row:
198205
try:
199206
digest_parts.append(f"top row: {json.dumps(sample_row)}")
200207
except Exception:
201208
pass
202209
tool_info += " - " + "; ".join(digest_parts)
210+
# Digest for describe_entity results
211+
elif tool_execution.tool_name == 'describe_entity' and tool_execution.result_json:
212+
rj = tool_execution.result_json or {}
213+
digest_parts = []
214+
entity_title = rj.get('title')
215+
if entity_title:
216+
digest_parts.append(f"entity: {entity_title}")
217+
# Surface visualization_id if created
218+
try:
219+
viz_ids = rj.get('created_visualization_ids') or []
220+
if viz_ids:
221+
digest_parts.append(f"viz_id: {viz_ids[0]}")
222+
except Exception:
223+
pass
224+
if digest_parts:
225+
tool_info += " - " + "; ".join(digest_parts)
203226
elif tool_execution.tool_name == 'describe_tables' and tool_execution.result_json:
204227
# Show table names extracted from schemas excerpt; fallback to query/arguments
205228
rj = tool_execution.result_json or {}
@@ -556,7 +579,30 @@ async def build(
556579
digest_parts.append(f"chart: {dm_type}")
557580
except Exception:
558581
pass
582+
# Surface visualization_id if available (added by orchestrator)
583+
try:
584+
viz_ids = rj.get('created_visualization_ids') or []
585+
if viz_ids:
586+
digest_parts.append(f"viz_id: {viz_ids[0]}")
587+
except Exception:
588+
pass
559589
tool_info += " - " + "; ".join(digest_parts)
590+
elif tool_execution.status == 'success' and tool_execution.tool_name == 'describe_entity' and tool_execution.result_json:
591+
# Digest for describe_entity results
592+
rj = tool_execution.result_json or {}
593+
digest_parts = []
594+
entity_title = rj.get('title')
595+
if entity_title:
596+
digest_parts.append(f"entity: {entity_title}")
597+
# Surface visualization_id if created
598+
try:
599+
viz_ids = rj.get('created_visualization_ids') or []
600+
if viz_ids:
601+
digest_parts.append(f"viz_id: {viz_ids[0]}")
602+
except Exception:
603+
pass
604+
if digest_parts:
605+
tool_info += " - " + "; ".join(digest_parts)
560606
elif tool_execution.status == 'success' and tool_execution.tool_name == 'describe_tables' and tool_execution.result_json:
561607
# Show table names extracted from schemas excerpt; fallback to query/arguments
562608
rj = tool_execution.result_json or {}

0 commit comments

Comments
 (0)