diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json index aa1f260c1843..62f98ce2fa8e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json @@ -1207,7 +1207,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "9cad30eb26b9", + "code_hash": "1d81b3a4d764", "dependencies": { "dependencies": [ { @@ -1283,7 +1283,7 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Enhanced file component with Docling support and process isolation.\n\nNotes:\n-----\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport contextlib\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any\n\nfrom lfx.base.data.base_file import BaseFileComponent\nfrom lfx.base.data.storage_utils import parse_storage_path, read_file_bytes, validate_image_content_type\nfrom lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput\nfrom lfx.io import BoolInput, FileInput, IntInput, Output\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame # noqa: TC001\nfrom lfx.schema.message import Message\nfrom lfx.services.deps import get_settings_service, get_storage_service\nfrom lfx.utils.async_helpers import run_until_complete\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"Read File\"\n # description is now a dynamic property - see get_tool_description()\n _base_description = \"Loads content from one or more files.\"\n documentation: str = \"https://docs.langflow.org/read-file\"\n icon = \"file-text\"\n name = \"File\"\n add_tool_output = True # Enable tool mode toggle without requiring tool_mode inputs\n\n # Extensions that can be processed without Docling (using standard text parsing)\n TEXT_EXTENSIONS = TEXT_FILE_TYPES\n\n # Extensions that require Docling for processing (images, advanced office formats, etc.)\n DOCLING_ONLY_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"jpg\",\n \"jpeg\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"webp\",\n ]\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n *TEXT_EXTENSIONS,\n *DOCLING_ONLY_EXTENSIONS,\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent.get_base_inputs())\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n input_item.tool_mode = False # Disable tool mode for file upload input\n input_item.required = False # Make it optional so it doesn't 
error in tool mode\n break\n\n inputs = [\n *_base_inputs,\n StrInput(\n name=\"file_path_str\",\n display_name=\"File Path\",\n info=(\n \"Path to the file to read. Used when component is called as a tool. \"\n \"If not provided, will use the uploaded file from 'path' input.\"\n ),\n show=False,\n advanced=True,\n tool_mode=True, # Required for Toolset toggle, but _get_tools() ignores this parameter\n required=False,\n ),\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Note that advanced document processing can consume significant resources.\"\n ),\n show=True,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"None\", \"easyocr\"],\n value=\"easyocr\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n ]\n\n # ------------------------------ Tool description with file names --------------\n\n def get_tool_description(self) -> str:\n \"\"\"Return a dynamic description that includes the names of uploaded files.\n\n This helps the Agent understand which files are available to read.\n \"\"\"\n base_description = \"Loads and returns the content from uploaded files.\"\n\n # Get the list of uploaded file paths\n file_paths = getattr(self, \"path\", None)\n if not file_paths:\n return base_description\n\n # Ensure it's a list\n if not isinstance(file_paths, list):\n file_paths = [file_paths]\n\n # Extract just the file names from the paths\n file_names = []\n for fp in file_paths:\n if fp:\n name = Path(fp).name\n file_names.append(name)\n\n if file_names:\n files_str = \", \".join(file_names)\n return f\"{base_description} Available files: {files_str}. 
Call this tool to read these files.\"\n\n return base_description\n\n @property\n def description(self) -> str:\n \"\"\"Dynamic description property that includes uploaded file names.\"\"\"\n return self.get_tool_description()\n\n async def _get_tools(self) -> list:\n \"\"\"Override to create a tool without parameters.\n\n The Read File component should use the files already uploaded via UI,\n not accept file paths from the Agent (which wouldn't know the internal paths).\n \"\"\"\n from langchain_core.tools import StructuredTool\n from pydantic import BaseModel\n\n # Empty schema - no parameters needed\n class EmptySchema(BaseModel):\n \"\"\"No parameters required - uses pre-uploaded files.\"\"\"\n\n async def read_files_tool() -> str:\n \"\"\"Read the content of uploaded files.\"\"\"\n try:\n result = self.load_files_message()\n if hasattr(result, \"get_text\"):\n return result.get_text()\n if hasattr(result, \"text\"):\n return result.text\n return str(result)\n except (FileNotFoundError, ValueError, OSError, RuntimeError) as e:\n return f\"Error reading files: {e}\"\n\n description = self.get_tool_description()\n\n tool = StructuredTool(\n name=\"load_files_message\",\n description=description,\n coroutine=read_files_tool,\n args_schema=EmptySchema,\n handle_tool_error=True,\n tags=[\"load_files_message\"],\n metadata={\n \"display_name\": \"Read File\",\n \"display_description\": description,\n },\n )\n\n return [tool]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n\n # If all files can be processed by docling, do so\n allow_advanced = all(not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")) for file_path in paths)\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n # Docling Processing\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n if f == \"pipeline\":\n build_config[f][\"advanced\"] = not bool(field_value)\n\n elif field_name == \"pipeline\":\n if field_value == \"standard\":\n build_config[\"ocr_engine\"][\"show\"] = True\n build_config[\"ocr_engine\"][\"value\"] = \"easyocr\"\n else:\n build_config[\"ocr_engine\"][\"show\"] = False\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\", \"pipeline\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n 
return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Content\",\n name=\"dataframe\",\n method=\"load_files_structured\",\n tool_mode=True,\n ),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\", tool_mode=True),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Output\",\n name=\"advanced_dataframe\",\n method=\"load_files_dataframe\",\n tool_mode=True,\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Markdown\", name=\"advanced_markdown\", method=\"load_files_markdown\", tool_mode=True\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\", tool_mode=True)\n )\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Override to handle file_path_str input from tool mode.\n\n When called as a tool, the file_path_str parameter can be set.\n If not provided, it will fall back to using the path FileInput (uploaded file).\n Priority:\n 1. file_path_str (if provided by the tool call)\n 2. 
path (uploaded file from UI)\n \"\"\"\n # Check if file_path_str is provided (from tool mode)\n file_path_str = getattr(self, \"file_path_str\", None)\n if file_path_str:\n # Use the string path from tool mode\n from pathlib import Path\n\n from lfx.schema.data import Data\n\n resolved_path = Path(self.resolve_path(file_path_str))\n if not resolved_path.exists():\n msg = f\"File or directory not found: {file_path_str}\"\n self.log(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return []\n\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(resolved_path)})\n return [BaseFileComponent.BaseFile(data_obj, resolved_path, delete_after_processing=False)]\n\n # Otherwise use the default implementation (uses path FileInput)\n return super()._validate_and_resolve_paths()\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpg\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n async def _get_local_file_for_docling(self, file_path: str) -> tuple[str, bool]:\n \"\"\"Get a local file path for Docling processing, downloading from S3 if needed.\n\n Args:\n file_path: Either a local path or S3 key (format \"flow_id/filename\")\n\n Returns:\n tuple[str, bool]: (local_path, should_delete) where should_delete indicates\n if this is a temporary file that should be cleaned up\n \"\"\"\n settings = get_settings_service().settings\n if settings.storage_type == \"local\":\n return file_path, False\n\n # S3 storage - download to temp file\n parsed = parse_storage_path(file_path)\n if not parsed:\n msg = f\"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'\"\n raise ValueError(msg)\n\n storage_service = get_storage_service()\n flow_id, filename = parsed\n\n # Get file content from S3\n content = await storage_service.get_file(flow_id, filename)\n\n suffix = Path(filename).suffix\n with NamedTemporaryFile(mode=\"wb\", suffix=suffix, delete=False) as tmp_file:\n tmp_file.write(content)\n temp_path = tmp_file.name\n\n return temp_path, True\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"
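
The hunk ends mid-way through `_process_docling_in_subprocess`, but its docstring names the pattern: instead of `multiprocessing` (which would require picklable arguments), the component launches a fresh interpreter via `python -c` so that Docling's memory growth and native-library state stay in the child process, matching the note at the top of the module. Below is a minimal sketch of that isolation pattern, assuming Docling's public `DocumentConverter` API; the worker script body, the `parse_in_subprocess` name, and the JSON payload shape are illustrative, not the component's actual code.

import json
import subprocess
import sys
import textwrap

# Worker executed via `python -c`: parse one file with Docling and emit
# JSON on stdout. The payload shape here is an assumption for illustration.
_WORKER = textwrap.dedent(
    """
    import json, sys
    from docling.document_converter import DocumentConverter

    result = DocumentConverter().convert(sys.argv[1])
    print(json.dumps({"markdown": result.document.export_to_markdown()}))
    """
)


def parse_in_subprocess(file_path: str, timeout: float = 300.0) -> dict:
    """Run Docling in a fresh OS process; nothing is pickled, and the
    child's memory and native-library state are reclaimed on exit."""
    proc = subprocess.run(
        [sys.executable, "-c", _WORKER, file_path],
        capture_output=True,
        text=True,
        timeout=timeout,
        check=True,  # raise CalledProcessError if the worker fails
    )
    return json.loads(proc.stdout)

A caller would use something like `parse_in_subprocess("invoice.pdf")["markdown"]`. Keeping the parent/child contract to plain argv in and JSON-on-stdout out is what makes the `python -c` launch sufficient: no objects cross the process boundary, and the main Langflow process is insulated from whatever the parse does.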