diff --git a/.env.example b/.env.example index 9ea7727c1192..54eacd3faa03 100644 --- a/.env.example +++ b/.env.example @@ -116,6 +116,21 @@ LANGFLOW_SUPERUSER= # Example: LANGFLOW_SUPERUSER_PASSWORD=123456 LANGFLOW_SUPERUSER_PASSWORD= +# API Key Source +# Controls how API keys are validated for the x-api-key header +# Values: db, env +# - db (default): Validates against API keys stored in the database +# - env: Validates against the LANGFLOW_API_KEY environment variable +# Example: LANGFLOW_API_KEY_SOURCE=db +LANGFLOW_API_KEY_SOURCE= + +# API Key (only used when LANGFLOW_API_KEY_SOURCE=env) +# The API key to use for authentication when API_KEY_SOURCE is set to 'env' +# This allows injecting a pre-defined API key via environment variables +# (useful for Kubernetes Secrets, CI/CD pipelines, etc.) +# Example: LANGFLOW_API_KEY=your-secure-api-key +LANGFLOW_API_KEY= + # Should store environment variables in the database # Values: true, false LANGFLOW_STORE_ENVIRONMENT_VARIABLES= diff --git a/.github/workflows/cross-platform-test.yml b/.github/workflows/cross-platform-test.yml index 0d56b2ec6183..b84174ffd6e0 100644 --- a/.github/workflows/cross-platform-test.yml +++ b/.github/workflows/cross-platform-test.yml @@ -189,7 +189,7 @@ jobs: find ./base-dist -name "*.whl" -type f WHEEL_FILE=$(find ./base-dist -name "*.whl" -type f | head -1) if [ -n "$WHEEL_FILE" ]; then - uv pip install --python ./test-env/Scripts/python.exe "$WHEEL_FILE" + uv pip install --prerelease=allow --python ./test-env/Scripts/python.exe "$WHEEL_FILE" else echo "No wheel file found in ./base-dist/" exit 1 @@ -231,7 +231,7 @@ jobs: find ./base-dist -name "*.whl" -type f WHEEL_FILE=$(find ./base-dist -name "*.whl" -type f | head -1) if [ -n "$WHEEL_FILE" ]; then - uv pip install --python ./test-env/bin/python "$WHEEL_FILE" + uv pip install --prerelease=allow --python ./test-env/bin/python "$WHEEL_FILE" else echo "No wheel file found in ./base-dist/" exit 1 @@ -475,7 +475,7 @@ jobs: find ./base-dist -name "*.whl" -type f WHEEL_FILE=$(find ./base-dist -name "*.whl" -type f | head -1) if [ -n "$WHEEL_FILE" ]; then - uv pip install --python ./test-env/Scripts/python.exe "$WHEEL_FILE" + uv pip install --prerelease=allow --python ./test-env/Scripts/python.exe "$WHEEL_FILE" else echo "No wheel file found in ./base-dist/" exit 1 @@ -489,7 +489,7 @@ jobs: find ./main-dist -name "*.whl" -type f WHEEL_FILE=$(find ./main-dist -name "*.whl" -type f | head -1) if [ -n "$WHEEL_FILE" ]; then - uv pip install --python ./test-env/Scripts/python.exe "$WHEEL_FILE" + uv pip install --prerelease=allow --python ./test-env/Scripts/python.exe "$WHEEL_FILE" else echo "No wheel file found in ./main-dist/" exit 1 @@ -517,7 +517,7 @@ jobs: find ./base-dist -name "*.whl" -type f WHEEL_FILE=$(find ./base-dist -name "*.whl" -type f | head -1) if [ -n "$WHEEL_FILE" ]; then - uv pip install --python ./test-env/bin/python "$WHEEL_FILE" + uv pip install --prerelease=allow --python ./test-env/bin/python "$WHEEL_FILE" else echo "No wheel file found in ./base-dist/" exit 1 diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml index 81c5a31fd920..2e9e95de6f39 100644 --- a/.github/workflows/nightly_build.yml +++ b/.github/workflows/nightly_build.yml @@ -128,7 +128,6 @@ jobs: uv lock cd src/backend/base && uv lock && cd ../../.. - cd src/lfx && uv lock && cd ../.. git add pyproject.toml src/backend/base/pyproject.toml src/lfx/pyproject.toml uv.lock src/backend/base/uv.lock git commit -m "Update version and project name" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a5922f36bf7f..8634112c96a4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -227,7 +227,8 @@ jobs: - name: Check if pre-release if: inputs.pre_release run: | - version=$(uv tree | grep 'langflow' | grep -v 'langflow-base' | awk '{print $2}' | sed 's/^v//') + version="${{ inputs.release_tag }}" + echo "$version" if [[ "${version}" =~ ^([0-9]+\.)?([0-9]+\.)?[0-9]+((a|b|rc|dev|post)([0-9]+))$ ]]; then echo "Pre-release version detected. Continuing with the release." else diff --git a/.github/workflows/release_nightly.yml b/.github/workflows/release_nightly.yml index 653f6520b569..e41e8c898c4a 100644 --- a/.github/workflows/release_nightly.yml +++ b/.github/workflows/release_nightly.yml @@ -163,7 +163,7 @@ jobs: run: | rm -rf src/backend/base/dist rm -rf dist - make build base=true args="--wheel" + make build base=true args="--no-sources --wheel" - name: Test Langflow Base CLI run: | diff --git a/.secrets.baseline b/.secrets.baseline index 0b9b3eee7692..1e8d79fd543d 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -755,6 +755,122 @@ "is_secret": false } ], + "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json", + "hashed_secret": "377e839f86c1529c656c82599fb225b4a1261ed5", + "is_verified": false, + "line_number": 731, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 1210, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json", + "hashed_secret": "1be2449adf6092e0729be455a98c93034cc90bc8", + "is_verified": false, + "line_number": 209, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json", + "hashed_secret": "7881caec48fc330c8cde89fb096ae27690c8d8a9", + "is_verified": false, + "line_number": 883, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json", + "hashed_secret": "b223275895a74015ca0555983d6e9685efdb03fe", + "is_verified": false, + "line_number": 201, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 934, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json", + "hashed_secret": "abb09440424b40c661e344d4a61e560975620221", + "is_verified": false, + "line_number": 987, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json", + "hashed_secret": "e66321745fc15e1b80035de7c59f8c700d7e9976", + "is_verified": false, + "line_number": 1624, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Text Sentiment Analysis.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Text Sentiment Analysis.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 2342, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "377e839f86c1529c656c82599fb225b4a1261ed5", + "is_verified": false, + "line_number": 586, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "ab06ef2a8cc8a90a8526e3511be8f376c7cb0387", + "is_verified": false, + "line_number": 764, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "3de7722ca43ab9676c384eb479950083fb2385bb", + "is_verified": false, + "line_number": 1357, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 2678, + "is_secret": false + } + ], "src/backend/base/langflow/inputs/input_mixin.py": [ { "type": "Secret Keyword", @@ -781,7 +897,7 @@ "filename": "src/backend/base/langflow/services/auth/utils.py", "hashed_secret": "b894b81be94cf8fa8d7536475aaec876addf05c8", "is_verified": false, - "line_number": 28, + "line_number": 31, "is_secret": false } ], @@ -1412,5 +1528,5 @@ } ] }, - "generated_at": "2025-11-19T18:36:04Z" + "generated_at": "2025-12-02T04:40:43Z" } diff --git a/pyproject.toml b/pyproject.toml index c9c21f5e3dfe..f18a2b5a9a31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,8 +135,8 @@ dependencies = [ "fastparquet>=2024.11.0,<2025.0.0", "traceloop-sdk>=0.43.1,<1.0.0", "vlmrun[all]>=0.2.0", - "cuga==0.1.10", - "agent-lifecycle-toolkit~=0.4.1", + "cuga~=0.2.5", + "agent-lifecycle-toolkit~=0.4.4", "astrapy>=2.1.0,<3.0.0", "aioboto3>=15.2.0,<16.0.0" ] diff --git a/src/backend/base/langflow/__main__.py b/src/backend/base/langflow/__main__.py index b3c1d7434053..8e6e7986e7a6 100644 --- a/src/backend/base/langflow/__main__.py +++ b/src/backend/base/langflow/__main__.py @@ -159,7 +159,6 @@ def set_var_for_macos_issue() -> None: os.environ["OBJC_DISABLE_INITIALIZE_FORK_SAFETY"] = "YES" # https://stackoverflow.com/questions/75747888/uwsgi-segmentation-fault-with-flask-python-app-behind-nginx-after-running-for-2 # noqa: E501 os.environ["no_proxy"] = "*" # to avoid error with gunicorn - logger.debug("Set OBJC_DISABLE_INITIALIZE_FORK_SAFETY to YES to avoid error") def wait_for_server_ready(host, port, protocol) -> None: diff --git a/src/backend/base/langflow/api/log_router.py b/src/backend/base/langflow/api/log_router.py index 67492f3670df..ae2673c29fa8 100644 --- a/src/backend/base/langflow/api/log_router.py +++ b/src/backend/base/langflow/api/log_router.py @@ -3,10 +3,12 @@ from http import HTTPStatus from typing import Annotated, Any -from fastapi import APIRouter, HTTPException, Query, Request +from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import JSONResponse, StreamingResponse from lfx.log.logger import log_buffer +from langflow.services.auth.utils import get_current_active_user + log_router = APIRouter(tags=["Log"]) @@ -50,12 +52,13 @@ async def event_generator(request: Request): await asyncio.sleep(1) -@log_router.get("/logs-stream") +@log_router.get("/logs-stream", dependencies=[Depends(get_current_active_user)]) async def stream_logs( request: Request, ): """HTTP/2 Server-Sent-Event (SSE) endpoint for streaming logs. + Requires authentication to prevent exposure of sensitive log data. It establishes a long-lived connection to the server and receives log messages in real-time. The client should use the header "Accept: text/event-stream". """ @@ -69,12 +72,16 @@ async def stream_logs( return StreamingResponse(event_generator(request), media_type="text/event-stream") -@log_router.get("/logs") +@log_router.get("/logs", dependencies=[Depends(get_current_active_user)]) async def logs( lines_before: Annotated[int, Query(description="The number of logs before the timestamp or the last log")] = 0, lines_after: Annotated[int, Query(description="The number of logs after the timestamp")] = 0, timestamp: Annotated[int, Query(description="The timestamp to start getting logs from")] = 0, ): + """Retrieve application logs with authentication required. + + SECURITY: Logs may contain sensitive information and require authentication. + """ global log_buffer # noqa: PLW0602 if log_buffer.enabled() is False: raise HTTPException( diff --git a/src/backend/base/langflow/api/utils/core.py b/src/backend/base/langflow/api/utils/core.py index 9c02ff4e22b2..1dc301f18b4d 100644 --- a/src/backend/base/langflow/api/utils/core.py +++ b/src/backend/base/langflow/api/utils/core.py @@ -57,7 +57,7 @@ def remove_api_keys(flow: dict): node_data = node.get("data").get("node") template = node_data.get("template") for value in template.values(): - if isinstance(value, dict) and has_api_terms(value["name"]) and value.get("password"): + if isinstance(value, dict) and "name" in value and has_api_terms(value["name"]) and value.get("password"): value["value"] = None return flow diff --git a/src/backend/base/langflow/api/utils/mcp/__init__.py b/src/backend/base/langflow/api/utils/mcp/__init__.py index 4105d92102df..3fe4baaa6af9 100644 --- a/src/backend/base/langflow/api/utils/mcp/__init__.py +++ b/src/backend/base/langflow/api/utils/mcp/__init__.py @@ -1,9 +1,17 @@ """MCP utilities for Langflow.""" -from langflow.api.utils.mcp.config_utils import auto_configure_starter_projects_mcp, get_project_sse_url, get_url_by_os +from langflow.api.utils.mcp.config_utils import ( + auto_configure_starter_projects_mcp, + get_composer_streamable_http_url, + get_project_sse_url, + get_project_streamable_http_url, + get_url_by_os, +) __all__ = [ "auto_configure_starter_projects_mcp", + "get_composer_streamable_http_url", "get_project_sse_url", + "get_project_streamable_http_url", "get_url_by_os", ] diff --git a/src/backend/base/langflow/api/utils/mcp/config_utils.py b/src/backend/base/langflow/api/utils/mcp/config_utils.py index 58fb8e1a62e9..03a2ac7dd0c1 100644 --- a/src/backend/base/langflow/api/utils/mcp/config_utils.py +++ b/src/backend/base/langflow/api/utils/mcp/config_utils.py @@ -12,7 +12,7 @@ from sqlmodel import select from langflow.api.v2.mcp import get_server_list, update_server -from langflow.services.auth.mcp_encryption import encrypt_auth_settings +from langflow.services.auth.mcp_encryption import decrypt_auth_settings, encrypt_auth_settings from langflow.services.database.models import Flow, Folder from langflow.services.database.models.api_key.crud import create_api_key from langflow.services.database.models.api_key.model import ApiKeyCreate @@ -204,8 +204,8 @@ async def get_url_by_os(host: str, port: int, url: str) -> str: return url -async def get_project_sse_url(project_id: UUID) -> str: - """Generate the SSE URL for a project, including WSL handling.""" +async def _get_project_base_url_components() -> tuple[str, int]: + """Return normalized host and port for building MCP URLs.""" # Get settings service to build the SSE URL settings_service = get_settings_service() server_host = getattr(settings_service.settings, "host", "localhost") @@ -219,14 +219,52 @@ async def get_project_sse_url(project_id: UUID) -> str: # For MCP clients, always use localhost instead of 0.0.0.0 # 0.0.0.0 is a bind address, not a connect address host = "localhost" if server_host == ALL_INTERFACES_HOST else server_host - port = server_port + return host, server_port + +async def get_project_streamable_http_url(project_id: UUID) -> str: + """Generate the Streamable HTTP endpoint for a project (no /sse suffix).""" + host, port = await _get_project_base_url_components() base_url = f"http://{host}:{port}".rstrip("/") - project_sse_url = f"{base_url}/api/v1/mcp/project/{project_id}/sse" + project_url = f"{base_url}/api/v1/mcp/project/{project_id}/streamable" + return await get_url_by_os(host, port, project_url) + +async def get_project_sse_url(project_id: UUID) -> str: + """Generate the legacy SSE URL for a project, including WSL handling.""" + host, port = await _get_project_base_url_components() + base_url = f"http://{host}:{port}".rstrip("/") + project_sse_url = f"{base_url}/api/v1/mcp/project/{project_id}/sse" return await get_url_by_os(host, port, project_sse_url) +async def _get_mcp_composer_auth_config(project: Folder) -> dict: + """Decrypt and return MCP Composer auth configuration for a project.""" + auth_config = None + if project.auth_settings: + decrypted_settings = decrypt_auth_settings(project.auth_settings) + if decrypted_settings: + auth_config = decrypted_settings + + if not auth_config: + error_message = "Auth config is missing. Please check your settings and try again." + raise ValueError(error_message) + + return auth_config + + +async def get_composer_streamable_http_url(project: Folder) -> str: + """Generate Streamable HTTP URL for the MCP Composer instance.""" + auth_config = await _get_mcp_composer_auth_config(project) + composer_host = auth_config.get("oauth_host") + composer_port = auth_config.get("oauth_port") + if not composer_host or not composer_port: + error_msg = "OAuth host and port are required to get the MCP Composer URL" + raise ValueError(error_msg) + composer_url = f"http://{composer_host}:{composer_port}" + return await get_url_by_os(composer_host, int(composer_port), composer_url) # type: ignore[arg-type] + + async def auto_configure_starter_projects_mcp(session): """Auto-configure MCP servers for starter projects for all users at startup.""" # Check if auto-configure is enabled @@ -333,7 +371,18 @@ async def auto_configure_starter_projects_mcp(session): default_auth = {"auth_type": "none"} await logger.adebug(f"Settings service auth settings: {settings_service.auth_settings}") await logger.adebug(f"User starter folder auth settings: {user_starter_folder.auth_settings}") - if not settings_service.auth_settings.AUTO_LOGIN and not user_starter_folder.auth_settings: + if ( + not user_starter_folder.auth_settings + and settings_service.auth_settings.AUTO_LOGIN + and not settings_service.auth_settings.SUPERUSER + ): + default_auth = {"auth_type": "apikey"} + user_starter_folder.auth_settings = encrypt_auth_settings(default_auth) + await logger.adebug( + "AUTO_LOGIN enabled without SUPERUSER; forcing API key auth for starter folder %s", + user.username, + ) + elif not settings_service.auth_settings.AUTO_LOGIN and not user_starter_folder.auth_settings: default_auth = {"auth_type": "apikey"} user_starter_folder.auth_settings = encrypt_auth_settings(default_auth) await logger.adebug(f"Set up auth settings for user {user.username}'s starter folder") @@ -344,18 +393,20 @@ async def auto_configure_starter_projects_mcp(session): api_key_name = f"MCP Project {DEFAULT_FOLDER_NAME} - {user.username}" unmasked_api_key = await create_api_key(session, ApiKeyCreate(name=api_key_name), user.id) - # Build SSE URL for THIS USER'S starter folder (unique ID per user) - sse_url = await get_project_sse_url(user_starter_folder.id) + # Build connection URLs for THIS USER'S starter folder (unique ID per user) + streamable_http_url = await get_project_streamable_http_url(user_starter_folder.id) # Prepare server config (similar to new project creation) if default_auth.get("auth_type", "none") == "apikey": command = "uvx" args = [ "mcp-proxy", + "--transport", + "streamablehttp", "--headers", "x-api-key", unmasked_api_key.api_key, - sse_url, + streamable_http_url, ] elif default_auth.get("auth_type", "none") == "oauth": msg = "OAuth authentication is not yet implemented for MCP server creation during project creation." @@ -366,7 +417,9 @@ async def auto_configure_starter_projects_mcp(session): command = "uvx" args = [ "mcp-proxy", - sse_url, + "--transport", + "streamablehttp", + streamable_http_url, ] server_config = {"command": command, "args": args} diff --git a/src/backend/base/langflow/api/v1/chat.py b/src/backend/base/langflow/api/v1/chat.py index d32b6db6ae1b..231f0985822a 100644 --- a/src/backend/base/langflow/api/v1/chat.py +++ b/src/backend/base/langflow/api/v1/chat.py @@ -37,6 +37,7 @@ VerticesOrderResponse, ) from langflow.exceptions.component import ComponentBuildError +from langflow.services.auth.utils import get_current_active_user from langflow.services.chat.service import ChatService from langflow.services.database.models.flow.model import Flow from langflow.services.deps import ( @@ -54,7 +55,7 @@ router = APIRouter(tags=["Chat"]) -@router.post("/build/{flow_id}/vertices", deprecated=True) +@router.post("/build/{flow_id}/vertices", deprecated=True, dependencies=[Depends(get_current_active_user)]) async def retrieve_vertices_order( *, flow_id: uuid.UUID, @@ -197,14 +198,17 @@ async def build_flow( ) -@router.get("/build/{job_id}/events") +@router.get("/build/{job_id}/events", dependencies=[Depends(get_current_active_user)]) async def get_build_events( job_id: str, queue_service: Annotated[JobQueueService, Depends(get_queue_service)], *, event_delivery: EventDeliveryType = EventDeliveryType.STREAMING, ): - """Get events for a specific build job.""" + """Get events for a specific build job. + + Requires authentication to prevent unauthorized access to build events. + """ return await get_flow_events_response( job_id=job_id, queue_service=queue_service, @@ -212,12 +216,19 @@ async def get_build_events( ) -@router.post("/build/{job_id}/cancel", response_model=CancelFlowResponse) +@router.post( + "/build/{job_id}/cancel", + response_model=CancelFlowResponse, + dependencies=[Depends(get_current_active_user)], +) async def cancel_build( job_id: str, queue_service: Annotated[JobQueueService, Depends(get_queue_service)], ): - """Cancel a specific build job.""" + """Cancel a specific build job. + + Requires authentication to prevent unauthorized build cancellation. + """ try: # Cancel the flow build and check if it was successful cancellation_success = await cancel_flow_build(job_id=job_id, queue_service=queue_service) @@ -509,6 +520,7 @@ async def _stream_vertex(flow_id: str, vertex_id: str, chat_service: ChatService "/build/{flow_id}/{vertex_id}/stream", response_class=StreamingResponse, deprecated=True, + dependencies=[Depends(get_current_active_user)], ) async def build_vertex_stream( flow_id: uuid.UUID, diff --git a/src/backend/base/langflow/api/v1/endpoints.py b/src/backend/base/langflow/api/v1/endpoints.py index c780b0e79e0c..845222674b6a 100644 --- a/src/backend/base/langflow/api/v1/endpoints.py +++ b/src/backend/base/langflow/api/v1/endpoints.py @@ -588,7 +588,15 @@ async def simplified_run_flow_session( - "token": Individual tokens during streaming - "end": Final execution result - Authentication: Requires active session (cookies) + - Feature Flag: Only available when agentic_experience is enabled """ + # Feature flag: Only allow access if agentic_experience is enabled + if not get_settings_service().settings.agentic_experience: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="This endpoint is not available", + ) + return await _run_flow_internal( background_tasks=background_tasks, flow=flow, @@ -953,10 +961,12 @@ async def custom_component_update( raise SerializationError.from_exception(exc, data=component_node) from exc -@router.get("/config") +@router.get("/config", dependencies=[Depends(get_current_active_user)]) async def get_config() -> ConfigResponse: """Retrieve the current application configuration settings. + Requires authentication to prevent exposure of sensitive configuration details. + Returns: ConfigResponse: The configuration settings of the application. diff --git a/src/backend/base/langflow/api/v1/files.py b/src/backend/base/langflow/api/v1/files.py index c6d2e03785d7..0a9de1cf3399 100644 --- a/src/backend/base/langflow/api/v1/files.py +++ b/src/backend/base/langflow/api/v1/files.py @@ -21,6 +21,41 @@ router = APIRouter(tags=["Files"], prefix="/files") +def _get_allowed_profile_picture_folders(settings_service: SettingsService) -> set[str]: + """Return the set of allowed profile picture folders. + + This enumerates subdirectories under the profile_pictures directory in both + the user's config_dir and the package's bundled assets. This makes the API + flexible (users may add new folders under config_dir/profile_pictures) while + still safe because we only ever serve files contained within the resolved + base directory and validate path containment below. + + If no directories can be found (unexpected), fall back to the curated + defaults {"People", "Space"} shipped with Langflow. + """ + allowed: set[str] = set() + try: + # Config-provided folders + config_dir = Path(settings_service.settings.config_dir) + cfg_base = config_dir / "profile_pictures" + if cfg_base.exists(): + allowed.update({p.name for p in cfg_base.iterdir() if p.is_dir()}) + # Package-provided folders + from langflow.initial_setup import setup + + pkg_base = Path(setup.__file__).parent / "profile_pictures" + if pkg_base.exists(): + allowed.update({p.name for p in pkg_base.iterdir() if p.is_dir()}) + except Exception as _: + import logging + + logger = logging.getLogger(__name__) + logger.exception("Exception occurred while getting allowed profile picture folders") + + # Sensible defaults ensure tests and OOTB behavior + return allowed or {"People", "Space"} + + # Create dep that gets the flow_id from the request # then finds it in the database and returns it while # using the current user as the owner @@ -31,10 +66,9 @@ async def get_flow( ): # AttributeError: 'SelectOfScalar' object has no attribute 'first' flow = await session.get(Flow, flow_id) - if not flow: + # Return 404 for both non-existent flows and unauthorized access to prevent information disclosure + if not flow or flow.user_id != current_user.id: raise HTTPException(status_code=404, detail="Flow not found") - if flow.user_id != current_user.id: - raise HTTPException(status_code=403, detail="You don't have access to this flow") return flow @@ -43,7 +77,6 @@ async def upload_file( *, file: UploadFile, flow: Annotated[Flow, Depends(get_flow)], - current_user: CurrentActiveUser, storage_service: Annotated[StorageService, Depends(get_storage_service)], settings_service: Annotated[SettingsService, Depends(get_settings_service)], ) -> UploadFileResponse: @@ -57,9 +90,7 @@ async def upload_file( status_code=413, detail=f"File size is larger than the maximum file size {max_file_size_upload}MB." ) - if flow.user_id != current_user.id: - raise HTTPException(status_code=403, detail="You don't have access to this flow") - + # Authorization handled by get_flow dependency try: file_content = await file.read() timestamp = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d_%H-%M-%S") @@ -74,9 +105,12 @@ async def upload_file( @router.get("/download/{flow_id}/{file_name}") async def download_file( - file_name: str, flow_id: UUID, storage_service: Annotated[StorageService, Depends(get_storage_service)] + file_name: str, + flow: Annotated[Flow, Depends(get_flow)], + storage_service: Annotated[StorageService, Depends(get_storage_service)], ): - flow_id_str = str(flow_id) + # Authorization handled by get_flow dependency + flow_id_str = str(flow.id) extension = file_name.split(".")[-1] if not extension: @@ -102,10 +136,14 @@ async def download_file( @router.get("/images/{flow_id}/{file_name}") -async def download_image(file_name: str, flow_id: UUID): +async def download_image( + file_name: str, + flow: Annotated[Flow, Depends(get_flow)], +): + # Authorization handled by get_flow dependency storage_service = get_storage_service() extension = file_name.split(".")[-1] - flow_id_str = str(flow_id) + flow_id_str = str(flow.id) if not extension: raise HTTPException(status_code=500, detail=f"Extension not found for file {file_name}") @@ -134,17 +172,57 @@ async def download_profile_picture( ): """Download profile picture from local filesystem. - Profile pictures are always stored locally in config_dir/profile_pictures/, - regardless of the storage_type setting (local, s3, etc.). + Profile pictures are first looked up in config_dir/profile_pictures/, + then fallback to the package's bundled profile_pictures directory. """ try: + # SECURITY: Validate inputs to prevent path traversal attacks + # Reject any path components that contain directory traversal sequences + if ".." in folder_name or ".." in file_name: + raise HTTPException( + status_code=400, detail="Path traversal patterns ('..') are not allowed in folder or file names" + ) + + # Only allow specific folder names (dynamic from config + package) + allowed_folders = _get_allowed_profile_picture_folders(settings_service) + if folder_name not in allowed_folders: + raise HTTPException(status_code=400, detail=f"Folder must be one of: {', '.join(sorted(allowed_folders))}") + + # Validate file name contains no path separators + if "/" in file_name or "\\" in file_name: + raise HTTPException(status_code=400, detail="File name cannot contain path separators ('/' or '\\')") + extension = file_name.split(".")[-1] config_dir = settings_service.settings.config_dir - config_path = Path(config_dir) # type: ignore[arg-type] - file_path = config_path / "profile_pictures" / folder_name / file_name + config_path = Path(config_dir).resolve() # type: ignore[arg-type] + + # Construct the file path + file_path = (config_path / "profile_pictures" / folder_name / file_name).resolve() + + # SECURITY: Verify the resolved path is still within the allowed directory + # This prevents path traversal even if symbolic links are involved + allowed_base = (config_path / "profile_pictures").resolve() + if not str(file_path).startswith(str(allowed_base)): + # Return 404 to prevent path traversal attempts from revealing system structure + raise HTTPException(status_code=404, detail="Profile picture not found") + # Fallback to package bundled profile pictures if not found in config_dir if not file_path.exists(): - raise HTTPException(status_code=404, detail=f"Profile picture {folder_name}/{file_name} not found") + from langflow.initial_setup import setup + + package_base = Path(setup.__file__).parent / "profile_pictures" + package_path = (package_base / folder_name / file_name).resolve() + + # SECURITY: Verify package path is also within allowed directory + allowed_package_base = package_base.resolve() + if not str(package_path).startswith(str(allowed_package_base)): + # Return 404 to prevent path traversal attempts from revealing system structure + raise HTTPException(status_code=404, detail="Profile picture not found") + + if package_path.exists(): + file_path = package_path + else: + raise HTTPException(status_code=404, detail=f"Profile picture {folder_name}/{file_name} not found") content_type = build_content_type_from_extension(extension) # Read file directly from local filesystem using async file operations @@ -163,26 +241,37 @@ async def list_profile_pictures( ): """List profile pictures from local filesystem. - Profile pictures are always stored locally in config_dir/profile_pictures/, - regardless of the storage_type setting (local, s3, etc.). + Profile pictures are first looked up in config_dir/profile_pictures/, + then fallback to the package's bundled profile_pictures directory. """ try: config_dir = settings_service.settings.config_dir config_path = Path(config_dir) # type: ignore[arg-type] - people_path = config_path / "profile_pictures" / "People" - space_path = config_path / "profile_pictures" / "Space" - - # List files directly from local filesystem - bundled with the container - people = [f.name for f in people_path.iterdir() if f.is_file()] if people_path.exists() else [] - space = [f.name for f in space_path.iterdir() if f.is_file()] if space_path.exists() else [] + # Build list for all allowed folders (dynamic) + allowed_folders = _get_allowed_profile_picture_folders(settings_service) + + results: list[str] = [] + cfg_base = config_path / "profile_pictures" + if cfg_base.exists(): + for folder in sorted(allowed_folders): + p = cfg_base / folder + if p.exists(): + results += [f"{folder}/{f.name}" for f in p.iterdir() if f.is_file()] + + # Fallback to package if config_dir produced no results + if not results: + from langflow.initial_setup import setup + + package_base = Path(setup.__file__).parent / "profile_pictures" + for folder in sorted(allowed_folders): + p = package_base / folder + if p.exists(): + results += [f"{folder}/{f.name}" for f in p.iterdir() if f.is_file()] except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e - files = [f"People/{i}" for i in people] - files += [f"Space/{i}" for i in space] - - return {"files": files} + return {"files": results} @router.get("/list/{flow_id}") diff --git a/src/backend/base/langflow/api/v1/mcp.py b/src/backend/base/langflow/api/v1/mcp.py index ed9587833fd5..f043e36f3549 100644 --- a/src/backend/base/langflow/api/v1/mcp.py +++ b/src/backend/base/langflow/api/v1/mcp.py @@ -1,13 +1,15 @@ import asyncio +from contextlib import AsyncExitStack import pydantic from anyio import BrokenResourceError from fastapi import APIRouter, HTTPException, Request, Response -from fastapi.responses import HTMLResponse, StreamingResponse +from fastapi.responses import HTMLResponse from lfx.log.logger import logger from mcp import types from mcp.server import NotificationOptions, Server from mcp.server.sse import SseServerTransport +from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from langflow.api.utils import CurrentActiveMCPUser from langflow.api.v1.mcp_utils import ( @@ -18,21 +20,12 @@ handle_mcp_errors, handle_read_resource, ) -from langflow.services.deps import get_settings_service router = APIRouter(prefix="/mcp", tags=["mcp"]) server = Server("langflow-mcp-server") -# Define constants -MAX_RETRIES = 2 - - -def get_enable_progress_notifications() -> bool: - return get_settings_service().settings.mcp_server_enable_progress_notifications - - @server.list_prompts() async def handle_list_prompts(): return [] @@ -63,7 +56,19 @@ async def handle_global_call_tool(name: str, arguments: dict) -> list[types.Text return await handle_call_tool(name, arguments, server) -sse = SseServerTransport("/api/v1/mcp/") +######################################################## +# The transports handle the full ASGI response. +# FastAPI still expects the endpoint to return +# a Response, while Starlette's middleware +# stream validation panics when +# a http.response.start message +# is encountered twice within the same stream. +# This class nullifies the redundant +# response to end streams gracefully. +######################################################## +class ResponseNoOp(Response): + async def __call__(self, scope, receive, send) -> None: # noqa: ARG002 + return def find_validation_error(exc): @@ -75,12 +80,18 @@ def find_validation_error(exc): return None +################################################################################ +# SSE Transport +################################################################################ +sse = SseServerTransport("/api/v1/mcp/") + + @router.head("/sse", response_class=HTMLResponse, include_in_schema=False) async def im_alive(): return Response() -@router.get("/sse", response_class=StreamingResponse) +@router.get("/sse", response_class=ResponseNoOp) async def handle_sse(request: Request, current_user: CurrentActiveMCPUser): msg = f"Starting SSE connection, server name: {server.name}" await logger.ainfo(msg) @@ -135,3 +146,115 @@ async def handle_messages(request: Request): except Exception as e: await logger.aerror(f"Internal server error: {e}") raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e + + +################################################################################ +# Streamable HTTP Transport +################################################################################ +class StreamableHTTP: + def __init__(self): + self.session_manager: StreamableHTTPSessionManager | None = None + self._context_stack: AsyncExitStack | None = None + self._started = False + self._start_stop_lock = asyncio.Lock() + + async def start(self, *, stateless: bool = True) -> None: + """Create and enter the Streamable HTTP session manager lifecycle.""" + async with self._start_stop_lock: + if self._started: + await logger.adebug("Streamable HTTP session manager already running; skipping start") + return + + manager = StreamableHTTPSessionManager(server, stateless=stateless) + stack = AsyncExitStack() + try: + await stack.enter_async_context(manager.run()) + except Exception: + await stack.aclose() + raise + + self.session_manager = manager + self._context_stack = stack + self._started = True + await logger.adebug("Streamable HTTP session manager started") + + def get_manager(self) -> StreamableHTTPSessionManager: + """Fetch the active Streamable HTTP session manager or raise if it is unavailable.""" + if not self._started or self.session_manager is None: + raise HTTPException(status_code=503, detail="MCP Streamable HTTP transport is not initialized") + return self.session_manager + + async def stop(self) -> None: + """Close the Streamable HTTP session manager context.""" + async with self._start_stop_lock: + if not self._started: + return + + try: + if self._context_stack is not None: + await self._context_stack.aclose() + finally: + self._context_stack = None + self.session_manager = None + self._started = False + await logger.adebug("Streamable HTTP session manager stopped") + + +_streamable_http = StreamableHTTP() + + +async def start_streamable_http_manager(stateless: bool = True) -> None: # noqa: FBT001, FBT002 + await _streamable_http.start(stateless=stateless) + + +def get_streamable_http_manager() -> StreamableHTTPSessionManager: + return _streamable_http.get_manager() + + +async def stop_streamable_http_manager() -> None: + await _streamable_http.stop() + + +streamable_http_route_config = { # use for all streamable http routes (except for the health check) + "methods": ["GET", "POST", "DELETE"], + "response_class": ResponseNoOp, +} + + +@router.head("/streamable", include_in_schema=False) +async def streamable_health(): + return Response() + + +@router.api_route("/streamable", **streamable_http_route_config) +@router.api_route("/streamable/", **streamable_http_route_config) +async def handle_streamable_http(request: Request, current_user: CurrentActiveMCPUser): + """Streamable HTTP endpoint for MCP clients that support the new transport.""" + return await _dispatch_streamable_http(request, current_user) + + +async def _dispatch_streamable_http( + request: Request, + current_user: CurrentActiveMCPUser, +) -> Response: + """Common handler for Streamable HTTP requests with user context propagation.""" + await logger.adebug( + "Handling %s %s via Streamable HTTP for user %s", + request.method, + request.url.path, + current_user.id, + ) + + context_token = current_user_ctx.set(current_user) + try: + manager = get_streamable_http_manager() + await manager.handle_request(request.scope, request.receive, request._send) # noqa: SLF001 + except HTTPException: + raise + except Exception as exc: + await logger.aexception(f"Error handling Streamable HTTP request: {exc!s}") + raise HTTPException(status_code=500, detail="Internal server error in Streamable HTTP transport") from exc + finally: + current_user_ctx.reset(context_token) + + return ResponseNoOp() diff --git a/src/backend/base/langflow/api/v1/mcp_projects.py b/src/backend/base/langflow/api/v1/mcp_projects.py index bb3c156e3ef5..38bada8c129c 100644 --- a/src/backend/base/langflow/api/v1/mcp_projects.py +++ b/src/backend/base/langflow/api/v1/mcp_projects.py @@ -3,6 +3,7 @@ import os import platform from asyncio.subprocess import create_subprocess_exec +from collections.abc import Awaitable, Callable, Sequence from contextvars import ContextVar from datetime import datetime, timezone from ipaddress import ip_address @@ -11,9 +12,11 @@ from typing import Annotated, Any, cast from uuid import UUID +import anyio from anyio import BrokenResourceError +from anyio.abc import TaskGroup, TaskStatus from fastapi import APIRouter, Depends, HTTPException, Request, Response, status -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, JSONResponse from lfx.base.mcp.constants import MAX_MCP_SERVER_NAME_LENGTH from lfx.base.mcp.util import sanitize_mcp_name from lfx.log import logger @@ -23,13 +26,21 @@ from mcp import types from mcp.server import NotificationOptions, Server from mcp.server.sse import SseServerTransport +from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from sqlalchemy.orm import selectinload from sqlmodel import select from sqlmodel.ext.asyncio.session import AsyncSession from langflow.api.utils import CurrentActiveMCPUser, extract_global_variables_from_headers -from langflow.api.utils.mcp import auto_configure_starter_projects_mcp, get_project_sse_url, get_url_by_os +from langflow.api.utils.mcp import ( + auto_configure_starter_projects_mcp, + get_composer_streamable_http_url, + get_project_sse_url, + get_project_streamable_http_url, + get_url_by_os, +) from langflow.api.v1.auth_helpers import handle_auth_settings_update +from langflow.api.v1.mcp import ResponseNoOp from langflow.api.v1.mcp_utils import ( current_request_variables_ctx, current_user_ctx, @@ -41,6 +52,7 @@ ) from langflow.api.v1.schemas import ( AuthSettings, + ComposerUrlResponse, MCPInstallRequest, MCPProjectResponse, MCPProjectUpdateRequest, @@ -180,8 +192,8 @@ async def verify_project_auth_conditional( # Create project-specific context variable current_project_ctx: ContextVar[UUID | None] = ContextVar("current_project_ctx", default=None) -# Create a mapping of project-specific SSE transports -project_sse_transports = {} +# Mapping of project-specific SSE transports +project_sse_transports: dict[str, SseServerTransport] = {} def get_project_sse(project_id: UUID | None) -> SseServerTransport: @@ -195,14 +207,13 @@ def get_project_sse(project_id: UUID | None) -> SseServerTransport: return project_sse_transports[project_id_str] -@router.get("/{project_id}") -async def list_project_tools( +async def _build_project_tools_response( project_id: UUID, current_user: CurrentActiveMCPUser, *, - mcp_enabled: bool = True, -) -> MCPProjectResponse | None: - """List all tools in a project that are enabled for MCP.""" + mcp_enabled: bool, +) -> MCPProjectResponse: + """Return tool metadata for a project.""" tools: list[MCPSettings] = [] try: async with session_scope() as session: @@ -277,6 +288,23 @@ async def list_project_tools( return MCPProjectResponse(tools=tools, auth_settings=auth_settings) +@router.get("/{project_id}") +async def list_project_tools( + project_id: UUID, + current_user: CurrentActiveMCPUser, + *, + mcp_enabled: bool = True, +) -> Response: + """List project MCP tools.""" + metadata = await _build_project_tools_response(project_id, current_user, mcp_enabled=mcp_enabled) + return JSONResponse(content=metadata.model_dump(mode="json")) + + +######################################################## +# legacy SSE transport routes +######################################################## + + @router.head("/{project_id}/sse", response_class=HTMLResponse, include_in_schema=False) async def im_alive(project_id: str): # noqa: ARG001 return Response() @@ -289,8 +317,6 @@ async def handle_project_sse( current_user: Annotated[User, Depends(verify_project_auth_conditional)], ): """Handle SSE connections for a specific project.""" - # Verify project exists and user has access - async with session_scope() as session: project = ( await session.exec(select(Folder).where(Folder.id == project_id, Folder.user_id == current_user.id)) @@ -299,15 +325,12 @@ async def handle_project_sse( if not project: raise HTTPException(status_code=404, detail="Project not found") - # Get project-specific SSE transport and MCP server sse = get_project_sse(project_id) project_server = get_project_mcp_server(project_id) await logger.adebug("Project MCP server name: %s", project_server.server.name) - # Set context variables user_token = current_user_ctx.set(current_user) project_token = current_project_ctx.set(project_id) - # Extract request-level variables from headers with prefix X-LANGFLOW-GLOBAL-VAR-* variables = extract_global_variables_from_headers(request.headers) req_vars_token = current_request_variables_ctx.set(variables or None) @@ -338,20 +361,17 @@ async def handle_project_sse( current_project_ctx.reset(project_token) current_request_variables_ctx.reset(req_vars_token) - return Response(status_code=200) + return ResponseNoOp(status_code=200) -@router.post("/{project_id}") -async def handle_project_messages( +async def _handle_project_sse_messages( project_id: UUID, request: Request, - current_user: Annotated[User, Depends(verify_project_auth_conditional)], + current_user: User, ): - """Handle POST messages for a project-specific MCP server.""" - # Set context variables + """Handle POST messages for a project-specific MCP server using SSE transport.""" user_token = current_user_ctx.set(current_user) project_token = current_project_ctx.set(project_id) - # Extract request-level variables from headers with prefix X-LANGFLOW-GLOBAL-VAR-* variables = extract_global_variables_from_headers(request.headers) req_vars_token = current_request_variables_ctx.set(variables or None) @@ -367,15 +387,73 @@ async def handle_project_messages( current_request_variables_ctx.reset(req_vars_token) +@router.post("/{project_id}") @router.post("/{project_id}/") -async def handle_project_messages_with_slash( +async def handle_project_messages( project_id: UUID, request: Request, current_user: Annotated[User, Depends(verify_project_auth_conditional)], ): - """Handle POST messages for a project-specific MCP server with trailing slash.""" - # Call the original handler - return await handle_project_messages(project_id, request, current_user) + """Handle POST messages for a project-specific MCP server.""" + return await _handle_project_sse_messages(project_id, request, current_user) + + +######################################################## +# Streamable HTTP transport routes +######################################################## + + +@router.head("/{project_id}/streamable", include_in_schema=False) +async def streamable_health(project_id: UUID): # noqa: ARG001 + return Response() + + +async def _dispatch_project_streamable_http( + project_id: UUID, + request: Request, + current_user: User, +) -> Response: + """Common handler for project-specific Streamable HTTP requests.""" + # Lazily initialize the project's Streamable HTTP manager + # to pick up new projects as they are created. + project_server = get_project_mcp_server(project_id) + await project_server.ensure_session_manager_running() + + user_token = current_user_ctx.set(current_user) + project_token = current_project_ctx.set(project_id) + variables = extract_global_variables_from_headers(request.headers) + request_vars_token = current_request_variables_ctx.set(variables or None) + + try: + await project_server.session_manager.handle_request(request.scope, request.receive, request._send) # noqa: SLF001 + except HTTPException: + raise + except Exception as exc: + await logger.aexception(f"Error handling Streamable HTTP request for project {project_id}: {exc!s}") + raise HTTPException(status_code=500, detail="Internal server error in project MCP transport") from exc + finally: + current_request_variables_ctx.reset(request_vars_token) + current_project_ctx.reset(project_token) + current_user_ctx.reset(user_token) + + return ResponseNoOp(status_code=200) + + +streamable_http_route_config = { + "methods": ["GET", "POST", "DELETE"], + "response_class": ResponseNoOp, +} + + +@router.api_route("/{project_id}/streamable", **streamable_http_route_config) +@router.api_route("/{project_id}/streamable/", **streamable_http_route_config) +async def handle_project_streamable_http( + project_id: UUID, + request: Request, + current_user: Annotated[User, Depends(verify_project_auth_conditional)], +): + """Handle Streamable HTTP connections for a specific project.""" + return await _dispatch_project_streamable_http(project_id, request, current_user) @router.patch("/{project_id}", status_code=200) @@ -463,11 +541,14 @@ async def update_project_mcp_settings( try: auth_config = await _get_mcp_composer_auth_config(project) await get_or_start_mcp_composer(auth_config, project.name, project_id) + composer_streamable_http_url = await get_composer_streamable_http_url(project) composer_sse_url = await get_composer_sse_url(project) # Clear any previous error on success mcp_composer_service.clear_last_error(str(project_id)) response["result"] = { "project_id": str(project_id), + "streamable_http_url": composer_streamable_http_url, + "legacy_sse_url": composer_sse_url, "sse_url": composer_sse_url, "uses_composer": True, } @@ -509,14 +590,17 @@ async def update_project_mcp_settings( # Clear any error when user explicitly disables OAuth mcp_composer_service.clear_last_error(str(project_id)) - # Provide the direct SSE URL since we're no longer using composer - sse_url = await get_project_sse_url(project_id) - if not sse_url: - raise HTTPException(status_code=500, detail="Failed to get direct SSE URL") + # Provide direct connection URLs since we're no longer using composer + streamable_http_url = await get_project_streamable_http_url(project_id) + legacy_sse_url = await get_project_sse_url(project_id) + if not streamable_http_url: + raise HTTPException(status_code=500, detail="Failed to get direct Streamable HTTP URL") response["result"] = { "project_id": str(project_id), - "sse_url": sse_url, + "streamable_http_url": streamable_http_url, + "legacy_sse_url": legacy_sse_url, + "sse_url": legacy_sse_url, "uses_composer": False, } @@ -622,6 +706,9 @@ async def install_mcp_config( # Get settings service to build the SSE URL settings_service = get_settings_service() + if settings_service.auth_settings.AUTO_LOGIN and not settings_service.auth_settings.SUPERUSER: + # Without a superuser fallback, require API key auth for MCP installs. + should_generate_api_key = True settings = settings_service.settings host = settings.host or None port = settings.port or None @@ -632,12 +719,18 @@ async def install_mcp_config( os_type = platform.system() use_mcp_composer = should_use_mcp_composer(project) + connection_urls: list[str] + transport_mode = (body.transport or "sse").lower() + if transport_mode not in {"sse", "streamablehttp"}: + raise HTTPException(status_code=400, detail="Invalid transport. Use 'sse' or 'streamablehttp'.") if use_mcp_composer: try: auth_config = await _get_mcp_composer_auth_config(project) await get_or_start_mcp_composer(auth_config, project.name, project_id) + composer_streamable_http_url = await get_composer_streamable_http_url(project) sse_url = await get_composer_sse_url(project) + connection_urls = [composer_streamable_http_url, sse_url] except MCPComposerError as e: await logger.aerror( f"Failed to start MCP Composer for project '{project.name}' ({project_id}): {e.message}" @@ -655,16 +748,19 @@ async def install_mcp_config( args = [ f"mcp-composer{settings.mcp_composer_version}", "--mode", - "stdio", + "http", + "--endpoint", + composer_streamable_http_url, "--sse-url", sse_url, - "--disable-composer-tools", "--client_auth_type", "oauth", + "--disable-composer-tools", ] else: # For non-OAuth (API key or no auth), use mcp-proxy - sse_url = await get_project_sse_url(project_id) + streamable_http_url = await get_project_streamable_http_url(project_id) + legacy_sse_url = await get_project_sse_url(project_id) command = "uvx" args = ["mcp-proxy"] # Check if we need to add Langflow API key headers @@ -680,8 +776,12 @@ async def install_mcp_config( langflow_api_key = api_key_response.api_key args.extend(["--headers", "x-api-key", langflow_api_key]) - # Add the SSE URL for mcp-proxy - args.append(sse_url) + # Add the target URL for mcp-proxy based on requested transport + proxy_target_url = streamable_http_url if transport_mode == "streamablehttp" else legacy_sse_url + if transport_mode == "streamablehttp": + args.extend(["--transport", "streamablehttp"]) + args.append(proxy_target_url) + connection_urls = [streamable_http_url, legacy_sse_url] if os_type == "Windows" and not use_mcp_composer: # Only wrap in cmd for Windows when using mcp-proxy @@ -733,7 +833,8 @@ async def install_mcp_config( if "mcpServers" not in existing_config: existing_config["mcpServers"] = {} - existing_config, removed_servers = remove_server_by_sse_url(existing_config, sse_url) + # Remove stale entries that point to the same Langflow URLs (e.g. after the project is renamed) + existing_config, removed_servers = remove_server_by_urls(existing_config, connection_urls) if removed_servers: await logger.adebug("Removed existing MCP servers with same SSE URL for reinstall: %s", removed_servers) @@ -767,7 +868,7 @@ async def install_mcp_config( async def get_project_composer_url( project_id: UUID, current_user: CurrentActiveMCPUser, -): +) -> ComposerUrlResponse: """Get the MCP Composer URL for a specific project. On failure, this endpoint should return with a 200 status code and an error message in @@ -780,51 +881,66 @@ async def get_project_composer_url( ) if not should_use_mcp_composer(project): + streamable_http_url = await get_project_streamable_http_url(project_id) + legacy_sse_url = await get_project_sse_url(project_id) # Check if there's a recent error from a failed OAuth attempt last_error = mcp_composer_service.get_last_error(str(project_id)) - # If there's a recent error, return it even though OAuth is not currently active - # This happens when OAuth was attempted but rolled back due to an error - if last_error: - return { - "project_id": str(project_id), - "uses_composer": False, - "error_message": last_error, - } - return { + # Always return the regular MCP URLs so the UI can fall back to manual installation instructions. + response_payload: dict[str, Any] = { "project_id": str(project_id), "uses_composer": False, - "error_message": ( - "MCP Composer is only available for projects with MCP Composer enabled and OAuth authentication" - ), + "streamable_http_url": streamable_http_url, + "legacy_sse_url": legacy_sse_url, } + # If there's a recent error, return it even though OAuth is not currently active + # This happens when OAuth was attempted but rolled back due to an error + if last_error: + response_payload["error_message"] = last_error + return ComposerUrlResponse(**response_payload) auth_config = await _get_mcp_composer_auth_config(project) try: await get_or_start_mcp_composer(auth_config, project.name, project_id) + composer_streamable_http_url = await get_composer_streamable_http_url(project) composer_sse_url = await get_composer_sse_url(project) # Clear any previous error on success mcp_composer_service.clear_last_error(str(project_id)) - return {"project_id": str(project_id), "sse_url": composer_sse_url, "uses_composer": True} + return ComposerUrlResponse( + project_id=str(project_id), + uses_composer=True, + streamable_http_url=composer_streamable_http_url, + legacy_sse_url=composer_sse_url, + ) except MCPComposerError as e: - return {"project_id": str(project_id), "uses_composer": True, "error_message": e.message} + await logger.aerror( + "Failed to obtain MCP Composer URL for project %s (%s): %s", + project.name, + project_id, + e.message, + ) + return ComposerUrlResponse( + project_id=str(project_id), + uses_composer=True, + error_message=e.message, + ) except Exception as e: # noqa: BLE001 await logger.aerror(f"Unexpected error getting composer URL: {e}") - return { - "project_id": str(project_id), - "uses_composer": True, - "error_message": "Failed to start MCP Composer. See logs for details.", - } + return ComposerUrlResponse( + project_id=str(project_id), + uses_composer=True, + error_message="Failed to start MCP Composer. See logs for details.", + ) except Exception as e: # noqa: BLE001 msg = f"Error getting composer URL for project {project_id}: {e!s}" await logger.aerror(msg) - return { - "project_id": str(project_id), - "uses_composer": True, - "error_message": "Failed to get MCP Composer URL. See logs for details.", - } + return ComposerUrlResponse( + project_id=str(project_id), + uses_composer=True, + error_message="Failed to get MCP Composer URL. See logs for details.", + ) @router.get("/{project_id}/installed") @@ -845,8 +961,10 @@ async def check_installed_mcp_servers( project = await verify_project_access(project_id, current_user) if should_use_mcp_composer(project): + project_streamable_url = await get_composer_streamable_http_url(project) project_sse_url = await get_composer_sse_url(project) else: + project_streamable_url = await get_project_streamable_http_url(project_id) project_sse_url = await get_project_sse_url(project_id) await logger.adebug( @@ -871,18 +989,18 @@ async def check_installed_mcp_servers( try: with config_path.open("r") as f: config_data = json.load(f) - if config_contains_sse_url(config_data, project_sse_url): - await logger.adebug( - "Found %s config with matching SSE URL: %s", client_name, project_sse_url - ) - installed = True - else: - await logger.adebug( - "%s config exists but no server with SSE URL: %s (available servers: %s)", - client_name, - project_sse_url, - list(config_data.get("mcpServers", {}).keys()), - ) + if config_contains_server_url(config_data, [project_streamable_url, project_sse_url]): + await logger.adebug( + "Found %s config with matching URL for project %s", client_name, project.name + ) + installed = True + else: + await logger.adebug( + "%s config exists but no server with URL: %s (available servers: %s)", + client_name, + project_sse_url, + list(config_data.get("mcpServers", {}).keys()), + ) except json.JSONDecodeError: await logger.awarning("Failed to parse %s config JSON at: %s", client_name, config_path) # available is True but installed remains False due to parse error @@ -905,14 +1023,34 @@ async def check_installed_mcp_servers( return results -def config_contains_sse_url(config_data: dict, sse_url: str) -> bool: - """Check if any MCP server in the config uses the specified SSE URL.""" +def _normalize_url_list(urls: Sequence[str] | str) -> list[str]: + """Ensure URL inputs are always handled as a list of strings.""" + if isinstance(urls, str): + return [urls] + try: + return [str(url) for url in urls] + except TypeError as exc: + error_msg = "urls must be a sequence of strings or a single string" + raise TypeError(error_msg) from exc + + +def _args_reference_urls(args: Sequence[Any] | None, urls: list[str]) -> bool: + """Check whether the given args list references any of the provided URLs.""" + if not args or not urls: + return False + return bool({arg for arg in args if isinstance(arg, str)}.intersection(urls)) + + +def config_contains_server_url(config_data: dict, urls: Sequence[str] | str) -> bool: + """Check if any MCP server in the config uses one of the specified URLs.""" + normalized_urls = _normalize_url_list(urls) + if not normalized_urls: + return False + mcp_servers = config_data.get("mcpServers", {}) for server_name, server_config in mcp_servers.items(): - args = server_config.get("args", []) - # The SSE URL is typically the last argument in mcp-proxy configurations - if args and sse_url in args: - logger.debug("Found matching SSE URL in server: %s", server_name) + if _args_reference_urls(server_config.get("args", []), normalized_urls): + logger.debug("Found matching server URL in server: %s", server_name) return True return False @@ -994,12 +1132,16 @@ async def get_config_path(client: str) -> Path: raise ValueError(msg) -def remove_server_by_sse_url(config_data: dict, sse_url: str) -> tuple[dict, list[str]]: - """Remove any MCP servers that use the specified SSE URL from config data. +def remove_server_by_urls(config_data: dict, urls: Sequence[str] | str) -> tuple[dict, list[str]]: + """Remove any MCP servers that use one of the specified URLs from config data. Returns: tuple: (updated_config, list_of_removed_server_names) """ + normalized_urls = _normalize_url_list(urls) + if not normalized_urls: + return config_data, [] + if "mcpServers" not in config_data: return config_data, [] @@ -1008,8 +1150,7 @@ def remove_server_by_sse_url(config_data: dict, sse_url: str) -> tuple[dict, lis # Find servers to remove for server_name, server_config in config_data["mcpServers"].items(): - args = server_config.get("args", []) - if args and args[-1] == sse_url: + if _args_reference_urls(server_config.get("args", []), normalized_urls): servers_to_remove.append(server_name) # Remove the servers @@ -1051,6 +1192,13 @@ class ProjectMCPServer: def __init__(self, project_id: UUID): self.project_id = project_id self.server = Server(f"langflow-mcp-project-{project_id}") + # TODO: implement an environment variable to enable/disable stateless mode + self.session_manager = StreamableHTTPSessionManager(self.server, stateless=True) + # since we lazily initialize the session manager's lifecycle + # via .run(), which can only be called once, otherwise an error is raised, + # we use the lock to prevent race conditions on concurrent requests to prevent such an error + self._manager_lock = anyio.Lock() + self._manager_started = False # whether or not the session manager is running # Register handlers that filter by project @self.server.list_tools() @@ -1085,9 +1233,129 @@ async def handle_call_project_tool(name: str, arguments: dict) -> list[types.Tex is_action=True, ) + async def ensure_session_manager_running(self) -> None: + """Start the project's Streamable HTTP manager if needed.""" + if self._manager_started: + return + async with self._manager_lock: + if self._manager_started: + return + try: + task_group = get_project_task_group() + await task_group.start_task(self._run_session_manager) + await logger.adebug("Streamable HTTP manager started for project %s", self.project_id) + except Exception as e: + await logger.aexception(f"Failed to start session manager for project {self.project_id}: {e}") + raise + + async def _run_session_manager(self, *, task_status: TaskStatus[None] = anyio.TASK_STATUS_IGNORED): + """Own the lifecycle of the project's Streamable HTTP session manager.""" + try: + async with self.session_manager.run(): + self._manager_started = True # set flag before unblocking task (ensures waiting requests proceed) + task_status.started() # unblock + await anyio.sleep_forever() + except anyio.get_cancelled_exc_class(): + await logger.adebug(f"Streamable HTTP manager cancelled for project {self.project_id}") + except Exception as e: + await logger.aexception(f"Error in session manager for project {self.project_id}: {e}") + raise + finally: + self._manager_started = False + await logger.adebug(f"Streamable HTTP manager stopped for project {self.project_id}") + # Cache of project MCP servers -project_mcp_servers = {} +project_mcp_servers: dict[str, ProjectMCPServer] = {} + + +# Due to the lazy initialization of the project MCP servers' +# streamable-http session managers, we implement a global +# task group (AnyIO) manager for the session managers. +# This ensures that each session manager's .run() context manager is +# entered and exited from the same coroutine, otherwise Asyncio will raise a RuntimeError. +class ProjectTaskGroup: + """Manage the dynamically created MCP project servers' streamable-http session managers. + + Utilizes an AnyIO TaskGroup to manage + the lifecycle of the streamable-http session managers. + This ensures that each session manager's .run() + context manager is entered and exited from the same coroutine, + otherwise Asyncio will raise a RuntimeError. + """ + + def __init__(self): + self._started = False + self._start_stop_lock = anyio.Lock() + self._task_group: TaskGroup | None = None + self._tg_task: asyncio.Task | None = None + self._tg_ready = anyio.Event() + + async def _start_tg(self) -> None: + """Background task that owns the task group lifecycle. + + This ensures __aenter__ and __aexit__ happen in the same task. + """ + async with anyio.create_task_group() as tg: + self._task_group = tg + self._tg_ready.set() + await anyio.sleep_forever() + + async def start(self) -> None: + """Create the project task group.""" + async with self._start_stop_lock: + if self._started: + return + self._tg_ready = anyio.Event() + self._tg_task = asyncio.create_task(self._start_tg()) + await self._tg_ready.wait() + self._started = True + + async def stop(self) -> None: + """Close the shared project task group and signal all servers to shut down.""" + async with self._start_stop_lock: + if not self._started: + return + try: # https://anyio.readthedocs.io/en/stable/cancellation.html, https://docs.python.org/3/library/asyncio-task.html#asyncio.Task.cancel + self._task_group.cancel_scope.cancel() # type: ignore[union-attr] + await self._tg_task # type: ignore[misc] + except Exception as e: # noqa: BLE001 + await logger.aexception(f"Failed to stop project task group: {e}") + finally: + self._cleanup() + await logger.adebug("Project MCP task group stopped") + + async def start_task(self, func: Callable[..., Awaitable[Any]], *args) -> Any: + if not self._started or self._task_group is None: + msg = "MCP project task group not initialized. Call start_project_task_group() first." + raise RuntimeError(msg) + return await self._task_group.start(func, *args) + + def _cleanup(self) -> None: + """Cleanup the project task group.""" + self._task_group = None + self._tg_task = None + self._tg_ready = None + self._started = False + project_mcp_servers.clear() + + +_project_task_group = ProjectTaskGroup() + + +async def start_project_task_group() -> None: + """Initialize the shared project task group.""" + await _project_task_group.start() + + +def get_project_task_group() -> ProjectTaskGroup: + """Get the project task group manager.""" + return _project_task_group + + +async def stop_project_task_group() -> None: + """Close the shared project task group.""" + await _project_task_group.stop() def get_project_mcp_server(project_id: UUID | None) -> ProjectMCPServer: @@ -1102,6 +1370,11 @@ def get_project_mcp_server(project_id: UUID | None) -> ProjectMCPServer: return project_mcp_servers[project_id_str] +# Note: Shutdown is handled by stop_project_task_group() in main.py lifespan +# This handler was removed because ProjectMCPServer doesn't have stop_session_manager() +# Session managers are managed centrally by ProjectTaskGroup + + async def register_project_with_composer(project: Folder): """Register a project with MCP Composer by starting a dedicated composer instance.""" try: @@ -1118,13 +1391,15 @@ async def register_project_with_composer(project: Folder): error_msg = "Project must have an ID to register with MCP Composer" raise ValueError(error_msg) - sse_url = await get_project_sse_url(project.id) + streamable_http_url = await get_project_streamable_http_url(project.id) + legacy_sse_url = await get_project_sse_url(project.id) auth_config = await _get_mcp_composer_auth_config(project) error_message = await mcp_composer_service.start_project_composer( project_id=str(project.id), - sse_url=sse_url, + streamable_http_url=streamable_http_url, auth_config=auth_config, + legacy_sse_url=legacy_sse_url, ) if error_message is not None: raise RuntimeError(error_message) @@ -1247,9 +1522,15 @@ async def get_or_start_mcp_composer(auth_config: dict, project_name: str, projec error_msg = "Langflow host and port must be set in settings to register project with MCP Composer" raise ValueError(error_msg) - sse_url = await get_project_sse_url(project_id) + streamable_http_url = await get_project_streamable_http_url(project_id) + legacy_sse_url = await get_project_sse_url(project_id) if not auth_config: error_msg = f"Auth config is required to start MCP Composer for project {project_name}" raise MCPComposerConfigError(error_msg, str(project_id)) - await mcp_composer_service.start_project_composer(str(project_id), sse_url, auth_config) + await mcp_composer_service.start_project_composer( + str(project_id), + streamable_http_url, + auth_config, + legacy_sse_url=legacy_sse_url, + ) diff --git a/src/backend/base/langflow/api/v1/mcp_utils.py b/src/backend/base/langflow/api/v1/mcp_utils.py index 47232c244cf3..14c70cfaeb95 100644 --- a/src/backend/base/langflow/api/v1/mcp_utils.py +++ b/src/backend/base/langflow/api/v1/mcp_utils.py @@ -8,6 +8,7 @@ from collections.abc import Awaitable, Callable from contextvars import ContextVar from functools import wraps +from pathlib import Path from typing import Any, ParamSpec, TypeVar from urllib.parse import quote, unquote, urlparse from uuid import uuid4 @@ -24,12 +25,15 @@ from langflow.helpers.flow import json_schema_from_flow from langflow.schema.message import Message from langflow.services.database.models import Flow +from langflow.services.database.models.file.model import File as UserFile from langflow.services.database.models.user.model import User from langflow.services.deps import get_settings_service, get_storage_service, session_scope T = TypeVar("T") P = ParamSpec("P") +MCP_SERVERS_FILE = "_mcp_servers" + # Create context variables current_user_ctx: ContextVar[User] = ContextVar("current_user_ctx") # Carries per-request variables injected via HTTP headers (e.g., X-Langflow-Global-Var-*) @@ -89,7 +93,12 @@ async def handle_list_resources(project_id=None): port = getattr(settings_service.settings, "port", 3000) base_url = f"http://{host}:{port}".rstrip("/") - + try: + current_user = current_user_ctx.get() + except Exception as e: # noqa: BLE001 + msg = f"Error getting current user: {e!s}" + await logger.aexception(msg) + current_user = None async with session_scope() as session: # Build query based on whether project_id is provided flows_query = select(Flow).where(Flow.folder_id == project_id) if project_id else select(Flow) @@ -104,7 +113,7 @@ async def handle_list_resources(project_id=None): # URL encode the filename safe_filename = quote(file_name) resource = types.Resource( - uri=f"{base_url}/api/v1/files/{flow.id}/{safe_filename}", + uri=f"{base_url}/api/v1/files/download/{flow.id}/{safe_filename}", name=file_name, description=f"File in flow: {flow.name}", mimeType=build_content_type_from_extension(file_name), @@ -114,6 +123,33 @@ async def handle_list_resources(project_id=None): msg = f"Error listing files for flow {flow.id}: {e}" await logger.adebug(msg) continue + #################################################### + # When a user uploads a file inside a flow + # (e.g., via the File Read component), + # it hits /api/v2/files (POST), + # which saves files at the user-level. + # So the above query for flow files is not enough. + # So we list all user files for the current user. + # This is not good. We need to fix this for 1.8.0. + ################################################### + if current_user: + user_files_stmt = select(UserFile).where(UserFile.user_id == current_user.id) + user_files = (await session.exec(user_files_stmt)).all() + for user_file in user_files: + stored_path = getattr(user_file, "path", "") or "" + stored_filename = Path(stored_path).name if stored_path else user_file.name + safe_filename = quote(stored_filename) + if stored_filename.startswith(f"{MCP_SERVERS_FILE}_{current_user.id}"): + # reserved file name for langflow MCP server config file(s) + continue + description = getattr(user_file, "provider", None) or "User file uploaded via File Manager" + resource = types.Resource( + uri=f"{base_url}/api/v1/files/download/{current_user.id}/{safe_filename}", + name=stored_filename, + description=description, + mimeType=build_content_type_from_extension(stored_filename), + ) + resources.append(resource) except Exception as e: msg = f"Error in listing resources: {e!s}" await logger.aexception(msg) @@ -126,7 +162,7 @@ async def handle_read_resource(uri: str) -> bytes: try: # Parse the URI properly parsed_uri = urlparse(str(uri)) - # Path will be like /api/v1/files/{flow_id}/{filename} + # Path will be like /api/v1/files/download/{namespace}/{filename} path_parts = parsed_uri.path.split("/") # Remove empty strings from split path_parts = [p for p in path_parts if p] diff --git a/src/backend/base/langflow/api/v1/monitor.py b/src/backend/base/langflow/api/v1/monitor.py index a5aa1def475b..c8b0dd841780 100644 --- a/src/backend/base/langflow/api/v1/monitor.py +++ b/src/backend/base/langflow/api/v1/monitor.py @@ -10,9 +10,11 @@ from langflow.api.utils import DbSession, custom_params from langflow.schema.message import MessageResponse from langflow.services.auth.utils import get_current_active_user +from langflow.services.database.models.flow.model import Flow from langflow.services.database.models.message.model import MessageRead, MessageTable, MessageUpdate from langflow.services.database.models.transactions.crud import transform_transaction_table from langflow.services.database.models.transactions.model import TransactionTable +from langflow.services.database.models.user.model import User from langflow.services.database.models.vertex_builds.crud import ( delete_vertex_builds_by_flow_id, get_vertex_builds_by_flow_id, @@ -22,7 +24,7 @@ router = APIRouter(prefix="/monitor", tags=["Monitor"]) -@router.get("/builds") +@router.get("/builds", dependencies=[Depends(get_current_active_user)]) async def get_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSession) -> VertexBuildMapModel: try: vertex_builds = await get_vertex_builds_by_flow_id(session, flow_id) @@ -31,7 +33,7 @@ async def get_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSessio raise HTTPException(status_code=500, detail=str(e)) from e -@router.delete("/builds", status_code=204) +@router.delete("/builds", status_code=204, dependencies=[Depends(get_current_active_user)]) async def delete_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSession) -> None: try: await delete_vertex_builds_by_flow_id(session, flow_id) @@ -39,15 +41,20 @@ async def delete_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSes raise HTTPException(status_code=500, detail=str(e)) from e -@router.get("/messages/sessions", dependencies=[Depends(get_current_active_user)]) +@router.get("/messages/sessions") async def get_message_sessions( session: DbSession, + current_user: Annotated[User, Depends(get_current_active_user)], flow_id: Annotated[UUID | None, Query()] = None, ) -> list[str]: try: stmt = select(MessageTable.session_id).distinct() stmt = stmt.where(col(MessageTable.session_id).isnot(None)) + # Filter by user's flows + user_flows_stmt = select(Flow.id).where(Flow.user_id == current_user.id) + stmt = stmt.where(col(MessageTable.flow_id).in_(user_flows_stmt)) + if flow_id: stmt = stmt.where(MessageTable.flow_id == flow_id) @@ -57,9 +64,10 @@ async def get_message_sessions( raise HTTPException(status_code=500, detail=str(e)) from e -@router.get("/messages", dependencies=[Depends(get_current_active_user)]) +@router.get("/messages") async def get_messages( session: DbSession, + current_user: Annotated[User, Depends(get_current_active_user)], flow_id: Annotated[UUID | None, Query()] = None, session_id: Annotated[str | None, Query()] = None, sender: Annotated[str | None, Query()] = None, @@ -68,6 +76,11 @@ async def get_messages( ) -> list[MessageResponse]: try: stmt = select(MessageTable) + + # Filter by user's flows + user_flows_stmt = select(Flow.id).where(Flow.user_id == current_user.id) + stmt = stmt.where(col(MessageTable.flow_id).in_(user_flows_stmt)) + if flow_id: stmt = stmt.where(MessageTable.flow_id == flow_id) if session_id: @@ -80,8 +93,8 @@ async def get_messages( if sender_name: stmt = stmt.where(MessageTable.sender_name == sender_name) if order_by: - col = getattr(MessageTable, order_by).asc() - stmt = stmt.order_by(col) + order_col = getattr(MessageTable, order_by).asc() + stmt = stmt.order_by(order_col) messages = await session.exec(stmt) return [MessageResponse.model_validate(d, from_attributes=True) for d in messages] except Exception as e: diff --git a/src/backend/base/langflow/api/v1/projects.py b/src/backend/base/langflow/api/v1/projects.py index 290b4a607a36..f987434f171f 100644 --- a/src/backend/base/langflow/api/v1/projects.py +++ b/src/backend/base/langflow/api/v1/projects.py @@ -22,7 +22,11 @@ from langflow.api.utils.mcp.config_utils import validate_mcp_server_for_project from langflow.api.v1.auth_helpers import handle_auth_settings_update from langflow.api.v1.flows import create_flows -from langflow.api.v1.mcp_projects import get_project_sse_url, register_project_with_composer +from langflow.api.v1.mcp_projects import ( + get_project_sse_url, # noqa: F401 + get_project_streamable_http_url, + register_project_with_composer, +) from langflow.api.v1.schemas import FlowListCreate from langflow.api.v2.mcp import update_server from langflow.helpers.flow import generate_unique_flow_name @@ -75,6 +79,7 @@ async def create_project( ) if project_results: project_names = [project.name for project in project_results] + # TODO: this throws an error if the name contains non-numeric content in parentheses project_numbers = [int(name.split("(")[-1].split(")")[0]) for name in project_names if "(" in name] if project_numbers: new_project.name = f"{new_project.name} ({max(project_numbers) + 1})" @@ -100,22 +105,26 @@ async def create_project( # Auto-register MCP server for this project with configured default auth if get_settings_service().settings.add_projects_to_mcp_servers: try: - # Build SSE URL - sse_url = await get_project_sse_url(new_project.id) + # Build Streamable HTTP URL (preferred transport) and legacy SSE URL (for docs/errors) + streamable_http_url = await get_project_streamable_http_url(new_project.id) + # legacy SSE URL + # sse_url = await get_project_sse_url(new_project.id) # Prepare server config based on auth type same as new project if default_auth.get("auth_type", "none") == "apikey": # Create API key for API key authentication api_key_name = f"MCP Project {new_project.name} - default" unmasked_api_key = await create_api_key(session, ApiKeyCreate(name=api_key_name), current_user.id) - + # Starting v>=1.7.1, we use Streamable HTTP transport by default command = "uvx" args = [ "mcp-proxy", + "--transport", + "streamablehttp", "--headers", "x-api-key", unmasked_api_key.api_key, - sse_url, + streamable_http_url, ] elif default_auth.get("auth_type", "none") == "oauth": msg = "OAuth authentication is not yet implemented for MCP server creation during project creation." @@ -126,7 +135,9 @@ async def create_project( command = "uvx" args = [ "mcp-proxy", - sse_url, + "--transport", + "streamablehttp", + streamable_http_url, ] server_config = {"command": command, "args": args} diff --git a/src/backend/base/langflow/api/v1/schemas.py b/src/backend/base/langflow/api/v1/schemas.py index 69f8439732a6..a220a214b924 100644 --- a/src/backend/base/langflow/api/v1/schemas.py +++ b/src/backend/base/langflow/api/v1/schemas.py @@ -460,5 +460,16 @@ class MCPProjectResponse(BaseModel): auth_settings: AuthSettings | None = None +class ComposerUrlResponse(BaseModel): + """Response model for MCP Composer connection details.""" + + project_id: str + uses_composer: bool + streamable_http_url: str | None = None + legacy_sse_url: str | None = None + error_message: str | None = None + + class MCPInstallRequest(BaseModel): client: str + transport: Literal["sse", "streamablehttp"] | None = None diff --git a/src/backend/base/langflow/api/v1/users.py b/src/backend/base/langflow/api/v1/users.py index 02f3df29a432..4bbadb821228 100644 --- a/src/backend/base/langflow/api/v1/users.py +++ b/src/backend/base/langflow/api/v1/users.py @@ -26,8 +26,12 @@ async def add_user( user: UserCreate, session: DbSession, + current_user: Annotated[User, Depends(get_current_active_superuser)], # noqa: ARG001 ) -> User: - """Add a new user to the database.""" + """Add a new user to the database. + + Requires superuser authentication to prevent unauthorized account creation. + """ new_user = User.model_validate(user, from_attributes=True) try: new_user.password = get_password_hash(user.password) diff --git a/src/backend/base/langflow/api/v1/validate.py b/src/backend/base/langflow/api/v1/validate.py index 9150b3245290..fd0f1911f5ab 100644 --- a/src/backend/base/langflow/api/v1/validate.py +++ b/src/backend/base/langflow/api/v1/validate.py @@ -1,17 +1,17 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException from lfx.base.prompts.api_utils import process_prompt_template from lfx.custom.validate import validate_code from lfx.log.logger import logger -from langflow.api.utils import CurrentActiveUser from langflow.api.v1.base import Code, CodeValidationResponse, PromptValidationResponse, ValidatePromptRequest +from langflow.services.auth.utils import get_current_active_user # build router router = APIRouter(prefix="/validate", tags=["Validate"]) -@router.post("/code", status_code=200) -async def post_validate_code(code: Code, _current_user: CurrentActiveUser) -> CodeValidationResponse: +@router.post("/code", status_code=200, dependencies=[Depends(get_current_active_user)]) +async def post_validate_code(code: Code) -> CodeValidationResponse: try: errors = validate_code(code.code) return CodeValidationResponse( @@ -23,8 +23,10 @@ async def post_validate_code(code: Code, _current_user: CurrentActiveUser) -> Co raise HTTPException(status_code=500, detail=str(e)) from e -@router.post("/prompt", status_code=200) -async def post_validate_prompt(prompt_request: ValidatePromptRequest) -> PromptValidationResponse: +@router.post("/prompt", status_code=200, dependencies=[Depends(get_current_active_user)]) +async def post_validate_prompt( + prompt_request: ValidatePromptRequest, +) -> PromptValidationResponse: try: if not prompt_request.frontend_node: return PromptValidationResponse( diff --git a/src/backend/base/langflow/api/v2/files.py b/src/backend/base/langflow/api/v2/files.py index 8cb3b606ce99..b67b82ce0a0e 100644 --- a/src/backend/base/langflow/api/v2/files.py +++ b/src/backend/base/langflow/api/v2/files.py @@ -98,7 +98,8 @@ async def fetch_file_object(file_id: uuid.UUID, current_user: CurrentActiveUser, # Make sure the user has access to the file if file.user_id != current_user.id: - raise HTTPException(status_code=403, detail="You don't have access to this file") + # Return 404 to prevent information disclosure about resource existence + raise HTTPException(status_code=404, detail="File not found") return file @@ -225,8 +226,8 @@ async def upload_user_file( # S3 bucket doesn't exist or file not found, or file was uploaded but can't be found raise HTTPException(status_code=404, detail=str(e)) from e except PermissionError as e: - # Access denied or invalid credentials - raise HTTPException(status_code=403, detail=str(e)) from e + # Access denied or invalid credentials - return 500 as this is a server config issue + raise HTTPException(status_code=500, detail="Error accessing storage") from e except Exception as e: # General error saving file or getting file size raise HTTPException(status_code=500, detail=f"Error accessing file: {e}") from e @@ -583,17 +584,16 @@ async def download_file( raise HTTPException(status_code=404, detail="File not found") return await read_file_content(file_content, decode=True) - # For streaming, use the appropriate method based on storage type - if hasattr(storage_service, "get_file_stream"): - # S3 storage - use streaming method - file_stream = storage_service.get_file_stream(flow_id=str(current_user.id), file_name=file_name) - byte_stream = file_stream - else: - # Local storage - get file and convert to stream - file_content = await storage_service.get_file(flow_id=str(current_user.id), file_name=file_name) - if file_content is None: - raise HTTPException(status_code=404, detail="File not found") - byte_stream = byte_stream_generator(file_content) + # Check file exists before streaming (to catch errors before response headers are sent) + # This is important because once StreamingResponse starts, we can't change the status code + try: + await storage_service.get_file_size(flow_id=str(current_user.id), file_name=file_name) + except FileNotFoundError as e: + raise HTTPException(status_code=404, detail=f"File not found: {e}") from e + + # Wrap the async generator in byte_stream_generator to ensure proper iteration + file_stream = storage_service.get_file_stream(flow_id=str(current_user.id), file_name=file_name) + byte_stream = byte_stream_generator(file_stream) # Create the filename with extension file_extension = Path(file.path).suffix diff --git a/src/backend/base/langflow/api/v2/registration.py b/src/backend/base/langflow/api/v2/registration.py index 20efa1668243..91bbce79f612 100644 --- a/src/backend/base/langflow/api/v2/registration.py +++ b/src/backend/base/langflow/api/v2/registration.py @@ -3,10 +3,11 @@ from datetime import datetime, timezone from pathlib import Path -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel, EmailStr from langflow.logging import logger +from langflow.services.auth.utils import get_current_active_user from langflow.services.deps import get_telemetry_service from langflow.services.telemetry.schema import EmailPayload @@ -135,7 +136,7 @@ async def _send_email_telemetry(email: str) -> None: logger.debug(f"Successfully sent email telemetry event: {payload.email}") -@router.get("/") +@router.get("/", dependencies=[Depends(get_current_active_user)]) async def get_registration(): """Get the registered user (if any).""" try: diff --git a/src/backend/base/langflow/helpers/flow.py b/src/backend/base/langflow/helpers/flow.py index 46f4b3810f33..fae1d1e390b5 100644 --- a/src/backend/base/langflow/helpers/flow.py +++ b/src/backend/base/langflow/helpers/flow.py @@ -6,7 +6,8 @@ from fastapi import HTTPException from lfx.log.logger import logger from pydantic.v1 import BaseModel, Field, create_model -from sqlmodel import select +from sqlalchemy.orm import aliased +from sqlmodel import asc, desc, select from langflow.schema.schema import INPUT_FIELD_NAME from langflow.services.database.models.flow.model import Flow, FlowRead @@ -19,13 +20,17 @@ from lfx.graph.schema import RunOutputs from lfx.graph.vertex.base import Vertex - from langflow.schema.data import Data +from langflow.schema.data import Data INPUT_TYPE_MAP = { "ChatInput": {"type_hint": "Optional[str]", "default": '""'}, "TextInput": {"type_hint": "Optional[str]", "default": '""'}, "JSONInput": {"type_hint": "Optional[dict]", "default": "{}"}, } +SORT_DISPATCHER = { + "asc": asc, + "desc": desc, +} async def list_flows(*, user_id: str | None = None) -> list[Data]: @@ -44,6 +49,120 @@ async def list_flows(*, user_id: str | None = None) -> list[Data]: raise ValueError(msg) from e +async def list_flows_by_flow_folder( + *, + user_id: str | None = None, + flow_id: str | None = None, + order_params: dict | None = {"column": "updated_at", "direction": "desc"}, # noqa: B006 +) -> list[Data]: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not flow_id: + msg = "Flow ID is required" + raise ValueError(msg) + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id + uuid_flow_id = UUID(flow_id) if isinstance(flow_id, str) else flow_id + # get all flows belonging to the specified user + # and inside the same folder as the specified flow + flow_ = aliased(Flow) # flow table alias, used to retrieve the folder + stmt = ( + select(Flow.id, Flow.name, Flow.updated_at) + .join(flow_, Flow.folder_id == flow_.folder_id) + .where(flow_.id == uuid_flow_id) + .where(flow_.user_id == uuid_user_id) + .where(Flow.user_id == uuid_user_id) + .where(Flow.id != uuid_flow_id) + ) + # sort flows by the specified column and direction + if order_params is not None: + sort_col = getattr(Flow, order_params.get("column", "updated_at"), Flow.updated_at) + sort_dir = SORT_DISPATCHER.get(order_params.get("direction", "desc"), desc) + stmt = stmt.order_by(sort_dir(sort_col)) + + flows = (await session.exec(stmt)).all() + return [Data(data=dict(flow._mapping)) for flow in flows] # noqa: SLF001 + except Exception as e: + msg = f"Error listing flows: {e}" + raise ValueError(msg) from e + + +async def list_flows_by_folder_id( + *, user_id: str | None = None, folder_id: str | None = None, order_params: dict | None = None +) -> list[Data]: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not folder_id: + msg = "Folder ID is required" + raise ValueError(msg) + + if order_params is None: + order_params = {"column": "updated_at", "direction": "desc"} + + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id + uuid_folder_id = UUID(folder_id) if isinstance(folder_id, str) else folder_id + stmt = ( + select(Flow.id, Flow.name, Flow.updated_at) + .where(Flow.user_id == uuid_user_id) + .where(Flow.folder_id == uuid_folder_id) + ) + if order_params is not None: + sort_col = getattr(Flow, order_params.get("column", "updated_at"), Flow.updated_at) + sort_dir = SORT_DISPATCHER.get(order_params.get("direction", "desc"), desc) + stmt = stmt.order_by(sort_dir(sort_col)) + + flows = (await session.exec(stmt)).all() + return [Data(data=dict(flow._mapping)) for flow in flows] # noqa: SLF001 + except Exception as e: + msg = f"Error listing flows: {e}" + raise ValueError(msg) from e + + +async def get_flow_by_id_or_name( + *, + user_id: str | None = None, + flow_id: str | None = None, + flow_name: str | None = None, +) -> Data | None: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not (flow_id or flow_name): + msg = "Flow ID or Flow Name is required" + raise ValueError(msg) + + # set user provided flow id or flow name. + # if both are provided, flow_id is used. + attr, val = None, None + if flow_name: + attr = "name" + val = flow_name + if flow_id: + attr = "id" + val = flow_id + if not (attr and val): + msg = "Flow id or Name is required" + raise ValueError(msg) + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id # type: ignore[assignment] + uuid_flow_id_or_name = val # type: ignore[assignment] + if isinstance(val, str) and attr == "id": + uuid_flow_id_or_name = UUID(val) # type: ignore[assignment] + stmt = select(Flow).where(Flow.user_id == uuid_user_id).where(getattr(Flow, attr) == uuid_flow_id_or_name) + flow = (await session.exec(stmt)).first() + return flow.to_data() if flow else None + + except Exception as e: + msg = f"Error getting flow by id: {e}" + raise ValueError(msg) from e + + async def load_flow( user_id: str, flow_id: str | None = None, flow_name: str | None = None, tweaks: dict | None = None ) -> Graph: diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json index fff9580895f2..30314977ea65 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json @@ -362,7 +362,7 @@ "legacy": false, "lf_version": "1.5.0", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -410,7 +410,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -632,7 +632,7 @@ "legacy": false, "lf_version": "1.5.0", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "8c87e536cca4", "dependencies": { "dependencies": [ { @@ -641,7 +641,7 @@ }, { "name": "fastapi", - "version": "0.120.0" + "version": "0.123.0" }, { "name": "lfx", @@ -706,7 +706,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n # Preserve existing session_id from the incoming message if it exists\n existing_session_id = message.session_id\n else:\n message = Message(text=text)\n existing_session_id = None\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n # Preserve session_id from incoming message, or use component/graph session_id\n message.session_id = (\n self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, \"graph\") else None) or \"\"\n )\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json index f65b36a724b8..1fee7135197b 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json @@ -117,7 +117,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -165,7 +165,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -584,7 +584,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "8c87e536cca4", "dependencies": { "dependencies": [ { @@ -593,7 +593,7 @@ }, { "name": "fastapi", - "version": "0.120.0" + "version": "0.123.0" }, { "name": "lfx", @@ -658,7 +658,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n # Preserve existing session_id from the incoming message if it exists\n existing_session_id = message.session_id\n else:\n message = Message(text=text)\n existing_session_id = None\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n # Preserve session_id from incoming message, or use component/graph session_id\n message.session_id = (\n self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, \"graph\") else None) or \"\"\n )\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json b/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json index 250ded6d972e..926c64a28a44 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json @@ -352,7 +352,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "3dd28ea591b9", + "code_hash": "3d80f125b734", "dependencies": { "dependencies": [ { @@ -400,7 +400,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.io.text import TextComponent\nfrom lfx.io import MultilineInput, Output\nfrom lfx.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" + "value": "from lfx.base.io.text import TextComponent\nfrom lfx.io import MultilineInput, Output\nfrom lfx.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/text-input-and-output\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" }, "input_value": { "_input_type": "MultilineInput", @@ -477,7 +477,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "8c87e536cca4", "dependencies": { "dependencies": [ { @@ -486,7 +486,7 @@ }, { "name": "fastapi", - "version": "0.120.0" + "version": "0.123.0" }, { "name": "lfx", @@ -551,7 +551,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n # Preserve existing session_id from the incoming message if it exists\n existing_session_id = message.session_id\n else:\n message = Message(text=text)\n existing_session_id = None\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n # Preserve session_id from incoming message, or use component/graph session_id\n message.session_id = (\n self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, \"graph\") else None) or \"\"\n )\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -777,7 +777,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "17514953c7e8", + "code_hash": "3cda25c3f7b5", "dependencies": { "dependencies": [ { @@ -827,7 +827,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" + "value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, "input_data": { "_input_type": "HandleInput", @@ -973,7 +973,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "cdb7d379306e", + "code_hash": "47d3ccb92d71", "dependencies": { "dependencies": [ { @@ -1085,7 +1085,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import importlib\nimport re\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom langchain_community.document_loaders import RecursiveUrlLoader\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.data import safe_convert\nfrom lfx.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SliderInput, TableInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.utils.request_utils import get_user_agent\n\n# Constants\nDEFAULT_TIMEOUT = 30\nDEFAULT_MAX_DEPTH = 1\nDEFAULT_FORMAT = \"Text\"\n\n\nURL_REGEX = re.compile(\n r\"^(https?:\\/\\/)?\" r\"(www\\.)?\" r\"([a-zA-Z0-9.-]+)\" r\"(\\.[a-zA-Z]{2,})?\" r\"(:\\d+)?\" r\"(\\/[^\\s]*)?$\",\n re.IGNORECASE,\n)\n\nUSER_AGENT = None\n# Check if langflow is installed using importlib.util.find_spec(name))\nif importlib.util.find_spec(\"langflow\"):\n langflow_installed = True\n USER_AGENT = get_user_agent()\nelse:\n langflow_installed = False\n USER_AGENT = \"lfx\"\n\n\nclass URLComponent(Component):\n \"\"\"A component that loads and parses content from web pages recursively.\n\n This component allows fetching content from one or more URLs, with options to:\n - Control crawl depth\n - Prevent crawling outside the root domain\n - Use async loading for better performance\n - Extract either raw HTML or clean text\n - Configure request headers and timeouts\n \"\"\"\n\n display_name = \"URL\"\n description = \"Fetch content from one or more web pages, following links recursively.\"\n documentation: str = \"https://docs.langflow.org/components-data#url\"\n icon = \"layout-template\"\n name = \"URLComponent\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs to crawl recursively, by clicking the '+' button.\",\n is_list=True,\n tool_mode=True,\n placeholder=\"Enter a URL...\",\n list_add_label=\"Add URL\",\n input_types=[],\n ),\n SliderInput(\n name=\"max_depth\",\n display_name=\"Depth\",\n info=(\n \"Controls how many 'clicks' away from the initial page the crawler will go:\\n\"\n \"- depth 1: only the initial page\\n\"\n \"- depth 2: initial page + all pages linked directly from it\\n\"\n \"- depth 3: initial page + direct links + links found on those direct link pages\\n\"\n \"Note: This is about link traversal, not URL path depth.\"\n ),\n value=DEFAULT_MAX_DEPTH,\n range_spec=RangeSpec(min=1, max=5, step=1),\n required=False,\n min_label=\" \",\n max_label=\" \",\n min_label_icon=\"None\",\n max_label_icon=\"None\",\n # slider_input=True\n ),\n BoolInput(\n name=\"prevent_outside\",\n display_name=\"Prevent Outside\",\n info=(\n \"If enabled, only crawls URLs within the same domain as the root URL. \"\n \"This helps prevent the crawler from going to external websites.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"use_async\",\n display_name=\"Use Async\",\n info=(\n \"If enabled, uses asynchronous loading which can be significantly faster \"\n \"but might use more system resources.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n DropdownInput(\n name=\"format\",\n display_name=\"Output Format\",\n info=\"Output Format. Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.\",\n options=[\"Text\", \"HTML\"],\n value=DEFAULT_FORMAT,\n advanced=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n info=\"Timeout for the request in seconds.\",\n value=DEFAULT_TIMEOUT,\n required=False,\n advanced=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": USER_AGENT}],\n advanced=True,\n input_types=[\"DataFrame\"],\n ),\n BoolInput(\n name=\"filter_text_html\",\n display_name=\"Filter Text/HTML\",\n info=\"If enabled, filters out text/css content type from the results.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"continue_on_failure\",\n display_name=\"Continue on Failure\",\n info=\"If enabled, continues crawling even if some requests fail.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"check_response_status\",\n display_name=\"Check Response Status\",\n info=\"If enabled, checks the response status of the request.\",\n value=False,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"autoset_encoding\",\n display_name=\"Autoset Encoding\",\n info=\"If enabled, automatically sets the encoding of the request.\",\n value=True,\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Extracted Pages\", name=\"page_results\", method=\"fetch_content\"),\n Output(display_name=\"Raw Content\", name=\"raw_results\", method=\"fetch_content_as_message\", tool_mode=False),\n ]\n\n @staticmethod\n def validate_url(url: str) -> bool:\n \"\"\"Validates if the given string matches URL pattern.\n\n Args:\n url: The URL string to validate\n\n Returns:\n bool: True if the URL is valid, False otherwise\n \"\"\"\n return bool(URL_REGEX.match(url))\n\n def ensure_url(self, url: str) -> str:\n \"\"\"Ensures the given string is a valid URL.\n\n Args:\n url: The URL string to validate and normalize\n\n Returns:\n str: The normalized URL\n\n Raises:\n ValueError: If the URL is invalid\n \"\"\"\n url = url.strip()\n if not url.startswith((\"http://\", \"https://\")):\n url = \"https://\" + url\n\n if not self.validate_url(url):\n msg = f\"Invalid URL: {url}\"\n raise ValueError(msg)\n\n return url\n\n def _create_loader(self, url: str) -> RecursiveUrlLoader:\n \"\"\"Creates a RecursiveUrlLoader instance with the configured settings.\n\n Args:\n url: The URL to load\n\n Returns:\n RecursiveUrlLoader: Configured loader instance\n \"\"\"\n headers_dict = {header[\"key\"]: header[\"value\"] for header in self.headers if header[\"value\"] is not None}\n extractor = (lambda x: x) if self.format == \"HTML\" else (lambda x: BeautifulSoup(x, \"lxml\").get_text())\n\n return RecursiveUrlLoader(\n url=url,\n max_depth=self.max_depth,\n prevent_outside=self.prevent_outside,\n use_async=self.use_async,\n extractor=extractor,\n timeout=self.timeout,\n headers=headers_dict,\n check_response_status=self.check_response_status,\n continue_on_failure=self.continue_on_failure,\n base_url=url, # Add base_url to ensure consistent domain crawling\n autoset_encoding=self.autoset_encoding, # Enable automatic encoding detection\n exclude_dirs=[], # Allow customization of excluded directories\n link_regex=None, # Allow customization of link filtering\n )\n\n def fetch_url_contents(self) -> list[dict]:\n \"\"\"Load documents from the configured URLs.\n\n Returns:\n List[Data]: List of Data objects containing the fetched content\n\n Raises:\n ValueError: If no valid URLs are provided or if there's an error loading documents\n \"\"\"\n try:\n urls = list({self.ensure_url(url) for url in self.urls if url.strip()})\n logger.debug(f\"URLs: {urls}\")\n if not urls:\n msg = \"No valid URLs provided.\"\n raise ValueError(msg)\n\n all_docs = []\n for url in urls:\n logger.debug(f\"Loading documents from {url}\")\n\n try:\n loader = self._create_loader(url)\n docs = loader.load()\n\n if not docs:\n logger.warning(f\"No documents found for {url}\")\n continue\n\n logger.debug(f\"Found {len(docs)} documents from {url}\")\n all_docs.extend(docs)\n\n except requests.exceptions.RequestException as e:\n logger.exception(f\"Error loading documents from {url}: {e}\")\n continue\n\n if not all_docs:\n msg = \"No documents were successfully loaded from any URL\"\n raise ValueError(msg)\n\n # data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]\n data = [\n {\n \"text\": safe_convert(doc.page_content, clean_data=True),\n \"url\": doc.metadata.get(\"source\", \"\"),\n \"title\": doc.metadata.get(\"title\", \"\"),\n \"description\": doc.metadata.get(\"description\", \"\"),\n \"content_type\": doc.metadata.get(\"content_type\", \"\"),\n \"language\": doc.metadata.get(\"language\", \"\"),\n }\n for doc in all_docs\n ]\n except Exception as e:\n error_msg = e.message if hasattr(e, \"message\") else e\n msg = f\"Error loading documents: {error_msg!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n return data\n\n def fetch_content(self) -> DataFrame:\n \"\"\"Convert the documents to a DataFrame.\"\"\"\n return DataFrame(data=self.fetch_url_contents())\n\n def fetch_content_as_message(self) -> Message:\n \"\"\"Convert the documents to a Message.\"\"\"\n url_contents = self.fetch_url_contents()\n return Message(text=\"\\n\\n\".join([x[\"text\"] for x in url_contents]), data={\"data\": url_contents})\n" + "value": "import importlib\nimport re\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom langchain_community.document_loaders import RecursiveUrlLoader\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.data import safe_convert\nfrom lfx.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SliderInput, TableInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.utils.request_utils import get_user_agent\n\n# Constants\nDEFAULT_TIMEOUT = 30\nDEFAULT_MAX_DEPTH = 1\nDEFAULT_FORMAT = \"Text\"\n\n\nURL_REGEX = re.compile(\n r\"^(https?:\\/\\/)?\" r\"(www\\.)?\" r\"([a-zA-Z0-9.-]+)\" r\"(\\.[a-zA-Z]{2,})?\" r\"(:\\d+)?\" r\"(\\/[^\\s]*)?$\",\n re.IGNORECASE,\n)\n\nUSER_AGENT = None\n# Check if langflow is installed using importlib.util.find_spec(name))\nif importlib.util.find_spec(\"langflow\"):\n langflow_installed = True\n USER_AGENT = get_user_agent()\nelse:\n langflow_installed = False\n USER_AGENT = \"lfx\"\n\n\nclass URLComponent(Component):\n \"\"\"A component that loads and parses content from web pages recursively.\n\n This component allows fetching content from one or more URLs, with options to:\n - Control crawl depth\n - Prevent crawling outside the root domain\n - Use async loading for better performance\n - Extract either raw HTML or clean text\n - Configure request headers and timeouts\n \"\"\"\n\n display_name = \"URL\"\n description = \"Fetch content from one or more web pages, following links recursively.\"\n documentation: str = \"https://docs.langflow.org/url\"\n icon = \"layout-template\"\n name = \"URLComponent\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs to crawl recursively, by clicking the '+' button.\",\n is_list=True,\n tool_mode=True,\n placeholder=\"Enter a URL...\",\n list_add_label=\"Add URL\",\n input_types=[],\n ),\n SliderInput(\n name=\"max_depth\",\n display_name=\"Depth\",\n info=(\n \"Controls how many 'clicks' away from the initial page the crawler will go:\\n\"\n \"- depth 1: only the initial page\\n\"\n \"- depth 2: initial page + all pages linked directly from it\\n\"\n \"- depth 3: initial page + direct links + links found on those direct link pages\\n\"\n \"Note: This is about link traversal, not URL path depth.\"\n ),\n value=DEFAULT_MAX_DEPTH,\n range_spec=RangeSpec(min=1, max=5, step=1),\n required=False,\n min_label=\" \",\n max_label=\" \",\n min_label_icon=\"None\",\n max_label_icon=\"None\",\n # slider_input=True\n ),\n BoolInput(\n name=\"prevent_outside\",\n display_name=\"Prevent Outside\",\n info=(\n \"If enabled, only crawls URLs within the same domain as the root URL. \"\n \"This helps prevent the crawler from going to external websites.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"use_async\",\n display_name=\"Use Async\",\n info=(\n \"If enabled, uses asynchronous loading which can be significantly faster \"\n \"but might use more system resources.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n DropdownInput(\n name=\"format\",\n display_name=\"Output Format\",\n info=\"Output Format. Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.\",\n options=[\"Text\", \"HTML\"],\n value=DEFAULT_FORMAT,\n advanced=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n info=\"Timeout for the request in seconds.\",\n value=DEFAULT_TIMEOUT,\n required=False,\n advanced=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": USER_AGENT}],\n advanced=True,\n input_types=[\"DataFrame\"],\n ),\n BoolInput(\n name=\"filter_text_html\",\n display_name=\"Filter Text/HTML\",\n info=\"If enabled, filters out text/css content type from the results.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"continue_on_failure\",\n display_name=\"Continue on Failure\",\n info=\"If enabled, continues crawling even if some requests fail.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"check_response_status\",\n display_name=\"Check Response Status\",\n info=\"If enabled, checks the response status of the request.\",\n value=False,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"autoset_encoding\",\n display_name=\"Autoset Encoding\",\n info=\"If enabled, automatically sets the encoding of the request.\",\n value=True,\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Extracted Pages\", name=\"page_results\", method=\"fetch_content\"),\n Output(display_name=\"Raw Content\", name=\"raw_results\", method=\"fetch_content_as_message\", tool_mode=False),\n ]\n\n @staticmethod\n def validate_url(url: str) -> bool:\n \"\"\"Validates if the given string matches URL pattern.\n\n Args:\n url: The URL string to validate\n\n Returns:\n bool: True if the URL is valid, False otherwise\n \"\"\"\n return bool(URL_REGEX.match(url))\n\n def ensure_url(self, url: str) -> str:\n \"\"\"Ensures the given string is a valid URL.\n\n Args:\n url: The URL string to validate and normalize\n\n Returns:\n str: The normalized URL\n\n Raises:\n ValueError: If the URL is invalid\n \"\"\"\n url = url.strip()\n if not url.startswith((\"http://\", \"https://\")):\n url = \"https://\" + url\n\n if not self.validate_url(url):\n msg = f\"Invalid URL: {url}\"\n raise ValueError(msg)\n\n return url\n\n def _create_loader(self, url: str) -> RecursiveUrlLoader:\n \"\"\"Creates a RecursiveUrlLoader instance with the configured settings.\n\n Args:\n url: The URL to load\n\n Returns:\n RecursiveUrlLoader: Configured loader instance\n \"\"\"\n headers_dict = {header[\"key\"]: header[\"value\"] for header in self.headers if header[\"value\"] is not None}\n extractor = (lambda x: x) if self.format == \"HTML\" else (lambda x: BeautifulSoup(x, \"lxml\").get_text())\n\n return RecursiveUrlLoader(\n url=url,\n max_depth=self.max_depth,\n prevent_outside=self.prevent_outside,\n use_async=self.use_async,\n extractor=extractor,\n timeout=self.timeout,\n headers=headers_dict,\n check_response_status=self.check_response_status,\n continue_on_failure=self.continue_on_failure,\n base_url=url, # Add base_url to ensure consistent domain crawling\n autoset_encoding=self.autoset_encoding, # Enable automatic encoding detection\n exclude_dirs=[], # Allow customization of excluded directories\n link_regex=None, # Allow customization of link filtering\n )\n\n def fetch_url_contents(self) -> list[dict]:\n \"\"\"Load documents from the configured URLs.\n\n Returns:\n List[Data]: List of Data objects containing the fetched content\n\n Raises:\n ValueError: If no valid URLs are provided or if there's an error loading documents\n \"\"\"\n try:\n urls = list({self.ensure_url(url) for url in self.urls if url.strip()})\n logger.debug(f\"URLs: {urls}\")\n if not urls:\n msg = \"No valid URLs provided.\"\n raise ValueError(msg)\n\n all_docs = []\n for url in urls:\n logger.debug(f\"Loading documents from {url}\")\n\n try:\n loader = self._create_loader(url)\n docs = loader.load()\n\n if not docs:\n logger.warning(f\"No documents found for {url}\")\n continue\n\n logger.debug(f\"Found {len(docs)} documents from {url}\")\n all_docs.extend(docs)\n\n except requests.exceptions.RequestException as e:\n logger.exception(f\"Error loading documents from {url}: {e}\")\n continue\n\n if not all_docs:\n msg = \"No documents were successfully loaded from any URL\"\n raise ValueError(msg)\n\n # data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]\n data = [\n {\n \"text\": safe_convert(doc.page_content, clean_data=True),\n \"url\": doc.metadata.get(\"source\", \"\"),\n \"title\": doc.metadata.get(\"title\", \"\"),\n \"description\": doc.metadata.get(\"description\", \"\"),\n \"content_type\": doc.metadata.get(\"content_type\", \"\"),\n \"language\": doc.metadata.get(\"language\", \"\"),\n }\n for doc in all_docs\n ]\n except Exception as e:\n error_msg = e.message if hasattr(e, \"message\") else e\n msg = f\"Error loading documents: {error_msg!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n return data\n\n def fetch_content(self) -> DataFrame:\n \"\"\"Convert the documents to a DataFrame.\"\"\"\n return DataFrame(data=self.fetch_url_contents())\n\n def fetch_content_as_message(self) -> Message:\n \"\"\"Convert the documents to a Message.\"\"\"\n url_contents = self.fetch_url_contents()\n return Message(text=\"\\n\\n\".join([x[\"text\"] for x in url_contents]), data={\"data\": url_contents})\n" }, "continue_on_failure": { "_input_type": "BoolInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json index 61e033900cb2..59dddbb02a79 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json @@ -237,7 +237,7 @@ "legacy": false, "lf_version": "1.6.0", "metadata": { - "code_hash": "227e053b4704", + "code_hash": "efd064ef48ff", "dependencies": { "dependencies": [ { @@ -299,7 +299,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any, cast\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import data_to_text\nfrom lfx.inputs.inputs import DropdownInput, HandleInput, IntInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.memory import aget_messages, astore_message\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\nfrom lfx.utils.component_utils import set_current_fields, set_field_display\nfrom lfx.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass MemoryComponent(Component):\n display_name = \"Message History\"\n description = \"Stores or retrieves stored chat messages from Langflow tables or an external memory.\"\n documentation: str = \"https://docs.langflow.org/components-helpers#message-history\"\n icon = \"message-square-more\"\n name = \"Memory\"\n default_keys = [\"mode\", \"memory\", \"session_id\", \"context_id\"]\n mode_config = {\n \"Store\": [\"message\", \"memory\", \"sender\", \"sender_name\", \"session_id\", \"context_id\"],\n \"Retrieve\": [\"n_messages\", \"order\", \"template\", \"memory\", \"session_id\", \"context_id\"],\n }\n\n inputs = [\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Retrieve\", \"Store\"],\n value=\"Retrieve\",\n info=\"Operation mode: Store messages or Retrieve messages.\",\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The chat message to be stored.\",\n tool_mode=True,\n dynamic=True,\n show=False,\n ),\n HandleInput(\n name=\"memory\",\n display_name=\"External Memory\",\n input_types=[\"Memory\"],\n info=\"Retrieve messages from an external memory. If empty, it will use the Langflow tables.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"sender_type\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER, \"Machine and User\"],\n value=\"Machine and User\",\n info=\"Filter by sender type.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender\",\n display_name=\"Sender\",\n info=\"The sender of the message. Might be Machine or User. \"\n \"If empty, the current sender parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Filter by sender name.\",\n advanced=True,\n show=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Messages\",\n value=100,\n info=\"Number of messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n DropdownInput(\n name=\"order\",\n display_name=\"Order\",\n options=[\"Ascending\", \"Descending\"],\n value=\"Ascending\",\n info=\"Order of the messages.\",\n advanced=True,\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {sender} or any other key in the message data.\",\n value=\"{sender_name}: {text}\",\n advanced=True,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Message\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True),\n Output(display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True),\n ]\n\n def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:\n \"\"\"Dynamically show only the relevant output based on the selected output type.\"\"\"\n if field_name == \"mode\":\n # Start with empty outputs\n frontend_node[\"outputs\"] = []\n if field_value == \"Store\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Stored Messages\",\n name=\"stored_messages\",\n method=\"store_message\",\n hidden=True,\n dynamic=True,\n )\n ]\n if field_value == \"Retrieve\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Messages\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True\n ),\n Output(\n display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True\n ),\n ]\n return frontend_node\n\n async def store_message(self) -> Message:\n message = Message(text=self.message) if isinstance(self.message, str) else self.message\n\n message.context_id = self.context_id or message.context_id\n message.session_id = self.session_id or message.session_id\n message.sender = self.sender or message.sender or MESSAGE_SENDER_AI\n message.sender_name = self.sender_name or message.sender_name or MESSAGE_SENDER_NAME_AI\n\n stored_messages: list[Message] = []\n\n if self.memory:\n self.memory.context_id = message.context_id\n self.memory.session_id = message.session_id\n lc_message = message.to_lc_message()\n await self.memory.aadd_messages([lc_message])\n\n stored_messages = await self.memory.aget_messages() or []\n\n stored_messages = [Message.from_lc_message(m) for m in stored_messages] if stored_messages else []\n\n if message.sender:\n stored_messages = [m for m in stored_messages if m.sender == message.sender]\n else:\n await astore_message(message, flow_id=self.graph.flow_id)\n stored_messages = (\n await aget_messages(\n session_id=message.session_id,\n context_id=message.context_id,\n sender_name=message.sender_name,\n sender=message.sender,\n )\n or []\n )\n\n if not stored_messages:\n msg = \"No messages were stored. Please ensure that the session ID and sender are properly set.\"\n raise ValueError(msg)\n\n stored_message = stored_messages[0]\n self.status = stored_message\n return stored_message\n\n async def retrieve_messages(self) -> Data:\n sender_type = self.sender_type\n sender_name = self.sender_name\n session_id = self.session_id\n context_id = self.context_id\n n_messages = self.n_messages\n order = \"DESC\" if self.order == \"Descending\" else \"ASC\"\n\n if sender_type == \"Machine and User\":\n sender_type = None\n\n if self.memory and not hasattr(self.memory, \"aget_messages\"):\n memory_name = type(self.memory).__name__\n err_msg = f\"External Memory object ({memory_name}) must have 'aget_messages' method.\"\n raise AttributeError(err_msg)\n # Check if n_messages is None or 0\n if n_messages == 0:\n stored = []\n elif self.memory:\n # override session_id\n self.memory.session_id = session_id\n self.memory.context_id = context_id\n\n stored = await self.memory.aget_messages()\n # langchain memories are supposed to return messages in ascending order\n\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages first\n\n if order == \"DESC\":\n stored = stored[::-1] # Then reverse if needed\n\n stored = [Message.from_lc_message(m) for m in stored]\n if sender_type:\n expected_type = MESSAGE_SENDER_AI if sender_type == MESSAGE_SENDER_AI else MESSAGE_SENDER_USER\n stored = [m for m in stored if m.type == expected_type]\n else:\n # For internal memory, we always fetch the last N messages by ordering by DESC\n stored = await aget_messages(\n sender=sender_type,\n sender_name=sender_name,\n session_id=session_id,\n context_id=context_id,\n limit=10000,\n order=order,\n )\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages\n\n # self.status = stored\n return cast(\"Data\", stored)\n\n async def retrieve_messages_as_text(self) -> Message:\n stored_text = data_to_text(self.template, await self.retrieve_messages())\n # self.status = stored_text\n return Message(text=stored_text)\n\n async def retrieve_messages_dataframe(self) -> DataFrame:\n \"\"\"Convert the retrieved messages into a DataFrame.\n\n Returns:\n DataFrame: A DataFrame containing the message data.\n \"\"\"\n messages = await self.retrieve_messages()\n return DataFrame(messages)\n\n def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any, # noqa: ARG002\n field_name: str | None = None, # noqa: ARG002\n ) -> dotdict:\n return set_current_fields(\n build_config=build_config,\n action_fields=self.mode_config,\n selected_action=build_config[\"mode\"][\"value\"],\n default_fields=self.default_keys,\n func=set_field_display,\n )\n" + "value": "from typing import Any, cast\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import data_to_text\nfrom lfx.inputs.inputs import DropdownInput, HandleInput, IntInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.memory import aget_messages, astore_message\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\nfrom lfx.utils.component_utils import set_current_fields, set_field_display\nfrom lfx.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass MemoryComponent(Component):\n display_name = \"Message History\"\n description = \"Stores or retrieves stored chat messages from Langflow tables or an external memory.\"\n documentation: str = \"https://docs.langflow.org/message-history\"\n icon = \"message-square-more\"\n name = \"Memory\"\n default_keys = [\"mode\", \"memory\", \"session_id\", \"context_id\"]\n mode_config = {\n \"Store\": [\"message\", \"memory\", \"sender\", \"sender_name\", \"session_id\", \"context_id\"],\n \"Retrieve\": [\"n_messages\", \"order\", \"template\", \"memory\", \"session_id\", \"context_id\"],\n }\n\n inputs = [\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Retrieve\", \"Store\"],\n value=\"Retrieve\",\n info=\"Operation mode: Store messages or Retrieve messages.\",\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The chat message to be stored.\",\n tool_mode=True,\n dynamic=True,\n show=False,\n ),\n HandleInput(\n name=\"memory\",\n display_name=\"External Memory\",\n input_types=[\"Memory\"],\n info=\"Retrieve messages from an external memory. If empty, it will use the Langflow tables.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"sender_type\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER, \"Machine and User\"],\n value=\"Machine and User\",\n info=\"Filter by sender type.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender\",\n display_name=\"Sender\",\n info=\"The sender of the message. Might be Machine or User. \"\n \"If empty, the current sender parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Filter by sender name.\",\n advanced=True,\n show=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Messages\",\n value=100,\n info=\"Number of messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n DropdownInput(\n name=\"order\",\n display_name=\"Order\",\n options=[\"Ascending\", \"Descending\"],\n value=\"Ascending\",\n info=\"Order of the messages.\",\n advanced=True,\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {sender} or any other key in the message data.\",\n value=\"{sender_name}: {text}\",\n advanced=True,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Message\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True),\n Output(display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True),\n ]\n\n def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:\n \"\"\"Dynamically show only the relevant output based on the selected output type.\"\"\"\n if field_name == \"mode\":\n # Start with empty outputs\n frontend_node[\"outputs\"] = []\n if field_value == \"Store\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Stored Messages\",\n name=\"stored_messages\",\n method=\"store_message\",\n hidden=True,\n dynamic=True,\n )\n ]\n if field_value == \"Retrieve\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Messages\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True\n ),\n Output(\n display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True\n ),\n ]\n return frontend_node\n\n async def store_message(self) -> Message:\n message = Message(text=self.message) if isinstance(self.message, str) else self.message\n\n message.context_id = self.context_id or message.context_id\n message.session_id = self.session_id or message.session_id\n message.sender = self.sender or message.sender or MESSAGE_SENDER_AI\n message.sender_name = self.sender_name or message.sender_name or MESSAGE_SENDER_NAME_AI\n\n stored_messages: list[Message] = []\n\n if self.memory:\n self.memory.context_id = message.context_id\n self.memory.session_id = message.session_id\n lc_message = message.to_lc_message()\n await self.memory.aadd_messages([lc_message])\n\n stored_messages = await self.memory.aget_messages() or []\n\n stored_messages = [Message.from_lc_message(m) for m in stored_messages] if stored_messages else []\n\n if message.sender:\n stored_messages = [m for m in stored_messages if m.sender == message.sender]\n else:\n await astore_message(message, flow_id=self.graph.flow_id)\n stored_messages = (\n await aget_messages(\n session_id=message.session_id,\n context_id=message.context_id,\n sender_name=message.sender_name,\n sender=message.sender,\n )\n or []\n )\n\n if not stored_messages:\n msg = \"No messages were stored. Please ensure that the session ID and sender are properly set.\"\n raise ValueError(msg)\n\n stored_message = stored_messages[0]\n self.status = stored_message\n return stored_message\n\n async def retrieve_messages(self) -> Data:\n sender_type = self.sender_type\n sender_name = self.sender_name\n session_id = self.session_id\n context_id = self.context_id\n n_messages = self.n_messages\n order = \"DESC\" if self.order == \"Descending\" else \"ASC\"\n\n if sender_type == \"Machine and User\":\n sender_type = None\n\n if self.memory and not hasattr(self.memory, \"aget_messages\"):\n memory_name = type(self.memory).__name__\n err_msg = f\"External Memory object ({memory_name}) must have 'aget_messages' method.\"\n raise AttributeError(err_msg)\n # Check if n_messages is None or 0\n if n_messages == 0:\n stored = []\n elif self.memory:\n # override session_id\n self.memory.session_id = session_id\n self.memory.context_id = context_id\n\n stored = await self.memory.aget_messages()\n # langchain memories are supposed to return messages in ascending order\n\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages first\n\n if order == \"DESC\":\n stored = stored[::-1] # Then reverse if needed\n\n stored = [Message.from_lc_message(m) for m in stored]\n if sender_type:\n expected_type = MESSAGE_SENDER_AI if sender_type == MESSAGE_SENDER_AI else MESSAGE_SENDER_USER\n stored = [m for m in stored if m.type == expected_type]\n else:\n # For internal memory, we always fetch the last N messages by ordering by DESC\n stored = await aget_messages(\n sender=sender_type,\n sender_name=sender_name,\n session_id=session_id,\n context_id=context_id,\n limit=10000,\n order=order,\n )\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages\n\n # self.status = stored\n return cast(\"Data\", stored)\n\n async def retrieve_messages_as_text(self) -> Message:\n stored_text = data_to_text(self.template, await self.retrieve_messages())\n # self.status = stored_text\n return Message(text=stored_text)\n\n async def retrieve_messages_dataframe(self) -> DataFrame:\n \"\"\"Convert the retrieved messages into a DataFrame.\n\n Returns:\n DataFrame: A DataFrame containing the message data.\n \"\"\"\n messages = await self.retrieve_messages()\n return DataFrame(messages)\n\n def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any, # noqa: ARG002\n field_name: str | None = None, # noqa: ARG002\n ) -> dotdict:\n return set_current_fields(\n build_config=build_config,\n action_fields=self.mode_config,\n selected_action=build_config[\"mode\"][\"value\"],\n default_fields=self.default_keys,\n func=set_field_display,\n )\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -1957,7 +1957,7 @@ "legacy": false, "lf_version": "1.6.0", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -2007,7 +2007,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -2237,7 +2237,7 @@ "key": "ChatOutput", "legacy": false, "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "8c87e536cca4", "dependencies": { "dependencies": [ { @@ -2246,7 +2246,7 @@ }, { "name": "fastapi", - "version": "0.120.0" + "version": "0.123.0" }, { "name": "lfx", @@ -2313,7 +2313,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n # Preserve existing session_id from the incoming message if it exists\n existing_session_id = message.session_id\n else:\n message = Message(text=text)\n existing_session_id = None\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n # Preserve session_id from incoming message, or use component/graph session_id\n message.session_id = (\n self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, \"graph\") else None) or \"\"\n )\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json index b8591640aa94..62f98ce2fa8e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json @@ -147,7 +147,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -195,7 +195,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -411,7 +411,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "8c87e536cca4", "dependencies": { "dependencies": [ { @@ -420,7 +420,7 @@ }, { "name": "fastapi", - "version": "0.120.0" + "version": "0.123.0" }, { "name": "lfx", @@ -485,7 +485,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n # Preserve existing session_id from the incoming message if it exists\n existing_session_id = message.session_id\n else:\n message = Message(text=text)\n existing_session_id = None\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n # Preserve session_id from incoming message, or use component/graph session_id\n message.session_id = (\n self.session_id or existing_session_id or (self.graph.session_id if hasattr(self, \"graph\") else None) or \"\"\n )\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -1187,7 +1187,7 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Loads content from one or more files.", + "description": "Loads and returns the content from uploaded files.", "display_name": "File", "documentation": "", "edited": false, @@ -1207,15 +1207,23 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "844fa7ef9064", + "code_hash": "1d81b3a4d764", "dependencies": { "dependencies": [ { "name": "lfx", "version": null + }, + { + "name": "langchain_core", + "version": "0.3.80" + }, + { + "name": "pydantic", + "version": "2.11.10" } ], - "total_dependencies": 1 + "total_dependencies": 3 }, "module": "lfx.components.files_and_knowledge.file.FileComponent" }, @@ -1275,7 +1283,7 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Enhanced file component with Docling support and process isolation.\n\nNotes:\n-----\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport contextlib\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any\n\nfrom lfx.base.data.base_file import BaseFileComponent\nfrom lfx.base.data.storage_utils import parse_storage_path\nfrom lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput\nfrom lfx.io import BoolInput, FileInput, IntInput, Output\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame # noqa: TC001\nfrom lfx.schema.message import Message\nfrom lfx.services.deps import get_settings_service, get_storage_service\nfrom lfx.utils.async_helpers import run_until_complete\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"Read File\"\n description = \"Loads content from one or more files.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Extensions that can be processed without Docling (using standard text parsing)\n TEXT_EXTENSIONS = TEXT_FILE_TYPES\n\n # Extensions that require Docling for processing (images, advanced office formats, etc.)\n DOCLING_ONLY_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"jpg\",\n \"jpeg\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"webp\",\n ]\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n *TEXT_EXTENSIONS,\n *DOCLING_ONLY_EXTENSIONS,\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent.get_base_inputs())\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n input_item.tool_mode = False # Disable tool mode for file upload input\n input_item.required = False # Make it optional so it doesn't error in tool mode\n break\n\n inputs = [\n *_base_inputs,\n StrInput(\n name=\"file_path_str\",\n display_name=\"File Path\",\n info=(\n \"Path to the file to read. Used when component is called as a tool. \"\n \"If not provided, will use the uploaded file from 'path' input.\"\n ),\n show=False,\n advanced=True,\n tool_mode=True,\n required=False,\n ),\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Note that advanced document processing can consume significant resources.\"\n ),\n show=True,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"None\", \"easyocr\"],\n value=\"easyocr\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n ]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n\n # If all files can be processed by docling, do so\n allow_advanced = all(not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")) for file_path in paths)\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n # Docling Processing\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n if f == \"pipeline\":\n build_config[f][\"advanced\"] = not bool(field_value)\n\n elif field_name == \"pipeline\":\n if field_value == \"standard\":\n build_config[\"ocr_engine\"][\"show\"] = True\n build_config[\"ocr_engine\"][\"value\"] = \"easyocr\"\n else:\n build_config[\"ocr_engine\"][\"show\"] = False\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\", \"pipeline\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Content\",\n name=\"dataframe\",\n method=\"load_files_structured\",\n tool_mode=True,\n ),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\", tool_mode=True),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Output\",\n name=\"advanced_dataframe\",\n method=\"load_files_dataframe\",\n tool_mode=True,\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Markdown\", name=\"advanced_markdown\", method=\"load_files_markdown\", tool_mode=True\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\", tool_mode=True)\n )\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Override to handle file_path_str input from tool mode.\n\n When called as a tool, the file_path_str parameter can be set.\n If not provided, it will fall back to using the path FileInput (uploaded file).\n Priority:\n 1. file_path_str (if provided by the tool call)\n 2. path (uploaded file from UI)\n \"\"\"\n # Check if file_path_str is provided (from tool mode)\n file_path_str = getattr(self, \"file_path_str\", None)\n if file_path_str:\n # Use the string path from tool mode\n from pathlib import Path\n\n from lfx.schema.data import Data\n\n resolved_path = Path(self.resolve_path(file_path_str))\n if not resolved_path.exists():\n msg = f\"File or directory not found: {file_path_str}\"\n self.log(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return []\n\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(resolved_path)})\n return [BaseFileComponent.BaseFile(data_obj, resolved_path, delete_after_processing=False)]\n\n # Otherwise use the default implementation (uses path FileInput)\n return super()._validate_and_resolve_paths()\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpg\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n async def _get_local_file_for_docling(self, file_path: str) -> tuple[str, bool]:\n \"\"\"Get a local file path for Docling processing, downloading from S3 if needed.\n\n Args:\n file_path: Either a local path or S3 key (format \"flow_id/filename\")\n\n Returns:\n tuple[str, bool]: (local_path, should_delete) where should_delete indicates\n if this is a temporary file that should be cleaned up\n \"\"\"\n settings = get_settings_service().settings\n if settings.storage_type == \"local\":\n return file_path, False\n\n # S3 storage - download to temp file\n parsed = parse_storage_path(file_path)\n if not parsed:\n msg = f\"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'\"\n raise ValueError(msg)\n\n storage_service = get_storage_service()\n flow_id, filename = parsed\n\n # Get file content from S3\n content = await storage_service.get_file(flow_id, filename)\n\n suffix = Path(filename).suffix\n with NamedTemporaryFile(mode=\"wb\", suffix=suffix, delete=False) as tmp_file:\n tmp_file.write(content)\n temp_path = tmp_file.name\n\n return temp_path, True\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"