diff --git a/.eslintrc.json b/.eslintrc.json index fc12c18c2c5b..9881e439ed91 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -2,7 +2,8 @@ "extends": [ "eslint:recommended", "plugin:react/recommended", - "plugin:prettier/recommended" + "plugin:prettier/recommended", + "plugin:storybook/recommended" ], "plugins": [ "react", @@ -87,4 +88,4 @@ "node/prefer-promises/dns": "error", "node/prefer-promises/fs": "error" } -} +} \ No newline at end of file diff --git a/.github/workflows/deploy-docs-draft.yml b/.github/workflows/deploy-docs-draft.yml index 6c77f966b174..e99ab3e5f9ee 100644 --- a/.github/workflows/deploy-docs-draft.yml +++ b/.github/workflows/deploy-docs-draft.yml @@ -21,7 +21,7 @@ jobs: uses: actions/checkout@v6 - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: 22 cache: yarn diff --git a/.github/workflows/deploy-storybook.yml b/.github/workflows/deploy-storybook.yml new file mode 100644 index 000000000000..86c6df293fc1 --- /dev/null +++ b/.github/workflows/deploy-storybook.yml @@ -0,0 +1,49 @@ +name: Deploy Storybook to GitHub Pages + +on: + push: + branches: + - main + paths: + - 'src/frontend/**/*.stories.*' + - 'src/frontend/.storybook/**' + - 'src/frontend/package.json' + - '.github/workflows/deploy-storybook.yml' + workflow_dispatch: # Allow manual trigger + +jobs: + deploy: + name: Deploy Storybook + runs-on: ubuntu-latest + permissions: + contents: read + pages: write + id-token: write + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-node@v6 + with: + node-version: 22 + cache: npm + cache-dependency-path: src/frontend/package-lock.json + + - name: Install dependencies + run: cd src/frontend && npm ci + + - name: Build Storybook + run: cd src/frontend && npm run build-storybook + + - name: Setup Pages + uses: actions/configure-pages@v4 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: src/frontend/storybook-static + + - name: Deploy to GitHub 
Pages + id: deployment + uses: actions/deploy-pages@v4 + diff --git a/.github/workflows/deploy_gh-pages.yml b/.github/workflows/deploy_gh-pages.yml index 9582bb31131d..b57c2d486ea8 100644 --- a/.github/workflows/deploy_gh-pages.yml +++ b/.github/workflows/deploy_gh-pages.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v6 with: node-version: 22 cache: yarn diff --git a/.github/workflows/docs_test.yml b/.github/workflows/docs_test.yml index 3749332ee32e..d2bbd81157da 100644 --- a/.github/workflows/docs_test.yml +++ b/.github/workflows/docs_test.yml @@ -24,7 +24,7 @@ jobs: ref: ${{ inputs.branch || github.ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/jest_test.yml b/.github/workflows/jest_test.yml index a7e9f14a6004..2d536dacebbb 100644 --- a/.github/workflows/jest_test.yml +++ b/.github/workflows/jest_test.yml @@ -35,7 +35,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} - name: Setup Node.js Environment - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/js_autofix.yml b/.github/workflows/js_autofix.yml index 7b195de07e18..a2acab088eee 100644 --- a/.github/workflows/js_autofix.yml +++ b/.github/workflows/js_autofix.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@v6 - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/lint-js.yml b/.github/workflows/lint-js.yml index e1bd2c905609..c2c98a6a643a 100644 --- a/.github/workflows/lint-js.yml +++ b/.github/workflows/lint-js.yml @@ -27,7 +27,7 @@ jobs: ref: ${{ inputs.branch || github.ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: 
setup-node with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/migration-validation.yml b/.github/workflows/migration-validation.yml new file mode 100644 index 000000000000..54eaf7333c2b --- /dev/null +++ b/.github/workflows/migration-validation.yml @@ -0,0 +1,159 @@ +name: Database Migration Validation + +on: + pull_request: + paths: + - 'src/backend/base/langflow/alembic/versions/*.py' + - 'alembic/versions/*.py' + +jobs: + validate-migration: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install sqlalchemy alembic + + - name: Get changed migration files + id: changed-files + run: | + # Get all changed Python files in alembic/versions directories + + # CHANGED_FILES=$(git diff --name-only origin/main...HEAD | grep -E '(alembic|migrations)/versions/.*\.py$' || echo "") + + # Exclude test migrations, as they are not part of the main codebase + CHANGED_FILES=$(git diff --name-only origin/main...HEAD | grep -E '(alembic|migrations)/versions/.*\.py$' | grep -v 'test_migrations/' || echo "") + + if [ -z "$CHANGED_FILES" ]; then + echo "No migration files changed" + echo "files=" >> $GITHUB_OUTPUT + else + echo "Changed migration files:" + echo "$CHANGED_FILES" + # Convert newlines to spaces for passing as arguments + echo "files=$(echo $CHANGED_FILES | tr '\n' ' ')" >> $GITHUB_OUTPUT + fi + + - name: Validate migrations + if: steps.changed-files.outputs.files != '' + run: | + python src/backend/base/langflow/alembic/migration_validator.py ${{ steps.changed-files.outputs.files }} + +# - name: Check migration phase sequence +# if: steps.changed-files.outputs.files != '' +# run: | +# python scripts/check_phase_sequence.py ${{ steps.changed-files.outputs.files }} + + - name: Generate validation report + if: always() && 
steps.changed-files.outputs.files != '' + run: | + python src/backend/base/langflow/alembic/migration_validator.py \ + --json ${{ steps.changed-files.outputs.files }} > validation-report.json || true + + - name: Post PR comment with results + if: always() && steps.changed-files.outputs.files != '' + uses: actions/github-script@v6 + with: + script: | + const fs = require('fs'); + + let message = ''; + let validationPassed = true; + + try { + const report = JSON.parse(fs.readFileSync('validation-report.json', 'utf8')); + + for (const result of report) { + if (!result.valid) { + validationPassed = false; + } + } + + if (validationPassed) { + message = `โœ… **Migration Validation Passed**\n\n`; + message += `All migrations follow the Expand-Contract pattern correctly.\n\n`; + } else { + message = `โŒ **Migration Validation Failed**\n\n`; + message += `Your migrations don't follow the Expand-Contract pattern.\n\n`; + + for (const result of report) { + if (!result.valid || result.warnings.length > 0) { + message += `### File: \`${result.file.split('/').pop()}\`\n`; + message += `**Phase:** ${result.phase}\n\n`; + + if (result.violations && result.violations.length > 0) { + message += `**Violations:**\n`; + for (const v of result.violations) { + message += `- Line ${v.line}: ${v.message}\n`; + } + message += `\n`; + } + + if (result.warnings && result.warnings.length > 0) { + message += `**Warnings:**\n`; + for (const w of result.warnings) { + message += `- Line ${w.line}: ${w.message}\n`; + } + message += `\n`; + } + } + } + + message += `### ๐Ÿ“š Resources\n`; + message += `- Review the [DB Migration Guide](./src/backend/base/langflow/alembic/DB-MIGRATION-GUIDE.MD)\n`; + message += `- Use \`python scripts/generate_migration.py --help\` to generate compliant migrations\n\n`; + + message += `### Common Issues & Solutions\n`; + message += `- **New columns must be nullable:** Add \`nullable=True\` or \`server_default\`\n`; + message += `- **Missing phase marker:** Add 
\`Phase: EXPAND/MIGRATE/CONTRACT\` to docstring\n`; + message += `- **Column drops:** Only allowed in CONTRACT phase\n`; + message += `- **Direct renames:** Use expand-contract pattern instead\n`; + } + } catch (error) { + message = `โš ๏ธ **Migration validation check failed to run properly**\n`; + message += `Error: ${error.message}\n`; + } + + // Post or update comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const botComment = comments.find(comment => + comment.user.type === 'Bot' && + comment.body.includes('Migration Validation') + ); + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: message + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: message + }); + } + + // Fail the workflow if validation didn't pass + if (!validationPassed) { + core.setFailed('Migration validation failed'); + } \ No newline at end of file diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index 33c5b5182b92..1df949012a9c 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -68,7 +68,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml index 1e57992fb23c..e50cebc715b4 100644 --- a/.github/workflows/smoke-tests.yml +++ b/.github/workflows/smoke-tests.yml @@ -120,7 +120,7 @@ jobs: ref: ${{ github.event.inputs.ref || github.ref }} - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: "22" cache: "npm" diff --git 
a/.github/workflows/typescript_test.yml b/.github/workflows/typescript_test.yml index 388e0714f8c9..d1bbf33e9c7c 100644 --- a/.github/workflows/typescript_test.yml +++ b/.github/workflows/typescript_test.yml @@ -214,7 +214,7 @@ jobs: echo "suites=$SUITES" >> $GITHUB_OUTPUT - name: Setup Node ${{ env.NODE_VERSION }} - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} @@ -239,12 +239,12 @@ jobs: echo "Total tests to run: $TEST_COUNT" - # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 40 + # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 50 SHARD_COUNT=$(( (TEST_COUNT + 4) / 5 )) if [ $SHARD_COUNT -lt 1 ]; then SHARD_COUNT=1 - elif [ $SHARD_COUNT -gt 40 ]; then - SHARD_COUNT=40 + elif [ $SHARD_COUNT -gt 50 ]; then + SHARD_COUNT=50 fi # Create the matrix combinations string @@ -284,7 +284,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} - name: Setup Node.js Environment - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 id: setup-node with: node-version: ${{ env.NODE_VERSION }} @@ -385,7 +385,7 @@ jobs: - name: Setup Node.js if: ${{ steps.should_merge_reports.outputs.should_merge_reports == 'true' }} - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: ${{ env.NODE_VERSION }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0eaf8f34afe..3acee92cda03 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,21 @@ repos: language: system types_or: [python, pyi] args: [--config, pyproject.toml] + - id: validate-migrations + name: Validate Alembic Migrations (Expand-Contract) + entry: python src/backend/base/langflow/alembic/migration_validator.py + language: python + files: (alembic|migrations)/versions/.*\.py$ + additional_dependencies: [sqlalchemy, alembic] + pass_filenames: true + always_run: false + verbose: true + - id: check-migration-phase + name: Check Migration Phase Documentation + 
entry: python -c "import sys, re; content = open(sys.argv[1]).read(); sys.exit(0 if re.search(r'Phase:\s*(EXPAND|MIGRATE|CONTRACT)', content) else 1)" + language: python + files: (alembic|migrations)/versions/.*\.py$ + pass_filenames: true - repo: https://github.com/Yelp/detect-secrets rev: v1.5.0 hooks: @@ -55,3 +70,9 @@ repos: files: ^src/backend/base/langflow/initial_setup/starter_projects/.*\.json$ pass_filenames: false args: [--security-check] + - id: check-deprecated-imports + name: Check for deprecated langchain imports + entry: uv run python scripts/check_deprecated_imports.py + language: system + files: ^src/lfx/src/lfx/components/.*\.py$ + pass_filenames: false diff --git a/.secrets.baseline b/.secrets.baseline index b1bfef6ca20e..fd61ea0a306e 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -765,6 +765,122 @@ "is_secret": false } ], + "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json", + "hashed_secret": "377e839f86c1529c656c82599fb225b4a1261ed5", + "is_verified": false, + "line_number": 731, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 1210, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json", + "hashed_secret": "1be2449adf6092e0729be455a98c93034cc90bc8", + "is_verified": false, + "line_number": 209, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/News Aggregator.json", + "hashed_secret": 
"7881caec48fc330c8cde89fb096ae27690c8d8a9", + "is_verified": false, + "line_number": 883, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json", + "hashed_secret": "b223275895a74015ca0555983d6e9685efdb03fe", + "is_verified": false, + "line_number": 201, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Portfolio Website Code Generator.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 934, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json", + "hashed_secret": "abb09440424b40c661e344d4a61e560975620221", + "is_verified": false, + "line_number": 987, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Research Translation Loop.json", + "hashed_secret": "e66321745fc15e1b80035de7c59f8c700d7e9976", + "is_verified": false, + "line_number": 1624, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Text Sentiment Analysis.json": [ + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Text Sentiment Analysis.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 2342, + "is_secret": false + } + ], + "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json": [ + { + "type": "Hex High Entropy String", + "filename": 
"src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "377e839f86c1529c656c82599fb225b4a1261ed5", + "is_verified": false, + "line_number": 586, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "ab06ef2a8cc8a90a8526e3511be8f376c7cb0387", + "is_verified": false, + "line_number": 764, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "3de7722ca43ab9676c384eb479950083fb2385bb", + "is_verified": false, + "line_number": 1357, + "is_secret": false + }, + { + "type": "Hex High Entropy String", + "filename": "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json", + "hashed_secret": "a99d6de03c251f8eb8922fab5a383523e4acbadd", + "is_verified": false, + "line_number": 2678, + "is_secret": false + } + ], "src/backend/base/langflow/schema/table.py": [ { "type": "Secret Keyword", @@ -883,7 +999,7 @@ "filename": "src/backend/tests/unit/api/v2/test_files.py", "hashed_secret": "61fbb5a12cd7b1f1fe1624120089efc0cd299e43", "is_verified": false, - "line_number": 29, + "line_number": 40, "is_secret": false } ], @@ -1412,5 +1528,5 @@ } ] }, - "generated_at": "2025-11-10T17:33:14Z" + "generated_at": "2025-11-19T18:36:04Z" } diff --git a/.vscode/launch.json b/.vscode/launch.json index 45349aaa864b..201ec9af311d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,6 +1,7 @@ { "version": "0.2.0", "configurations": [ + { "name": "Debug Backend", "type": "debugpy", @@ -79,6 +80,13 @@ "purpose": ["debug-test"], "console": "integratedTerminal", "justMyCode": false - } + }, + { + "name": "Python Debugger: Python File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" +} ] } diff --git 
a/Makefile.frontend b/Makefile.frontend index aa2b08b2812b..7f35cfc8f339 100644 --- a/Makefile.frontend +++ b/Makefile.frontend @@ -5,7 +5,7 @@ FRONTEND_DIR = src/frontend NPM = npm -.PHONY: install_frontend install_frontendci install_frontendc frontend_deps_check build_frontend run_frontend frontend frontendc format_frontend tests_frontend test_frontend test_frontend_watch test_frontend_coverage test_frontend_verbose test_frontend_ci test_frontend_clean test_frontend_file test_frontend_pattern test_frontend_snapshots test_frontend_config test_frontend_bail test_frontend_silent test_frontend_coverage_open help_frontend +.PHONY: install_frontend install_frontendci install_frontendc frontend_deps_check build_frontend run_frontend frontend frontendc format_frontend tests_frontend test_frontend test_frontend_watch test_frontend_coverage test_frontend_verbose test_frontend_ci test_frontend_clean test_frontend_file test_frontend_pattern test_frontend_snapshots test_frontend_config test_frontend_bail test_frontend_silent test_frontend_coverage_open help_frontend storybook storybook_build storybook_network ###################### # FRONTEND DEPENDENCIES @@ -166,6 +166,23 @@ test_frontend_coverage_open: test_frontend_coverage ## run tests with coverage a echo "Coverage report generated at: $(FRONTEND_DIR)/coverage/lcov-report/index.html"; \ fi +###################### +# STORYBOOK +###################### + +storybook: frontend_deps_check ## run Storybook development server and open in browser + @echo "Starting Storybook development server on http://localhost:6006..." + @cd $(FRONTEND_DIR) && $(NPM) run storybook + +storybook_build: frontend_deps_check ## build static Storybook + @echo "Building static Storybook..." 
+ @cd $(FRONTEND_DIR) && $(NPM) run build-storybook + @echo "Storybook built to $(FRONTEND_DIR)/storybook-static" + +storybook_network: frontend_deps_check ## run Storybook accessible on network (0.0.0.0:6006) + @echo "Starting Storybook development server accessible on network..." + @cd $(FRONTEND_DIR) && $(NPM) run storybook:network + ###################### # FRONTEND HELP ###################### @@ -212,5 +229,10 @@ help_frontend: ## show frontend help @echo " $(GREEN)make test_frontend_snapshots$(NC) - Update Jest snapshots" @echo " $(GREEN)make test_frontend_config$(NC) - Show Jest configuration" @echo '' + @echo "$(GREEN)Storybook:$(NC)" + @echo " $(GREEN)make storybook$(NC) - Run Storybook dev server and open in browser" + @echo " $(GREEN)make storybook_build$(NC) - Build static Storybook" + @echo " $(GREEN)make storybook_network$(NC) - Run Storybook accessible on network" + @echo '' @echo "$(GREEN)โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•$(NC)" @echo '' \ No newline at end of file diff --git a/docs/docs/Deployment/security.mdx b/docs/docs/Deployment/security.mdx index e644a228982b..75121c0c247f 100644 --- a/docs/docs/Deployment/security.mdx +++ b/docs/docs/Deployment/security.mdx @@ -35,7 +35,7 @@ Follow industry best practices for APIs: * Use a secure API gateway to provide authentication and authorization * Ensure user data is appropriately isolated -* Sanitize inputs and outputs against XSS and injection attacks +* Sanitize inputs and outputs against XSS and injection attacks, including regex patterns to prevent ReDoS vulnerabilities For more information on setting up a reverse proxy, see [Deploy Langflow with Nginx and SSL](/deployment-nginx-ssl). For more information on authentication configuration, see [API keys and authentication](/api-keys-and-authentication). 
diff --git a/docs/docs/Develop/memory.mdx b/docs/docs/Develop/memory.mdx index 774b1501c6cb..57506253b1c1 100644 --- a/docs/docs/Develop/memory.mdx +++ b/docs/docs/Develop/memory.mdx @@ -80,6 +80,8 @@ To fine-tune your database connection pool and timeout settings, you can set the * `LANGFLOW_DB_CONNECT_TIMEOUT`: The number of seconds to wait before giving up on a lock to be released or establishing a connection to the database. This may be separate from the `pool_timeout` in `LANGFLOW_DB_CONNECTION_SETTINGS`. Default: 30. +* `LANGFLOW_MIGRATION_LOCK_NAMESPACE`: Optional namespace identifier for the PostgreSQL advisory lock used during migrations. If not provided, a hash of the database URL will be used. Useful when multiple Langflow instances share the same database and need coordinated migration locking. + * `LANGFLOW_DB_CONNECTION_SETTINGS`: A JSON dictionary containing the following database connection pool settings: - `pool_size`: The base number of connections to keep open in the connection pool. Default: 20. 
diff --git a/pyproject.toml b/pyproject.toml index 27b8de247e2e..c9c21f5e3dfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ dependencies = [ "duckduckgo_search==7.2.1", "opensearch-py==2.8.0", "langchain-google-genai==2.0.6", - "langchain-cohere==0.3.3", + "langchain-cohere>=0.3.3,<1.0.0", "langchain-huggingface==0.3.1", "langchain-anthropic==0.3.14", "langchain-astradb>=0.6.1,<1.0.0", @@ -85,14 +85,14 @@ dependencies = [ "langchain-pinecone>=0.2.8,<1.0.0", "langchain-mistralai==0.2.3", "langchain-chroma>=0.2.6,<1.0.0", - "langchain-aws==0.2.33", + "langchain-aws>=0.2.33,<1.0.0", "langchain-unstructured==0.1.5", "langchain-milvus==0.1.7", "langchain-mongodb==0.7.0", "langchain-nvidia-ai-endpoints==0.3.8", "langchain-google-calendar-tools==0.0.1", - "langchain-google-community==2.0.3", - "langchain-elasticsearch==0.3.0", + "langchain-google-community>=2.0.3,<3.0.0", + "langchain-elasticsearch>=0.3.0,<1.0.0", "langchain-ollama==0.3.10", "langchain-sambanova==0.1.0", "langchain-community>=0.3.21,<1.0.0", @@ -138,6 +138,7 @@ dependencies = [ "cuga==0.1.10", "agent-lifecycle-toolkit~=0.4.1", "astrapy>=2.1.0,<3.0.0", + "aioboto3>=15.2.0,<16.0.0" ] diff --git a/scripts/check_deprecated_imports.py b/scripts/check_deprecated_imports.py new file mode 100755 index 000000000000..d2f9e49bcf63 --- /dev/null +++ b/scripts/check_deprecated_imports.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +"""Check for deprecated langchain import patterns in component files. + +This script scans all Python files in the lfx/components directory for +deprecated import patterns and reports them. It's designed to be used +as a pre-commit hook to catch import issues early. + +Exit codes: + 0: No deprecated imports found + 1: Deprecated imports found + 2: Error during execution +""" + +import ast +import sys +from pathlib import Path + + +def check_deprecated_imports(components_path: Path) -> list[str]: + """Check for deprecated import patterns in component files. 
+ + Args: + components_path: Path to the components directory + + Returns: + List of error messages for deprecated imports found + """ + deprecated_imports = [] + + # Known deprecated import patterns + deprecated_patterns = [ + ("langchain.embeddings.base", "langchain_core.embeddings"), + ("langchain.llms.base", "langchain_core.language_models.llms"), + ("langchain.chat_models.base", "langchain_core.language_models.chat_models"), + ("langchain.schema", "langchain_core.messages"), + ("langchain.vectorstores", "langchain_community.vectorstores"), + ("langchain.document_loaders", "langchain_community.document_loaders"), + ("langchain.text_splitter", "langchain_text_splitters"), + ] + + # Walk through all Python files in components + for py_file in components_path.rglob("*.py"): + # Skip private modules + if py_file.name.startswith("_"): + continue + + try: + content = py_file.read_text(encoding="utf-8") + tree = ast.parse(content, filename=str(py_file)) + + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom): + module = node.module or "" + + # Check against deprecated patterns + for deprecated, replacement in deprecated_patterns: + if module.startswith(deprecated): + relative_path = py_file.relative_to(components_path.parent) + deprecated_imports.append( + f"{relative_path}:{node.lineno}: " + f"Uses deprecated '{deprecated}' - should use '{replacement}'" + ) + + except Exception as e: # noqa: BLE001 + # Report parsing errors but continue - we want to check all files + print(f"Warning: Could not parse {py_file}: {e}", file=sys.stderr) + continue + + return deprecated_imports + + +def main() -> int: + """Main entry point for the script. 
+ + Returns: + Exit code (0 for success, 1 for deprecated imports found, 2 for error) + """ + try: + # Find the lfx components directory + script_dir = Path(__file__).parent + repo_root = script_dir.parent + lfx_components = repo_root / "src" / "lfx" / "src" / "lfx" / "components" + + if not lfx_components.exists(): + print(f"Error: Components directory not found at {lfx_components}", file=sys.stderr) + return 2 + + # Check for deprecated imports + deprecated_imports = check_deprecated_imports(lfx_components) + + if deprecated_imports: + print("โŒ Found deprecated langchain imports:", file=sys.stderr) + print(file=sys.stderr) + for imp in deprecated_imports: + print(f" โ€ข {imp}", file=sys.stderr) + print(file=sys.stderr) + print( + "Please update these imports to use the current langchain import paths.", + file=sys.stderr, + ) + print("See: https://python.langchain.com/docs/versions/migrating_chains/", file=sys.stderr) + return 1 + # No deprecated imports found + print("โœ… No deprecated imports found") + except Exception as e: # noqa: BLE001 + # Catch-all for unexpected errors during script execution + print(f"Error: {e}", file=sys.stderr) + return 2 + else: + # Success case - no exceptions and no deprecated imports + return 0 + + +if __name__ == "__main__": + sys.exit(main()) + +# Made with Bob diff --git a/scripts/generate_migration.py b/scripts/generate_migration.py new file mode 100644 index 000000000000..8d43676d3d4a --- /dev/null +++ b/scripts/generate_migration.py @@ -0,0 +1,267 @@ +"""Generate Expand-Contract pattern compliant Alembic migrations.""" + +import hashlib # noqa: F401 +import random # noqa: F401 +import re # noqa: F401 +import subprocess # noqa: F401 +from datetime import datetime # noqa: F401 +from pathlib import Path # noqa: F401 +from typing import Optional # noqa: F401 + +import click # noqa: F401 + +TEMPLATES = { + "expand": '''""" +{description} +Phase: EXPAND +Safe to rollback: YES +Services compatible: All versions +Next phase: 
MIGRATE after all services deployed + +Revision ID: {revision} +Revises: {down_revision} +Create Date: {create_date} +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text, inspect + +# revision identifiers, used by Alembic +revision = '{revision}' +down_revision = {down_revision} +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """ + EXPAND PHASE: Add new schema elements (backward compatible) + - All new columns must be nullable or have defaults + - No breaking changes to existing schema + - Services using old schema continue to work + """ + bind = op.get_bind() + inspector = inspect(bind) + + # Get existing columns for idempotency + columns = [col['name'] for col in inspector.get_columns('{table_name}')] + } + + # Add new nullable column (always check existence first) + if '{column_name}' not in columns: + op.add_column('{table_name}', + sa.Column('{column_name}', sa.{column_type}(), nullable=True{default_value}) + ) + + print(f"โœ… Added column '{column_name}' to table '{table_name}'") + + # Optional: Add index for performance + # op.create_index('ix_{table_name}_{column_name}', '{table_name}', ['{column_name}']) + else: + print(f"โญ๏ธ Column '{column_name}' already exists in table '{table_name}'") + + # Verify the change + result = bind.execute(text( + "SELECT COUNT(*) as cnt FROM {table_name}" + )).first() + print(f"๐Ÿ“Š EXPAND phase complete for {{result.cnt}} rows in {table_name}") + + +def downgrade() -> None: + """ + Rollback EXPAND phase + - Safe to rollback as it only removes additions + - Check for data loss before dropping + """ + bind = op.get_bind() + inspector = inspect(bind) + columns = [col['name'] for col in inspector.get_columns('{table_name}')] + + if '{column_name}' in columns: + # Check if column has data + result = bind.execute(text(""" + SELECT COUNT(*) as cnt FROM {table_name} + WHERE {column_name} IS NOT NULL + """)).first() + + if result and result.cnt > 0: + print(f"โš ๏ธ Warning: 
Dropping column '{column_name}' with {{result.cnt}} non-null values") + + # Optional: Create backup table + backup_table = '_{table_name}_{column_name}_backup_' + datetime.now().strftime('%Y%m%d_%H%M%S') + bind.execute(text(f""" + CREATE TABLE {{backup_table}} AS + SELECT id, {column_name}, NOW() as backed_up_at + FROM {table_name} + WHERE {column_name} IS NOT NULL + """)) + print(f"๐Ÿ’พ Created backup table: {{backup_table}}") + + op.drop_column('{table_name}', '{column_name}') + print(f"โœ… Dropped column '{column_name}' from table '{table_name}'") + else: + print(f"โญ๏ธ Column '{column_name}' doesn't exist in table '{table_name}'") +''', + "migrate": '''""" +{description} +Phase: MIGRATE +Safe to rollback: PARTIAL (data migration may be lost) +Services compatible: Both old and new versions +Next phase: CONTRACT after 30+ days and full adoption + +Revision ID: {revision} +Revises: {down_revision} +Create Date: {create_date} +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from datetime import datetime + +# revision identifiers, used by Alembic +revision = '{revision}' +down_revision = {down_revision} +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """ + MIGRATE PHASE: Transition data to new schema + - Backfill data from old columns to new + - Both old and new columns coexist + - Services can use either column + """ + bind = op.get_bind() + + print("๐Ÿ”„ Starting data migration...") + + # Backfill data from old column to new (if applicable) + {migration_logic} + + # Report migration progress + result = bind.execute(text(""" + SELECT + COUNT(*) FILTER (WHERE {new_column} IS NOT NULL) as migrated, + COUNT(*) FILTER (WHERE {new_column} IS NULL) as not_migrated, + COUNT(*) as total + FROM {table_name} + """)).first() + + print(f"๐Ÿ“Š Migration Statistics:") + print(f" - Total rows: {{result.total}}") + print(f" - Migrated: {{result.migrated}} ({{result.migrated * 100 / result.total if result.total > 0 else 
0:.1f}}%)") + print(f" - Not migrated: {{result.not_migrated}}") + + if result.not_migrated > 0: + print(f"โš ๏ธ WARNING: {{result.not_migrated}} rows not yet migrated") + print(f" Consider running a background job to complete migration") + else: + print(f"โœ… All rows successfully migrated") + + # Log migration completion + bind.execute(text(""" + INSERT INTO alembic_version_history (version_num, phase, completed_at) + VALUES (:version, 'MIGRATE', :timestamp) + ON CONFLICT (version_num) DO UPDATE + SET phase = 'MIGRATE', completed_at = :timestamp + """), {{"version": revision, "timestamp": datetime.now()}}) + + +def downgrade() -> None: + """ + Rollback MIGRATE phase + - Usually no action needed + - Data remains in both old and new columns + """ + print("โš ๏ธ MIGRATE phase rollback - data remains in both columns") + print(" Services can continue using either old or new schema") + + # Optional: Log rollback + bind = op.get_bind() + bind.execute(text(""" + UPDATE alembic_version_history + SET phase = 'MIGRATE_ROLLED_BACK', completed_at = NOW() + WHERE version_num = :version + """), {{"version": revision}}) +''', # noqa: E501 + "contract": '''""" +{description} +Phase: CONTRACT +Safe to rollback: NO (old schema removed) +Services compatible: New versions only +Prerequisites: All services using new schema for 30+ days + +Revision ID: {revision} +Revises: {down_revision} +Create Date: {create_date} +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text, inspect +from datetime import datetime, timedelta + +# revision identifiers, used by Alembic +revision = '{revision}' +down_revision = {down_revision} +branch_labels = None +depends_on = None + +# Configuration +MIN_MIGRATION_DAYS = 30 # Minimum days before contracting + + +def upgrade() -> None: + """ + CONTRACT PHASE: Remove old schema elements + - Verify all services have migrated + - Ensure data migration is complete + - Remove deprecated columns/tables + - Make new columns 
non-nullable if needed + """ + bind = op.get_bind() + inspector = inspect(bind) + + print("๐Ÿ” Verifying migration readiness...") + + # Check 1: Verify migration completion + {verification_checks} + + # Check 2: Verify no recent usage of old column (if monitoring is set up) + try: + result = bind.execute(text(""" + SELECT MAX(last_accessed) as last_use + FROM column_usage_stats + WHERE table_name = '{table_name}' + AND column_name = '{old_column}' + """)).first() + + if result and result.last_use: + days_since_use = (datetime.now() - result.last_use).days + if days_since_use < MIN_MIGRATION_DAYS: + raise Exception( + f"โŒ Cannot contract: old column used {{days_since_use}} days ago " + f"(minimum: {{MIN_MIGRATION_DAYS}} days)" + ) + print(f"โœ… Old column last used {{days_since_use}} days ago") + except Exception as e: + if "column_usage_stats" not in str(e): + raise + print("โญ๏ธ No usage tracking table found, skipping usage check") + + # Check 3: Create final backup before removing + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + backup_table = 'backup_{table_name}_{old_column}_' + timestamp + + print(f"๐Ÿ’พ Creating final backup: {{backup_table}}") + bind.execute(text(f""" + CREATE TABLE {{backup_table}} AS + SELECT * FROM {table_name} + WHERE {old_column} IS NOT NULL + LIMIT 10000 -- Limit backup size + """)) + + # Remove old column + columns = [col['name'] for col in inspector.get_columns('{table_name}')] +''', +} diff --git a/scripts/test_validator.py b/scripts/test_validator.py new file mode 100644 index 000000000000..e0d348f0a54d --- /dev/null +++ b/scripts/test_validator.py @@ -0,0 +1,223 @@ +"""Test script for migration validator.""" + +import os +import sys +import tempfile +from pathlib import Path + +# Add parent directory to path +sys.path.append(str(Path(__file__).parent.parent)) + +from src.backend.base.langflow.alembic.migration_validator import MigrationValidator + + +def create_test_migration(content: str, filename: str) -> Path: + 
"""Create a temporary migration file for testing.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=filename, delete=False) as f: + f.write(content) + return Path(f.name) + + +def test_expand_phase(): + """Test EXPAND phase validations.""" + print("\n๐Ÿงช Testing EXPAND Phase Validations...") + + # Test: Good EXPAND migration + good_expand = '''""" +Description: Add email_verified column +Phase: EXPAND +Safe to rollback: YES + +Revision ID: test_expand_good +""" +from alembic import op +import sqlalchemy as sa + +def upgrade(): + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = [col['name'] for col in inspector.get_columns('users')] + + if 'email_verified' not in columns: + op.add_column('users', sa.Column('email_verified', sa.Boolean(), nullable=True)) + +def downgrade(): + op.drop_column('users', 'email_verified') +''' + + # Test: Bad EXPAND migration + bad_expand = '''""" +Description: Add required column +Phase: EXPAND + +Revision ID: test_expand_bad +""" +from alembic import op +import sqlalchemy as sa + +def upgrade(): + # Missing existence check and non-nullable + op.add_column('users', sa.Column('email_verified', sa.Boolean(), nullable=False)) + # Dropping column in EXPAND phase + op.drop_column('users', 'old_column') + +def downgrade(): + pass +''' + + validator = MigrationValidator() + + # Test good migration + good_file = create_test_migration(good_expand, "good_expand.py") + result = validator.validate_migration_file(good_file) + print(f" โœ… Good EXPAND: Valid={result['valid']} (expected: True)") + assert result["valid"], "Good EXPAND should pass" # noqa: S101 + os.unlink(good_file) # noqa: PTH108 + + # Test bad migration + bad_file = create_test_migration(bad_expand, "bad_expand.py") + result = validator.validate_migration_file(bad_file) + print(f" โœ… Bad EXPAND: Valid={result['valid']} (expected: False)") + print(f" Violations: {len(result['violations'])}") + for v in result["violations"]: + print(f" - {v['type']}: 
{v['message']}") + assert not result["valid"], "Bad EXPAND should fail" # noqa: S101 + os.unlink(bad_file) # noqa: PTH108 + + +def test_contract_phase(): + """Test CONTRACT phase validations.""" + print("\n๐Ÿงช Testing CONTRACT Phase Validations...") + + good_contract = '''""" +Description: Remove old column +Phase: CONTRACT + +Revision ID: test_contract_good +""" +from alembic import op +import sqlalchemy as sa + +def upgrade(): + bind = op.get_bind() + + # Check data migration is complete + result = bind.execute(sa.text(""" + SELECT COUNT(*) as cnt FROM users + WHERE old_email IS NOT NULL AND new_email IS NULL + """)).first() + + if result.cnt > 0: + raise Exception(f"Cannot contract: {result.cnt} rows not migrated") + + op.drop_column('users', 'old_email') + +def downgrade(): + raise NotImplementedError("Cannot rollback CONTRACT phase") +''' + + validator = MigrationValidator() + + good_file = create_test_migration(good_contract, "good_contract.py") + result = validator.validate_migration_file(good_file) + print(f" โœ… Good CONTRACT: Valid={result['valid']} (expected: True)") + os.unlink(good_file) # noqa: PTH108 + + +def test_phase_detection(): + """Test phase detection from different formats.""" + print("\n๐Ÿงช Testing Phase Detection...") + + test_cases = [ + ("Phase: EXPAND", "EXPAND"), + ("phase: migrate", "MIGRATE"), + ("PHASE: CONTRACT", "CONTRACT"), + ("No phase marker", "UNKNOWN"), + ] + + validator = MigrationValidator() + + for content_marker, expected_phase in test_cases: + content = f'''""" +Migration description +{content_marker} +""" +def upgrade(): pass +def downgrade(): pass +''' + file = create_test_migration(content, "phase_test.py") + result = validator.validate_migration_file(file) + detected_phase = result["phase"] + print(f" โœ… '{content_marker}' โ†’ {detected_phase} (expected: {expected_phase})") + assert detected_phase == expected_phase, f"Phase detection failed for {content_marker}" # noqa: S101 + os.unlink(file) # noqa: PTH108 + + 
+def test_common_mistakes(): + """Test detection of common migration mistakes.""" + print("\n๐Ÿงช Testing Common Mistake Detection...") + + mistakes = { + "Direct rename": """ +def upgrade(): + op.rename_column('users', 'email', 'email_address') +""", + "Direct type change": """ +def upgrade(): + op.alter_column('users', 'age', type_=sa.Integer()) +""", + "Non-nullable without default": """ +def upgrade(): + op.add_column('users', sa.Column('required_field', sa.String(), nullable=False)) +""", + } + + validator = MigrationValidator() + + for mistake_name, code in mistakes.items(): + content = f'''""" +Test: {mistake_name} +Phase: EXPAND +""" +from alembic import op +import sqlalchemy as sa + +{code} + +def downgrade(): pass +''' + file = create_test_migration(content, f"{mistake_name}.py") + result = validator.validate_migration_file(file) + print(f" โœ… {mistake_name}: Detected={not result['valid']}") + assert not result["valid"], f"Should detect {mistake_name}" # noqa: S101 + os.unlink(file) # noqa: PTH108 + + +def main(): + print("=" * 60) + print("๐Ÿš€ Migration Validator Test Suite") + print("=" * 60) + + try: + test_expand_phase() + test_contract_phase() + test_phase_detection() + test_common_mistakes() + + print("\n" + "=" * 60) + print("โœ… All tests passed!") + print("=" * 60) + + except AssertionError as e: + print(f"\nโŒ Test failed: {e}") + sys.exit(1) + except (OSError, ImportError) as e: + print(f"\nโŒ Unexpected error: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/test_with_database.py b/scripts/test_with_database.py new file mode 100644 index 000000000000..cadc3ffca44b --- /dev/null +++ b/scripts/test_with_database.py @@ -0,0 +1,48 @@ +"""Test migrations with actual database.""" + +import sqlite3 +import tempfile + +from alembic import command +from alembic.config import Config + + +def test_real_migration(): + """Test migration with actual SQLite database.""" + # 
Create temporary database + with tempfile.NamedTemporaryFile(suffix=".db") as tmp: + db_path = tmp.name + + # Create test table + conn = sqlite3.connect(db_path) + conn.execute(""" + CREATE TABLE users ( + id INTEGER PRIMARY KEY, + name TEXT, + old_email TEXT + ) + """) + conn.commit() + + # Create alembic.ini + alembic_cfg = Config() + alembic_cfg.set_main_option("script_location", "src/backend/base/langflow/alembic") + alembic_cfg.set_main_option("sqlalchemy.url", f"sqlite:///{db_path}") + + # Run migration + try: + command.upgrade(alembic_cfg, "head") + print("โœ… Migration executed successfully") + except RuntimeError as e: + print(f"โŒ Migration failed: {e}") + + # Verify schema + cursor = conn.execute("PRAGMA table_info(users)") + columns = [row[1] for row in cursor.fetchall()] + print(f"Columns after migration: {columns}") + + conn.close() + + +if __name__ == "__main__": + test_real_migration() diff --git a/src/backend/base/langflow/__main__.py b/src/backend/base/langflow/__main__.py index 6a2d0929a371..b3c1d7434053 100644 --- a/src/backend/base/langflow/__main__.py +++ b/src/backend/base/langflow/__main__.py @@ -891,9 +891,7 @@ async def aapi_key(): await delete_api_key(session, api_key.id) api_key_create = ApiKeyCreate(name="CLI") - unmasked_api_key = await create_api_key(session, api_key_create, user_id=superuser.id) - await session.commit() - return unmasked_api_key + return await create_api_key(session, api_key_create, user_id=superuser.id) unmasked_api_key = asyncio.run(aapi_key()) # Create a banner to display the API key and tell the user it won't be shown again diff --git a/src/backend/base/langflow/alembic/DB-MIGRATION-GUIDE.MD b/src/backend/base/langflow/alembic/DB-MIGRATION-GUIDE.MD new file mode 100644 index 000000000000..0a2d9cf349e9 --- /dev/null +++ b/src/backend/base/langflow/alembic/DB-MIGRATION-GUIDE.MD @@ -0,0 +1,513 @@ +# Database Migration Guidelines: Expand-Contract Pattern + +## Overview + +This guide outlines our approach to database 
migrations in a multi-service architecture where multiple services with different versions share the same database. We follow the **Expand-Contract Pattern** to ensure zero-downtime deployments and maintain N-1 version compatibility. + +## Table of Contents + +1. [Core Principles](#core-principles) +2. [The Expand-Contract Pattern](#the-expand-contract-pattern) +3. [Migration Phases](#migration-phases) +4. [Implementation Guidelines](#implementation-guidelines) +5. [Safety Checks](#safety-checks) +6. [Rollback Procedures](#rollback-procedures) +7. [Best Practices](#best-practices) +8. [Anti-Patterns](#anti-patterns) +9. [Examples](#examples) +10. [Monitoring](#monitoring) + +## Core Principles + +### N-1 Version Support +- The database schema must support at least two consecutive versions of each service +- Never introduce breaking changes that would prevent older service versions from functioning +- All schema changes must be backward compatible + +### Zero-Downtime Deployments +- Database migrations should not require service downtime +- Services should continue operating during and after migration +- Rollback should be possible without data loss + +## The Expand-Contract Pattern + +The Expand-Contract pattern consists of three phases: + +```mermaid +flowchart LR + A[Current Schema] + B["**EXPAND: Add New Schema**"] + C["**MIGRATE: Transition Data**"] + D["**CONTRACT: Remove Old Schema**"] + + A --> B + B --> C + C --> D + + %% basic neutral colors + style B fill:#f2f2f2,stroke:#333,stroke-width:1.5px,color:#000 + style C fill:#d9eaff,stroke:#333,stroke-width:1.5px,color:#000 + style D fill:#d5f5d5,stroke:#333,stroke-width:1.5px,color:#000 + + + +``` +**Diagram Description:** +The Expand-Contract migration pattern consists of three phases: +- **Expand:** Add new schema elements (shown in neutral grey). +- **Migrate:** Transition data and service usage (shown in light blue). +- **Contract:** Remove old schema elements after full adoption (shown in light green). 
+Each phase flows sequentially: Current Schema → Expand → Migrate → Contract. + + +### Phase Timeline + +Assuming an N-day Contract cycle: + +| Phase | Duration | Description | +|-------|----------|-------------| +| **Expand** | Day 1 | Add new schema elements (backward compatible) | +| **Migrate** | Days 2-N | Update services, backfill data, monitor adoption | +| **Contract** | Day N+1 | Remove deprecated schema (only after full adoption) | + +## Migration Phases + +### Phase 1: EXPAND (Non-Breaking Addition) + +**Goal**: Add new schema elements without breaking existing services + +```python +def upgrade() -> None: + """ + EXPAND PHASE: Add new schema elements as nullable/optional + """ + bind = op.get_bind() + inspector = sa.inspect(bind) + columns = [col['name'] for col in inspector.get_columns('table_name')] + + # Always check existence to ensure idempotency + if 'new_column' not in columns: + # CRITICAL: Use nullable=True for backward compatibility + op.add_column('table_name', sa.Column('new_column', sa.String(), nullable=True)) + + # Optional: Add index for performance + op.create_index('ix_table_new_column', 'table_name', ['new_column']) +``` + +**Service Compatibility**: +- ✅ Old services: Continue working (ignore new column) +- ✅ New services: Can start using new column immediately + +### Phase 2: MIGRATE (Data Transition) + +**Goal**: Gradually transition all services and data to use new schema + +**Database Operations**: +```sql +-- Backfill existing data if needed +UPDATE table_name +SET new_column = old_column +WHERE new_column IS NULL AND old_column IS NOT NULL; + +-- Monitor adoption +SELECT + COUNT(*) FILTER (WHERE new_column IS NOT NULL) as using_new, + COUNT(*) FILTER (WHERE new_column IS NULL) as not_using_new +FROM table_name +WHERE created_at > NOW() - INTERVAL '1 day'; +``` + +**Service Code Pattern**: +```python +class ServiceAdapter: + def read_data(self, row): + """Handle both old and new schema""" + return { + 'id': row.id, +
# Gracefully handle missing columns + 'new_field': getattr(row, 'new_column', None), + # Fallback to old column if new doesn't exist + 'data': row.new_column or row.old_column + } + + def write_data(self, data): + """Write to both old and new schema during transition""" + return Model( + old_column=data, # Keep writing to old column + new_column=data # Also write to new column + ) +``` + +### Phase 3: CONTRACT (Cleanup) + +**Goal**: Remove deprecated schema elements after all services have migrated + +**Prerequisites Checklist**: +- [ ] All services deployed with new schema support +- [ ] No queries using old columns in the past 30 days +- [ ] Data migration completed and verified +- [ ] Backup of deprecated data created + +```python +def upgrade_contract_phase() -> None: + """ + CONTRACT PHASE: Remove old schema (only after full migration) + """ + # Verify no services are using old column + bind = op.get_bind() + + # Safety check: Log usage before removal + result = bind.execute(sa.text(""" + SELECT COUNT(*) as cnt FROM table_name + WHERE old_column IS NOT NULL AND new_column IS NULL + """)).first() + + if result and result.cnt > 0: + raise Exception(f"Cannot contract: {result.cnt} rows still depend on old_column") + + # Safe to remove + op.drop_column('table_name', 'old_column') +``` + +## Implementation Guidelines + +### Adding Columns + +✅ **DO**: +```python +# Always nullable for new columns +op.add_column('message', sa.Column('context_id', sa.String(), nullable=True)) + +# Add default value if needed (database-level) +op.add_column('message', sa.Column('status', sa.String(), + nullable=True, server_default='pending')) +``` + +❌ **DON'T**: +```python +# Never add required columns directly +op.add_column('message', sa.Column('context_id', sa.String(), nullable=False)) +``` + +### Removing Columns + +✅ **DO**: +1. Stop writing to the column (application code) +2. Stop reading from the column (application code) +3. 
Wait for all services to update (monitor for 30+ days) +4. Remove the column in a separate migration + +❌ **DON'T**: +- Remove columns immediately after adding replacements +- Drop columns that might be in use by any service version + +### Renaming Columns + +✅ **DO**: +1. Add new column with desired name +2. Copy data from old to new column +3. Update services to use new column +4. Remove old column (after migration period) + +❌ **DON'T**: +```python +# Never rename directly - breaks old services +op.alter_column('table_name', 'old_name', new_column_name='new_name') +``` + +### Changing Column Types + +✅ **DO**: +1. Add new column with desired type +2. Migrate data with type conversion +3. Switch services to new column +4. Drop old column after transition + +## Safety Checks + +### Pre-Migration Checks + +```python +def pre_migration_checks(): + """Run before applying migration""" + bind = op.get_bind() + + # Check table size for performance impact + result = bind.execute(sa.text( + "SELECT COUNT(*) as cnt FROM table_name" + )).first() + + if result.cnt > 1000000: + print(f"WARNING: Large table ({result.cnt} rows) - migration may be slow") + + # Check for running transactions + result = bind.execute(sa.text(""" + SELECT COUNT(*) FROM pg_stat_activity + WHERE state = 'active' AND query_start < NOW() - INTERVAL '5 minutes' + """)).first() + + if result[0] > 0: + print(f"WARNING: {result[0]} long-running transactions detected") +``` + +### Post-Migration Validation + +```python +def post_migration_validation(): + """Verify migration succeeded""" + bind = op.get_bind() + inspector = sa.inspect(bind) + + # Verify column exists + columns = [col['name'] for col in inspector.get_columns('table_name')] + assert 'new_column' in columns, "Migration failed: column not added" + + # Verify data integrity + result = bind.execute(sa.text(""" + SELECT COUNT(*) FROM table_name + WHERE new_column IS NOT NULL + """)).first() + + print(f"Migration complete: {result[0]} rows have 
new_column populated") +``` + +## Rollback Procedures + +### Safe Rollback Checklist + +Before rolling back, verify: +- [ ] No services depend solely on new schema +- [ ] No critical data exists only in new columns +- [ ] Rollback migration has been tested in staging + +### Rollback Implementation + +```python +def downgrade() -> None: + """ + Safe rollback with data preservation + """ + bind = op.get_bind() + inspector = sa.inspect(bind) + + # Check for data that would be lost + result = bind.execute(sa.text(""" + SELECT COUNT(*) as cnt FROM table_name + WHERE new_column IS NOT NULL + """)).first() + + if result and result.cnt > 0: + # Backup data before dropping + bind.execute(sa.text(""" + CREATE TABLE IF NOT EXISTS table_name_backup AS + SELECT id, new_column, NOW() as backed_up_at + FROM table_name WHERE new_column IS NOT NULL + """)) + print(f"Backed up {result.cnt} rows to table_name_backup") + + # Safe to drop column + columns = [col['name'] for col in inspector.get_columns('table_name')] + if 'new_column' in columns: + op.drop_column('table_name', 'new_column') +``` + +## Best Practices + +### 1. Always Use Idempotent Migrations + +```python +# Check existence before adding/dropping +if 'column_name' not in columns: + op.add_column(...) + +if 'column_name' in columns: + op.drop_column(...) +``` + +### 2. Document Migration Phases + +```python +""" +Migration: Add context_id to message table +Phase: EXPAND +Safe to rollback: YES +Services compatible: All versions +Next phase: MIGRATE after all services deployed + +Revision ID: 182e5471b900 +""" +``` + +### 3. Use Feature Flags for Service Transitions + +```python +class MessageService: + def process_message(self, message): + if feature_flags.is_enabled('use_context_id'): + # New logic using context_id + return self._process_with_context(message) + else: + # Old logic without context_id + return self._process_legacy(message) +``` + +### 4. 
Monitor Migration Progress + +```sql +-- Create monitoring view +CREATE VIEW migration_progress AS +SELECT + 'context_id_adoption' as migration, + COUNT(*) FILTER (WHERE context_id IS NOT NULL) * 100.0 / COUNT(*) as percentage_complete, + COUNT(*) as total_records, + MAX(updated_at) as last_update +FROM message +WHERE created_at > NOW() - INTERVAL '7 days'; +``` + +## Anti-Patterns + +### ❌ Breaking Changes Without Migration Path + +```python +# DON'T: This breaks existing services +op.alter_column('message', 'content', nullable=False) +``` + +### ❌ Immediate Schema Contraction + +```python +# DON'T: Remove old schema in same migration as adding new +def upgrade(): + op.add_column('table', sa.Column('new_col', ...)) + op.drop_column('table', 'old_col') # Services still using this! +``` + +### ❌ Data Type Changes Without Migration + +```python +# DON'T: Direct type change can fail or corrupt data +op.alter_column('table', 'amount', type_=sa.Integer()) # Was String +``` + +### ❌ Assuming Service Deployment Order + +```python +# DON'T: Assume all services update simultaneously +if datetime.now() > deployment_date: + op.drop_column('table', 'old_column') # Some services might be delayed! 
+``` + +## Examples + +### Example 1: Adding a New Required Field + +```python +# Migration 1: EXPAND (Day 1) +def upgrade_expand(): + op.add_column('user', sa.Column('email_verified', sa.Boolean(), + nullable=True, server_default='false')) + +# Migration 2: MIGRATE (Day 30, after all services updated) +def upgrade_migrate(): + # Backfill any NULL values + op.execute("UPDATE user SET email_verified = false WHERE email_verified IS NULL") + +# Migration 3: CONTRACT (Day 60, after verification) +def upgrade_contract(): + op.alter_column('user', 'email_verified', nullable=False) +``` + +### Example 2: Replacing a Column + +```python +# Migration 1: Add new column +def upgrade_phase1(): + op.add_column('order', sa.Column('status_code', sa.Integer(), nullable=True)) + + # Copy data from old column + op.execute(""" + UPDATE order SET status_code = + CASE status_text + WHEN 'pending' THEN 1 + WHEN 'processing' THEN 2 + WHEN 'complete' THEN 3 + ELSE 0 + END + """) + +# Migration 2: Remove old column (after transition period) +def upgrade_phase2(): + op.drop_column('order', 'status_text') +``` + +## Monitoring + +### Service Version Tracking + +```sql +-- Track which services are using new schema +CREATE TABLE service_schema_usage ( + service_name VARCHAR(100), + schema_version VARCHAR(50), + last_seen TIMESTAMP DEFAULT NOW(), + uses_new_schema BOOLEAN DEFAULT FALSE +); + +-- Update from application +INSERT INTO service_schema_usage (service_name, schema_version, uses_new_schema) +VALUES ('user-service', 'v2.1.0', true) +ON CONFLICT (service_name) +DO UPDATE SET + schema_version = EXCLUDED.schema_version, + last_seen = NOW(), + uses_new_schema = EXCLUDED.uses_new_schema; +``` + +### Migration Health Dashboard Queries + +```sql +-- Check migration adoption rate +SELECT + migration_name, + adopted_services, + total_services, + (adopted_services * 100.0 / total_services) as adoption_percentage, + days_since_deployment +FROM migration_tracking +WHERE is_active = true +ORDER BY 
days_since_deployment DESC; +``` +```sql +-- Identify services not yet migrated +SELECT + s.service_name, + s.version, + s.last_deployment, + m.migration_name +FROM services s +CROSS JOIN active_migrations m +WHERE NOT EXISTS ( + SELECT 1 FROM service_migrations sm + WHERE sm.service_name = s.service_name + AND sm.migration_name = m.migration_name +); +``` + +## Conclusion + +Following the Expand-Contract pattern ensures: +- ✅ Zero-downtime deployments +- ✅ Safe rollback capabilities +- ✅ N-1 version compatibility +- ✅ Gradual, monitored transitions +- ✅ No data loss during migrations + +Remember: **When in doubt, expand first, migrate slowly, and contract only when certain.** + +## References + +- [Evolutionary Database Design - Martin Fowler](https://martinfowler.com/articles/evodb.html) +- [Zero-Downtime Database Migrations](https://www.brunton-spall.co.uk/post/2014/05/06/database-migrations-done-right/) +- [Alembic Documentation](https://alembic.sqlalchemy.org/) + diff --git a/src/backend/base/langflow/alembic/__init__.py b/src/backend/base/langflow/alembic/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/src/backend/base/langflow/alembic/env.py b/src/backend/base/langflow/alembic/env.py index 79fea2b43be1..d2d4f4418dcb 100644 --- a/src/backend/base/langflow/alembic/env.py +++ b/src/backend/base/langflow/alembic/env.py @@ -1,15 +1,21 @@ # noqa: INP001 import asyncio +import hashlib +import os from logging.config import fileConfig from typing import Any + from alembic import context from sqlalchemy import pool, text from sqlalchemy.event import listen from sqlalchemy.ext.asyncio import async_engine_from_config +from lfx.log.logger import logger + from langflow.services.database.service import SQLModel + # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
config = context.config @@ -96,25 +102,33 @@ def _do_run_migrations(connection): configure_kwargs["prepare_threshold"] = None context.configure(**configure_kwargs) - with context.begin_transaction(): if connection.dialect.name == "postgresql": - connection.execute(text("SET LOCAL lock_timeout = '60s';")) - connection.execute(text("SELECT pg_advisory_xact_lock(112233);")) + # Use namespace from environment variable if provided, otherwise use default static key + namespace = os.getenv("LANGFLOW_MIGRATION_LOCK_NAMESPACE") + if namespace: + lock_key = int(hashlib.sha256(namespace.encode()).hexdigest()[:16], 16) % (2**63 - 1) + logger.info(f"Using migration lock namespace: {namespace}, lock_key: {lock_key}") + else: + lock_key = 11223344 + logger.info(f"Using default migration lock_key: {lock_key}") + + connection.execute(text("SET LOCAL lock_timeout = '180s';")) + connection.execute(text(f"SELECT pg_advisory_xact_lock({lock_key});")) context.run_migrations() - async def _run_async_migrations() -> None: - # Get database URL to determine dialect - url = config.get_main_option("sqlalchemy.url") - connect_args: dict[str, Any] = {} + # Disable prepared statements for PostgreSQL (required for PgBouncer compatibility) + # SQLite doesn't support this parameter, so only add it for PostgreSQL + config_section = config.get_section(config.config_ini_section, {}) + db_url = config_section.get("sqlalchemy.url", "") - # Only add prepare_threshold for PostgreSQL - if url and "postgresql" in url: + connect_args: dict[str, Any] = {} + if db_url and "postgresql" in db_url: connect_args["prepare_threshold"] = None connectable = async_engine_from_config( - config.get_section(config.config_ini_section, {}), + config_section, prefix="sqlalchemy.", poolclass=pool.NullPool, connect_args=connect_args, diff --git a/src/backend/base/langflow/alembic/migration_validator.py b/src/backend/base/langflow/alembic/migration_validator.py new file mode 100644 index 000000000000..f922993509c6 --- /dev/null 
+++ b/src/backend/base/langflow/alembic/migration_validator.py @@ -0,0 +1,376 @@ +"""Migration Validator - Enforces Expand-Contract Pattern for Alembic migrations.""" + +import ast +import json +import re +import sys +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Any + + +class MigrationPhase(Enum): + EXPAND = "EXPAND" + MIGRATE = "MIGRATE" + CONTRACT = "CONTRACT" + UNKNOWN = "UNKNOWN" + + +@dataclass +class Violation: + type: str + message: str + line: int + severity: str = "error" # error or warning + + +class MigrationValidator: + """Validates Alembic migrations follow Expand-Contract pattern.""" + + VIOLATIONS = { + "BREAKING_ADD_COLUMN": "Adding non-nullable column without default", + "DIRECT_RENAME": "Direct column rename detected", + "DIRECT_TYPE_CHANGE": "Direct type alteration detected", + "IMMEDIATE_DROP": "Dropping column without migration phase", + "MISSING_IDEMPOTENCY": "Migration not idempotent", + "NO_PHASE_MARKER": "Migration missing phase documentation", + "UNSAFE_ROLLBACK": "Downgrade may cause data loss", + "MISSING_DOWNGRADE": "Downgrade function not implemented", + "INVALID_PHASE_OPERATION": "Operation not allowed in this phase", + "NO_EXISTENCE_CHECK": "Operation should check existence first", + "MISSING_DATA_CHECK": "CONTRACT phase should verify data migration", + } + + def __init__(self, *, strict_mode: bool = True): + self.strict_mode = strict_mode + + ### Main validation method - it's a template method Go4 style.### + + def validate_migration_file(self, filepath: Path) -> dict[str, Any]: + """Validate a single migration file.""" + if not filepath.exists(): + return { + "file": str(filepath), + "valid": False, + "violations": [Violation("FILE_NOT_FOUND", f"File not found: {filepath}", 0)], + "warnings": [], + } + + content = filepath.read_text() + + try: + tree = ast.parse(content) + except SyntaxError as e: + return { + "file": str(filepath), + "valid": False, + "violations": 
[Violation("SYNTAX_ERROR", str(e), e.lineno or 0)], + "warnings": [], + } + + violations = [] + warnings = [] + + # Check for phase documentation + phase = self._extract_phase(content) + if phase == MigrationPhase.UNKNOWN: + violations.append( + Violation("NO_PHASE_MARKER", "Migration must specify phase: EXPAND, MIGRATE, or CONTRACT", 1) + ) + + # Check upgrade function + upgrade_node = self._find_function(tree, "upgrade") + if upgrade_node: + phase_violations = self._check_upgrade_operations(upgrade_node, phase) + violations.extend(phase_violations) + else: + violations.append(Violation("MISSING_UPGRADE", "Migration must have an upgrade() function", 1)) + + # Check downgrade function + downgrade_node = self._find_function(tree, "downgrade") + if downgrade_node: + downgrade_issues = self._check_downgrade_safety(downgrade_node, phase) + warnings.extend(downgrade_issues) + elif phase != MigrationPhase.CONTRACT: # CONTRACT phase may not support rollback + violations.append(Violation("MISSING_DOWNGRADE", "Migration must have a downgrade() function", 1)) + + # Additional phase-specific checks + if phase == MigrationPhase.CONTRACT: + contract_issues = self._check_contract_phase_requirements(content) + violations.extend(contract_issues) + + return { + "file": str(filepath), + "valid": len(violations) == 0, + "violations": [v.__dict__ for v in violations], + "warnings": [w.__dict__ for w in warnings], + "phase": phase.value, + } + + # Method to check DB operations constraints imposed by phases - + # New constraint requirements should be added here + + def _check_upgrade_operations(self, node: ast.FunctionDef, phase: MigrationPhase) -> list[Violation]: + """Check upgrade operations for violations.""" + violations = [] + + for child in ast.walk(node): + if isinstance(child, ast.Call): + if self._is_op_call(child, "add_column"): + violations.extend(self._check_add_column(child, phase, node)) + + elif self._is_op_call(child, "alter_column"): + 
violations.extend(self._check_alter_column(child, phase)) + + elif self._is_op_call(child, "drop_column"): + violations.extend(self._check_drop_column(child, phase)) + + elif self._is_op_call(child, "rename_table") or self._is_op_call(child, "rename_column"): + violations.append( + Violation("DIRECT_RENAME", "Use expand-contract pattern instead of direct rename", child.lineno) + ) + + return violations + + def _check_add_column(self, call: ast.Call, phase: MigrationPhase, func_node: ast.FunctionDef) -> list[Violation]: + """Check add_column operations.""" + violations = [] + + # Check if column is nullable or has default + if not self._has_nullable_true(call) and not self._has_server_default(call): + violations.append( + Violation( + "BREAKING_ADD_COLUMN", "New columns must be nullable=True or have server_default", call.lineno + ) + ) + + # Check for idempotency + if not self._has_existence_check_nearby(func_node, call): + violations.append( + Violation( + "NO_EXISTENCE_CHECK", "add_column should check if column exists first (idempotency)", call.lineno + ) + ) + + # Phase-specific checks + if phase == MigrationPhase.CONTRACT: + violations.append(Violation("INVALID_PHASE_OPERATION", "Cannot add columns in CONTRACT phase", call.lineno)) + + return violations + + def _check_alter_column(self, call: ast.Call, phase: MigrationPhase) -> list[Violation]: + """Check alter_column operations.""" + violations = [] + + # Check for type changes + if self._has_type_change(call) and phase != MigrationPhase.CONTRACT: + violations.append( + Violation("DIRECT_TYPE_CHANGE", "Type changes should use expand-contract pattern", call.lineno) + ) + + # Check for nullable changes + if self._changes_nullable_to_false(call) and phase != MigrationPhase.CONTRACT: + violations.append( + Violation( + "BREAKING_ADD_COLUMN", "Making column non-nullable only allowed in CONTRACT phase", call.lineno + ) + ) + + return violations + + def _check_drop_column(self, call: ast.Call, phase: MigrationPhase) 
-> list[Violation]: + """Check drop_column operations.""" + violations = [] + + if phase != MigrationPhase.CONTRACT: + violations.append( + Violation( + "IMMEDIATE_DROP", + f"Column drops only allowed in CONTRACT phase (current: {phase.value})", + call.lineno, + ) + ) + + return violations + + def _check_contract_phase_requirements(self, content: str) -> list[Violation]: + """Check CONTRACT phase specific requirements.""" + # Check for data migration before dropping columns + if not ("SELECT" in content and "COUNT" in content): + return [ + Violation( + "MISSING_DATA_CHECK", + "CONTRACT phase should verify data migration before dropping columns", + 1, + severity="warning", + ) + ] + return [] + + def _check_downgrade_safety(self, node: ast.FunctionDef, phase: MigrationPhase) -> list[Violation]: + """Check downgrade function for safety issues.""" + warnings = [] + + # Check if downgrade might lose data + for child in ast.walk(node): + if isinstance(child, ast.Call) and self._is_op_call(child, "alter_column"): + # Check if there's a backup mechanism + func_content = ast.unparse(node) + if "backup" not in func_content.lower() and "SELECT" not in func_content: + warnings.append( + Violation( + "UNSAFE_ROLLBACK", + "Downgrade drops column without checking/backing up data", + child.lineno, + severity="warning", + ) + ) + + # CONTRACT phase special handling + if phase == MigrationPhase.CONTRACT: + func_content = ast.unparse(node) + if "NotImplementedError" not in func_content and "raise" not in func_content: + warnings.append( + Violation( + "UNSAFE_ROLLBACK", + "CONTRACT phase downgrade should raise NotImplementedError or handle carefully", + node.lineno, + severity="warning", + ) + ) + + return warnings + + def _is_op_call(self, call: ast.Call, method: str) -> bool: + """Check if call is op.method().""" + func = call.func + + # Avoid multiple attribute resolutions and isinstance checks + if type(func) is ast.Attribute: + val = func.value + if type(val) is ast.Name: + 
return val.id == "op" and func.attr == method + return False + + def _has_nullable_true(self, call: ast.Call) -> bool: + """Check if call has nullable=True.""" + for keyword in call.keywords: + if keyword.arg == "nullable" and isinstance(keyword.value, ast.Constant): + return keyword.value.value is True + + for call_arg in call.args: + if isinstance(call_arg, ast.Call): + return self._has_nullable_true(call_arg) + + return False + + def _has_server_default(self, call: ast.Call) -> bool: + """Check if call has server_default.""" + return any(kw.arg == "server_default" for kw in call.keywords) + + def _has_type_change(self, call: ast.Call) -> bool: + """Check if alter_column changes type.""" + return any(kw.arg in ["type_", "type"] for kw in call.keywords) + + def _changes_nullable_to_false(self, call: ast.Call) -> bool: + """Check if alter_column sets nullable=False.""" + for keyword in call.keywords: + if keyword.arg == "nullable" and isinstance(keyword.value, ast.Constant): + return keyword.value.value is False + return False + + ### Helper method to check for existence checks around operations. 
+ # It looks for if statements that might be checking column existence + # TODO: Evaluate if more sophisticated analysis is needed for existence checks + def _has_existence_check_nearby(self, func_node: ast.FunctionDef, target_call: ast.Call) -> bool: + """Check if operation is wrapped in existence check.""" + # Look for if statements that might be checking column existence + for node in ast.walk(func_node): + if isinstance(node, ast.If): + # Check if this if statement contains our target call + for child in ast.walk(node): + if child == target_call: + # Check if the condition mentions columns or inspector + condition = ast.unparse(node.test) + if any(keyword in condition.lower() for keyword in ["column", "inspector", "not in", "if not"]): + return True + return False + + ### Helper methods ### + + def _extract_phase(self, content: str) -> MigrationPhase: + """Extract migration phase from documentation.""" + # TODO: Support phase detection from inline comments and function + # annotations, not just docstrings or top-level comments. 
+ # Look in docstring or comments + phase_pattern = r"Phase:\s*(EXPAND|MIGRATE|CONTRACT)" + match = re.search(phase_pattern, content, re.IGNORECASE) + + if match: + phase_str = match.group(1).upper() + return MigrationPhase[phase_str] + + return MigrationPhase.UNKNOWN + + def _find_function(self, tree: ast.Module, name: str) -> ast.FunctionDef | None: + """Find a function by name in the AST.""" + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef) and node.name == name: + return node + return None + + +def main(): + """CLI entry point.""" + import argparse + + parser = argparse.ArgumentParser(description="Validate Alembic migrations") + parser.add_argument("files", nargs="+", help="Migration files to validate") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--strict", action="store_true", help="Treat warnings as errors") + + args = parser.parse_args() + + validator = MigrationValidator(strict_mode=args.strict) + all_valid = True + results = [] + + for file_path in args.files: + result = validator.validate_migration_file(Path(file_path)) + results.append(result) + + if not result["valid"]: + all_valid = False + + if args.strict and result["warnings"]: + all_valid = False + + import logging + + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger("migration_validator") + if args.json: + logger.info(json.dumps(results, indent=2)) + else: + for result in results: + logger.info("\n%s", "=" * 60) + logger.info("File: %s", result["file"]) + logger.info("Phase: %s", result["phase"]) + logger.info("Valid: %s", "โœ…" if result["valid"] else "โŒ") + + if result["violations"]: + logger.error("\nโŒ Violations:") + for v in result["violations"]: + logger.error(" Line %s: %s - %s", v["line"], v["type"], v["message"]) + + if result["warnings"]: + logger.warning("\nโš ๏ธ Warnings:") + for w in result["warnings"]: + logger.warning(" Line %s: %s - %s", w["line"], w["type"], w["message"]) + + 
sys.exit(0 if all_valid else 1) + + +if __name__ == "__main__": + main() diff --git a/src/backend/base/langflow/api/utils/core.py b/src/backend/base/langflow/api/utils/core.py index d53c981b74ee..9c02ff4e22b2 100644 --- a/src/backend/base/langflow/api/utils/core.py +++ b/src/backend/base/langflow/api/utils/core.py @@ -10,6 +10,7 @@ from fastapi_pagination import Params from lfx.graph.graph.base import Graph from lfx.log.logger import logger +from lfx.services.deps import injectable_session_scope, injectable_session_scope_readonly, session_scope from sqlalchemy import delete from sqlmodel.ext.asyncio.session import AsyncSession @@ -19,7 +20,6 @@ from langflow.services.database.models.transactions.model import TransactionTable from langflow.services.database.models.user.model import User from langflow.services.database.models.vertex_builds.model import VertexBuildTable -from langflow.services.deps import get_session, session_scope from langflow.services.store.utils import get_lf_version_from_pypi from langflow.utils.constants import LANGFLOW_GLOBAL_VAR_HEADER_PREFIX @@ -35,7 +35,10 @@ CurrentActiveUser = Annotated[User, Depends(get_current_active_user)] CurrentActiveMCPUser = Annotated[User, Depends(get_current_active_user_mcp)] -DbSession = Annotated[AsyncSession, Depends(get_session)] +# DbSession with auto-commit for write operations +DbSession = Annotated[AsyncSession, Depends(injectable_session_scope)] +# DbSessionReadOnly for read-only operations (no auto-commit, reduces lock contention) +DbSessionReadOnly = Annotated[AsyncSession, Depends(injectable_session_scope_readonly)] class EventDeliveryType(str, Enum): diff --git a/src/backend/base/langflow/api/v1/chat.py b/src/backend/base/langflow/api/v1/chat.py index 91e72a131eef..d32b6db6ae1b 100644 --- a/src/backend/base/langflow/api/v1/chat.py +++ b/src/backend/base/langflow/api/v1/chat.py @@ -289,6 +289,7 @@ async def build_vertex( if isinstance(cache, CacheMiss): # If there's no cache await logger.awarning(f"No 
cache found for {flow_id_str}. Building graph starting at {vertex_id}") + async with session_scope() as session: graph = await build_graph_from_db( flow_id=flow_id, diff --git a/src/backend/base/langflow/api/v1/files.py b/src/backend/base/langflow/api/v1/files.py index 96d69bd1d961..c6d2e03785d7 100644 --- a/src/backend/base/langflow/api/v1/files.py +++ b/src/backend/base/langflow/api/v1/files.py @@ -173,10 +173,9 @@ async def list_profile_pictures( people_path = config_path / "profile_pictures" / "People" space_path = config_path / "profile_pictures" / "Space" - # List files directly from local filesystem + # List files directly from local filesystem - bundled with the container people = [f.name for f in people_path.iterdir() if f.is_file()] if people_path.exists() else [] space = [f.name for f in space_path.iterdir() if f.is_file()] if space_path.exists() else [] - except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/src/backend/base/langflow/api/v1/flows.py b/src/backend/base/langflow/api/v1/flows.py index 6caa0f7bc05b..da64417bb724 100644 --- a/src/backend/base/langflow/api/v1/flows.py +++ b/src/backend/base/langflow/api/v1/flows.py @@ -160,11 +160,12 @@ async def create_flow( ): try: db_flow = await _new_flow(session=session, flow=flow, user_id=current_user.id) - await session.commit() + await session.flush() await session.refresh(db_flow) - await _save_flow_to_fs(db_flow) + # Convert to FlowRead while session is still active to avoid detached instance errors + flow_read = FlowRead.model_validate(db_flow, from_attributes=True) except Exception as e: if "UNIQUE constraint failed" in str(e): # Get the name of the column that failed @@ -180,7 +181,7 @@ async def create_flow( if isinstance(e, HTTPException): raise raise HTTPException(status_code=500, detail=str(e)) from e - return db_flow + return flow_read @router.get("/", response_model=list[FlowRead] | Page[FlowRead] | list[FlowHeader], status_code=200) @@ -258,8 +259,9 
@@ async def read_flows( flow_headers = [FlowHeader.model_validate(flow, from_attributes=True) for flow in flows] return compress_response(flow_headers) - # Compress the full flows response - return compress_response(flows) + # Convert to FlowRead while session is still active to avoid detached instance errors + flow_reads = [FlowRead.model_validate(flow, from_attributes=True) for flow in flows] + return compress_response(flow_reads) stmt = stmt.where(Flow.folder_id == folder_id) @@ -295,7 +297,8 @@ async def read_flow( ): """Read a flow.""" if user_flow := await _read_flow(session, flow_id, current_user.id): - return user_flow + # Convert to FlowRead while session is still active to avoid detached instance errors + return FlowRead.model_validate(user_flow, from_attributes=True) raise HTTPException(status_code=404, detail="Flow not found") @@ -358,11 +361,13 @@ async def update_flow( db_flow.folder_id = default_folder.id session.add(db_flow) - await session.commit() + await session.flush() await session.refresh(db_flow) - await _save_flow_to_fs(db_flow) + # Convert to FlowRead while session is still active to avoid detached instance errors + flow_read = FlowRead.model_validate(db_flow, from_attributes=True) + except Exception as e: if "UNIQUE constraint failed" in str(e): # Get the name of the column that failed @@ -379,7 +384,7 @@ async def update_flow( raise HTTPException(status_code=e.status_code, detail=str(e)) from e raise HTTPException(status_code=500, detail=str(e)) from e - return db_flow + return flow_read @router.delete("/{flow_id}", status_code=200) @@ -398,7 +403,6 @@ async def delete_flow( if not flow: raise HTTPException(status_code=404, detail="Flow not found") await cascade_delete_flow(session, flow.id) - await session.commit() return {"message": "Flow deleted successfully"} @@ -416,10 +420,12 @@ async def create_flows( db_flow = Flow.model_validate(flow, from_attributes=True) session.add(db_flow) db_flows.append(db_flow) - await session.commit() + 
+ await session.flush() for db_flow in db_flows: await session.refresh(db_flow) - return db_flows + + return [FlowRead.model_validate(db_flow, from_attributes=True) for db_flow in db_flows] @router.post("/upload/", response_model=list[FlowRead], status_code=201) @@ -444,10 +450,13 @@ async def upload_file( response_list.append(response) try: - await session.commit() + await session.flush() for db_flow in response_list: await session.refresh(db_flow) await _save_flow_to_fs(db_flow) + + # Convert to FlowRead while session is still active to avoid detached instance errors + flow_reads = [FlowRead.model_validate(db_flow, from_attributes=True) for db_flow in response_list] except Exception as e: if "UNIQUE constraint failed" in str(e): # Get the name of the column that failed @@ -464,7 +473,7 @@ async def upload_file( raise raise HTTPException(status_code=500, detail=str(e)) from e - return response_list + return flow_reads @router.delete("/") @@ -491,7 +500,7 @@ async def delete_multiple_flows( for flow in flows_to_delete: await cascade_delete_flow(db, flow.id) - await db.commit() + await db.flush() return {"deleted": len(flows_to_delete)} except Exception as exc: raise HTTPException(status_code=500, detail=str(exc)) from exc diff --git a/src/backend/base/langflow/api/v1/mcp_projects.py b/src/backend/base/langflow/api/v1/mcp_projects.py index 868d9f056dda..bb3c156e3ef5 100644 --- a/src/backend/base/langflow/api/v1/mcp_projects.py +++ b/src/backend/base/langflow/api/v1/mcp_projects.py @@ -440,6 +440,8 @@ async def update_project_mcp_settings( session.add(flow) updated_flows.append(flow) + await session.flush() + response: dict[str, Any] = { "message": f"Updated MCP settings for {len(updated_flows)} flows and project auth settings" } @@ -1201,8 +1203,6 @@ async def init_mcp_servers(): # Auto-configure starter projects with MCP server settings if enabled await auto_configure_starter_projects_mcp(session) - # Commit any auth settings updates - await session.commit() except 
Exception as e: # noqa: BLE001 msg = f"Failed to initialize MCP servers: {e}" diff --git a/src/backend/base/langflow/api/v1/monitor.py b/src/backend/base/langflow/api/v1/monitor.py index 488614bddfd9..a5aa1def475b 100644 --- a/src/backend/base/langflow/api/v1/monitor.py +++ b/src/backend/base/langflow/api/v1/monitor.py @@ -35,7 +35,6 @@ async def get_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSessio async def delete_vertex_builds(flow_id: Annotated[UUID, Query()], session: DbSession) -> None: try: await delete_vertex_builds_by_flow_id(session, flow_id) - await session.commit() except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -93,7 +92,6 @@ async def get_messages( async def delete_messages(message_ids: list[UUID], session: DbSession) -> None: try: await session.exec(delete(MessageTable).where(MessageTable.id.in_(message_ids))) # type: ignore[attr-defined] - await session.commit() except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -118,7 +116,7 @@ async def update_message( message_dict["edit"] = True db_message.sqlmodel_update(message_dict) session.add(db_message) - await session.commit() + await session.flush() await session.refresh(db_message) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -151,7 +149,7 @@ async def update_session_id( session.add_all(messages) - await session.commit() + await session.flush() message_responses = [] for message in messages: await session.refresh(message) @@ -173,7 +171,6 @@ async def delete_messages_session( .where(col(MessageTable.session_id) == session_id) .execution_options(synchronize_session="fetch") ) - await session.commit() except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/src/backend/base/langflow/api/v1/projects.py b/src/backend/base/langflow/api/v1/projects.py index d284a3839fa4..290b4a607a36 100644 --- a/src/backend/base/langflow/api/v1/projects.py +++ 
b/src/backend/base/langflow/api/v1/projects.py @@ -94,7 +94,7 @@ async def create_project( ) session.add(new_project) - await session.commit() + await session.flush() await session.refresh(new_project) # Auto-register MCP server for this project with configured default auth @@ -184,21 +184,22 @@ async def create_project( update(Flow).where(Flow.id.in_(project.components_list)).values(folder_id=new_project.id) # type: ignore[attr-defined] ) await session.exec(update_statement_components) - await session.commit() if project.flows_list: update_statement_flows = ( update(Flow).where(Flow.id.in_(project.flows_list)).values(folder_id=new_project.id) # type: ignore[attr-defined] ) await session.exec(update_statement_flows) - await session.commit() + + # Convert to FolderRead while session is still active to avoid detached instance errors + folder_read = FolderRead.model_validate(new_project, from_attributes=True) except HTTPException: # Re-raise HTTP exceptions (like 409 conflicts) without modification raise except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e - return new_project + return folder_read @router.get("/", response_model=list[FolderRead], status_code=200) @@ -216,7 +217,10 @@ async def read_projects( ) ).all() projects = [project for project in projects if project.name != STARTER_FOLDER_NAME] - return sorted(projects, key=lambda x: x.name != DEFAULT_FOLDER_NAME) + sorted_projects = sorted(projects, key=lambda x: x.name != DEFAULT_FOLDER_NAME) + + # Convert to FolderRead while session is still active to avoid detached instance errors + return [FolderRead.model_validate(project, from_attributes=True) for project in sorted_projects] except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -277,7 +281,9 @@ async def read_project( # If no pagination requested, return all flows for the current user flows_from_current_user_in_project = [flow for flow in project.flows if flow.user_id == current_user.id] 
project.flows = flows_from_current_user_in_project - return project # noqa: TRY300 + + # Convert to FolderReadWithFlows while session is still active to avoid detached instance errors + return FolderReadWithFlows.model_validate(project, from_attributes=True) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -415,7 +421,7 @@ async def update_project( existing_project.parent_id = project.parent_id session.add(existing_project) - await session.commit() + await session.flush() await session.refresh(existing_project) # Start MCP Composer if auth changed to OAuth @@ -453,14 +459,15 @@ async def update_project( update(Flow).where(Flow.id.in_(excluded_flows)).values(folder_id=my_collection_project.id) # type: ignore[attr-defined] ) await session.exec(update_statement_my_collection) - await session.commit() if concat_project_components: update_statement_components = ( update(Flow).where(Flow.id.in_(concat_project_components)).values(folder_id=existing_project.id) # type: ignore[attr-defined] ) await session.exec(update_statement_components) - await session.commit() + + # Convert to FolderRead while session is still active to avoid detached instance errors + folder_read = FolderRead.model_validate(existing_project, from_attributes=True) except HTTPException: # Re-raise HTTP exceptions (like 409 conflicts) without modification @@ -468,7 +475,7 @@ async def update_project( except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e - return existing_project + return folder_read @router.delete("/{project_id}", status_code=204) @@ -562,7 +569,6 @@ async def delete_project( try: await session.delete(project) - await session.commit() return Response(status_code=status.HTTP_204_NO_CONTENT) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) from e @@ -655,7 +661,7 @@ async def upload_file( ) session.add(new_project) - await session.commit() + await session.flush() await session.refresh(new_project) del 
def is_permanent_storage_failure(error: Exception) -> bool:
    """Check if a storage deletion error is a permanent failure (file/storage gone).

    Permanent failures are safe to delete from DB because the file/storage is already gone.
    Transient failures (network, permissions) should keep DB record for retry.

    Args:
        error: The exception raised during storage deletion

    Returns:
        True if this is a permanent failure (safe to delete from DB), False otherwise
    """
    # Check for standard Python file not found errors (local storage)
    if isinstance(error, FileNotFoundError):
        return True

    # Check for S3 error codes (boto3/aioboto3)
    # S3 errors have a 'response' attribute with Error.Code
    response = getattr(error, "response", None)
    if isinstance(response, dict):
        error_info = response.get("Error")
        # "Error" may be missing, None, or not a mapping; never raise while classifying
        # an exception, since this runs inside the caller's error handler.
        error_code = error_info.get("Code") if isinstance(error_info, dict) else None
        # Permanent failures: file/bucket doesn't exist
        if error_code in ("NoSuchBucket", "NoSuchKey", "404"):
            return True

    # Fallback: Check error message for known permanent failure patterns
    # This is less ideal but provides a safety net for edge cases
    # (the match is a case-sensitive substring test on str(error)).
    error_str = str(error)
    permanent_patterns = ("NoSuchBucket", "NoSuchKey", "not found", "FileNotFoundError")

    return any(pattern in error_str for pattern in permanent_patterns)
# Save the file using the storage service. - await storage_service.save_file(flow_id=str(current_user.id), file_name=file_name, data=file_content) + await storage_service.save_file(flow_id=str(current_user.id), file_name=file_name, data=file_content, append=append) return file_id, file_name @@ -92,6 +134,8 @@ async def upload_user_file( current_user: CurrentActiveUser, storage_service: Annotated[StorageService, Depends(get_storage_service)], settings_service: Annotated[SettingsService, Depends(get_settings_service)], + *, + append: bool = False, ) -> UploadFileResponse: """Upload a file for the current user and track it in the database.""" # Get the max allowed file size from settings (in MB) @@ -124,12 +168,27 @@ async def upload_user_file( mcp_file = await get_mcp_file(current_user) mcp_file_ext = await get_mcp_file(current_user, extension=True) + # Initialize existing_file for append mode + existing_file = None + if new_filename == mcp_file_ext: # Check if an existing record exists; if so, delete it to replace with the new one existing_mcp_file = await get_file_by_name(mcp_file, current_user, session) if existing_mcp_file: await delete_file(existing_mcp_file.id, current_user, session, storage_service) + # Flush the session to ensure the deletion is committed before creating the new file + await session.flush() unique_filename = new_filename + elif append: + # In append mode, check if file exists and reuse the same filename + existing_file = await get_file_by_name(root_filename, current_user, session) + if existing_file: + # File exists, append to it by reusing the same filename + # Extract the filename from the path + unique_filename = existing_file.path.split("/")[-1] if "/" in existing_file.path else existing_file.path + else: + # File doesn't exist yet, create new one with extension + unique_filename = f"{root_filename}.{file_extension}" if file_extension else root_filename else: # For normal files, ensure unique name by appending a count if necessary stmt = 
select(UserFile).where( @@ -153,32 +212,59 @@ async def upload_user_file( # Create the unique filename with extension for storage unique_filename = f"{root_filename}.{file_extension}" if file_extension else root_filename - # Read file content and save with unique filename + # Read file content, save with unique filename, and compute file size in one routine try: file_id, stored_file_name = await save_file_routine( - file, storage_service, current_user, file_name=unique_filename + file, storage_service, current_user, file_name=unique_filename, append=append + ) + file_size = await storage_service.get_file_size( + flow_id=str(current_user.id), + file_name=stored_file_name, ) + except FileNotFoundError as e: + # S3 bucket doesn't exist or file not found, or file was uploaded but can't be found + raise HTTPException(status_code=404, detail=str(e)) from e + except PermissionError as e: + # Access denied or invalid credentials + raise HTTPException(status_code=403, detail=str(e)) from e except Exception as e: - raise HTTPException(status_code=500, detail=f"Error saving file: {e}") from e - - # Compute the file size based on the path - file_size = await storage_service.get_file_size( - flow_id=str(current_user.id), - file_name=stored_file_name, - ) + # General error saving file or getting file size + raise HTTPException(status_code=500, detail=f"Error accessing file: {e}") from e + + if append and existing_file: + existing_file.size = file_size + session.add(existing_file) + await session.commit() + await session.refresh(existing_file) + new_file = existing_file + else: + # Create a new file record + new_file = UserFile( + id=file_id, + user_id=current_user.id, + name=root_filename, + path=f"{current_user.id}/{stored_file_name}", + size=file_size, + ) - # Create a new file record - new_file = UserFile( - id=file_id, - user_id=current_user.id, - name=root_filename, - path=f"{current_user.id}/{stored_file_name}", - size=file_size, - ) session.add(new_file) - - await 
session.commit() - await session.refresh(new_file) + try: + await session.flush() + await session.refresh(new_file) + except Exception as db_err: + # Database insert failed - clean up the uploaded file to avoid orphaned files + try: + await storage_service.delete_file(flow_id=str(current_user.id), file_name=stored_file_name) + except OSError as e: + # If delete fails, just log the error + await logger.aerror(f"Failed to clean up uploaded file {stored_file_name}: {e}") + + raise HTTPException( + status_code=500, detail=f"Error inserting file metadata into database: {db_err}" + ) from db_err + except HTTPException: + # Re-raise HTTP exceptions (like 409 conflicts) without modification + raise except Exception as e: # Optionally, you could also delete the file from disk if the DB insert fails. raise HTTPException(status_code=500, detail=f"Database error: {e}") from e @@ -241,7 +327,7 @@ async def load_sample_files(current_user: CurrentActiveUser, session: DbSession, session.add(sample_file) - await session.commit() + await session.flush() await session.refresh(sample_file) @@ -288,19 +374,84 @@ async def delete_files_batch( if not files: raise HTTPException(status_code=404, detail="No files found") + # Track storage deletion failures + storage_failures = [] + # Track database deletion failures + db_failures = [] + # Delete all files from the storage service for file in files: - await storage_service.delete_file(flow_id=str(current_user.id), file_name=file.path) - await session.delete(file) - - # Delete all files from the database - await session.commit() # Commit deletion + # Extract just the filename from the path (strip user_id prefix) + file_name = file.path.split("/")[-1] + storage_deleted = False + + try: + await storage_service.delete_file(flow_id=str(current_user.id), file_name=file_name) + storage_deleted = True + except OSError as err: + # Check if this is a "permanent" failure where file/storage is gone + # These are safe to delete from DB even if storage 
deletion failed + if is_permanent_storage_failure(err): + # File/storage is permanently gone - safe to delete from DB + await logger.awarning( + "File %s not found in storage (permanent failure), will remove from database: %s", + file_name, + err, + ) + storage_deleted = True # Treat as "deleted" for DB purposes + else: + # Transient failure (network, timeout, permissions) - keep in DB for retry + storage_failures.append(f"{file_name}: {err}") + await logger.awarning( + "Failed to delete file %s from storage (transient error, keeping in database for retry): %s", + file_name, + err, + ) + + # Only delete from database if storage deletion succeeded OR it was a permanent failure + if storage_deleted: + try: + await session.delete(file) + except OSError as db_error: + # Log database deletion failure but continue processing remaining files + db_failures.append(f"{file_name}: {db_error}") + await logger.aerror( + "Failed to delete file %s from database: %s", + file_name, + db_error, + ) + + # If there were storage failures, include them in the response + if storage_failures: + await logger.awarning( + "Batch delete completed with %d storage failures: %s", len(storage_failures), storage_failures + ) + # If there were database failures, log them + if db_failures: + await logger.aerror("Batch delete completed with %d database failures: %s", len(db_failures), db_failures) + # If all database deletions failed, raise an error + if len(db_failures) == len(files): + raise HTTPException(status_code=500, detail=f"Failed to delete any files from database: {db_failures}") + + # Calculate how many files were actually deleted from database + # Files successfully deleted = total - (kept due to transient storage failures) - (DB deletion failures) + files_deleted = len(files) - len(storage_failures) - len(db_failures) + files_kept = len(storage_failures) # Files with transient storage failures kept in DB + + # Build response message + if files_deleted == len(files): + message = 
f"{files_deleted} files deleted successfully" + elif files_deleted > 0: + message = f"{files_deleted} files deleted successfully" + if files_kept > 0: + message += f", {files_kept} files kept in database due to transient storage errors (can retry)" + else: + message = "No files were deleted from database" except Exception as e: - await session.rollback() # Rollback on failure raise HTTPException(status_code=500, detail=f"Error deleting files: {e}") from e - return {"message": f"{len(files)} files deleted successfully"} + return {"message": message} @router.post("/batch/", status_code=HTTPStatus.OK) @@ -352,6 +503,8 @@ async def download_files_batch( headers={"Content-Disposition": f"attachment; filename={filename}"}, ) + except FileNotFoundError as e: + raise HTTPException(status_code=404, detail=f"File not found: {e}") from e except Exception as e: raise HTTPException(status_code=500, detail=f"Error downloading files: {e}") from e @@ -422,18 +575,25 @@ async def download_file( # Get the basename of the file path file_name = file.path.split("/")[-1] - # Get file stream - file_stream = await storage_service.get_file(flow_id=str(current_user.id), file_name=file_name) - - if file_stream is None: - raise HTTPException(status_code=404, detail="File stream not available") - # If return_content is True, read the file content and return it if return_content: - return await read_file_content(file_stream, decode=True) - - # For streaming, ensure file_stream is an async iterator returning bytes - byte_stream = byte_stream_generator(file_stream) + # For content return, get the full file + file_content = await storage_service.get_file(flow_id=str(current_user.id), file_name=file_name) + if file_content is None: + raise HTTPException(status_code=404, detail="File not found") + return await read_file_content(file_content, decode=True) + + # For streaming, use the appropriate method based on storage type + if hasattr(storage_service, "get_file_stream"): + # S3 storage - use 
streaming method + file_stream = storage_service.get_file_stream(flow_id=str(current_user.id), file_name=file_name) + byte_stream = file_stream + else: + # Local storage - get file and convert to stream + file_content = await storage_service.get_file(flow_id=str(current_user.id), file_name=file_name) + if file_content is None: + raise HTTPException(status_code=404, detail="File not found") + byte_stream = byte_stream_generator(file_content) # Create the filename with extension file_extension = Path(file.path).suffix @@ -448,6 +608,8 @@ async def download_file( except HTTPException: raise + except FileNotFoundError as e: + raise HTTPException(status_code=404, detail=f"File not found: {e}") from e except Exception as e: raise HTTPException(status_code=500, detail=f"Error downloading file: {e}") from e @@ -466,7 +628,7 @@ async def edit_file_name( # Update the file name file.name = name - await session.commit() + session.add(file) except Exception as e: raise HTTPException(status_code=500, detail=f"Error editing file: {e}") from e @@ -487,13 +649,52 @@ async def delete_file( if not file_to_delete: raise HTTPException(status_code=404, detail="File not found") - # Delete the file from the storage service - await storage_service.delete_file(flow_id=str(current_user.id), file_name=file_to_delete.path) + # Extract just the filename from the path (strip user_id prefix) + file_name = file_to_delete.path.split("/")[-1] - # Delete from the database - await session.delete(file_to_delete) - await session.commit() + # Delete the file from the storage service first + storage_deleted = False + try: + await storage_service.delete_file(flow_id=str(current_user.id), file_name=file_name) + storage_deleted = True + except Exception as err: + # Check if this is a "permanent" failure where file/storage is gone + # These are safe to delete from DB even if storage deletion failed + if is_permanent_storage_failure(err): + await logger.awarning( + "File %s not found in storage (permanent 
failure), will remove from database: %s", + file_name, + err, + ) + storage_deleted = True + else: + # Transient failure (network, timeout, permissions) - keep in DB for retry + await logger.awarning( + "Failed to delete file %s from storage (transient error, keeping in database for retry): %s", + file_name, + err, + ) + # Don't delete from DB - user can retry + raise HTTPException( + status_code=500, + detail=f"Failed to delete file from storage. Please try again. Error: {err}", + ) from err + + # Only delete from database if storage deletion succeeded OR it was a permanent failure + if storage_deleted: + try: + await session.delete(file_to_delete) + except Exception as db_error: + await logger.aerror( + "Failed to delete file %s from database: %s", + file_to_delete.name, + db_error, + ) + raise HTTPException( + status_code=500, detail=f"Error deleting file from database: {db_error}" + ) from db_error + return {"detail": f"File {file_to_delete.name} deleted successfully"} except HTTPException: # Re-raise HTTPException to avoid being caught by the generic exception handler raise @@ -501,7 +702,6 @@ async def delete_file( # Log and return a generic server error await logger.aerror("Error deleting file %s: %s", file_id, e) raise HTTPException(status_code=500, detail=f"Error deleting file: {e}") from e - return {"detail": f"File {file_to_delete.name} deleted successfully"} @router.delete("") @@ -518,16 +718,77 @@ async def delete_all_files( results = await session.exec(stmt) files = results.all() + storage_failures = [] + db_failures = [] + # Delete all files from the storage service for file in files: - await storage_service.delete_file(flow_id=str(current_user.id), file_name=file.path) - await session.delete(file) + # Extract just the filename from the path (strip user_id prefix) + file_name = file.path.split("/")[-1] + storage_deleted = False + + try: + await storage_service.delete_file(flow_id=str(current_user.id), file_name=file_name) + storage_deleted = True + 
except OSError as err: + # Check if this is a "permanent" failure where file/storage is gone + # These are safe to delete from DB even if storage deletion failed + if is_permanent_storage_failure(err): + # File/storage is permanently gone - safe to delete from DB + await logger.awarning( + "File %s not found in storage, also removing from database: %s", + file_name, + err, + ) + storage_deleted = True + else: + # Transient failure (network, timeout, permissions) - keep in DB for retry + storage_failures.append(f"{file_name}: {err}") + await logger.awarning( + "Failed to delete file %s from storage (transient error, keeping in database for retry): %s", + file_name, + err, + ) + + # Only delete from database if storage deletion succeeded OR it was a permanent failure + if storage_deleted: + try: + await session.delete(file) + except OSError as db_error: + # Log database deletion failure but continue processing remaining files + db_failures.append(f"{file_name}: {db_error}") + await logger.aerror( + "Failed to delete file %s from database: %s", + file_name, + db_error, + ) + + if storage_failures: + await logger.awarning( + "Batch delete completed with %d storage failures: %s", len(storage_failures), storage_failures + ) - # Delete all files from the database - await session.commit() # Commit deletion + if db_failures: + await logger.aerror("Batch delete completed with %d database failures: %s", len(db_failures), db_failures) + # If all database deletions failed, raise an error + if len(db_failures) == len(files): + raise HTTPException(status_code=500, detail=f"Failed to delete any files from database: {db_failures}") + + # Calculate how many files were actually deleted from database + # Files successfully deleted = total - (kept due to transient storage failures) - (DB deletion failures) + files_deleted = len(files) - len(storage_failures) - len(db_failures) + files_kept = len(storage_failures) + len(db_failures) + + if files_deleted == len(files): + message = f"All 
{files_deleted} files deleted successfully" + elif files_deleted > 0: + message = f"{files_deleted} files deleted successfully" + if files_kept > 0: + message += f", {files_kept} files failed to delete. See logs for details." + else: + message = "Failed to delete files. See logs for details." except Exception as e: - await session.rollback() # Rollback on failure - raise HTTPException(status_code=500, detail=f"Error deleting files: {e}") from e + raise HTTPException(status_code=500, detail=f"Error deleting all files: {e}") from e - return {"message": "All files deleted successfully"} + return {"message": message} diff --git a/src/backend/base/langflow/helpers/flow.py b/src/backend/base/langflow/helpers/flow.py index 46f4b3810f33..fae1d1e390b5 100644 --- a/src/backend/base/langflow/helpers/flow.py +++ b/src/backend/base/langflow/helpers/flow.py @@ -6,7 +6,8 @@ from fastapi import HTTPException from lfx.log.logger import logger from pydantic.v1 import BaseModel, Field, create_model -from sqlmodel import select +from sqlalchemy.orm import aliased +from sqlmodel import asc, desc, select from langflow.schema.schema import INPUT_FIELD_NAME from langflow.services.database.models.flow.model import Flow, FlowRead @@ -19,13 +20,17 @@ from lfx.graph.schema import RunOutputs from lfx.graph.vertex.base import Vertex - from langflow.schema.data import Data +from langflow.schema.data import Data INPUT_TYPE_MAP = { "ChatInput": {"type_hint": "Optional[str]", "default": '""'}, "TextInput": {"type_hint": "Optional[str]", "default": '""'}, "JSONInput": {"type_hint": "Optional[dict]", "default": "{}"}, } +SORT_DISPATCHER = { + "asc": asc, + "desc": desc, +} async def list_flows(*, user_id: str | None = None) -> list[Data]: @@ -44,6 +49,120 @@ async def list_flows(*, user_id: str | None = None) -> list[Data]: raise ValueError(msg) from e +async def list_flows_by_flow_folder( + *, + user_id: str | None = None, + flow_id: str | None = None, + order_params: dict | None = {"column": 
"updated_at", "direction": "desc"}, # noqa: B006 +) -> list[Data]: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not flow_id: + msg = "Flow ID is required" + raise ValueError(msg) + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id + uuid_flow_id = UUID(flow_id) if isinstance(flow_id, str) else flow_id + # get all flows belonging to the specified user + # and inside the same folder as the specified flow + flow_ = aliased(Flow) # flow table alias, used to retrieve the folder + stmt = ( + select(Flow.id, Flow.name, Flow.updated_at) + .join(flow_, Flow.folder_id == flow_.folder_id) + .where(flow_.id == uuid_flow_id) + .where(flow_.user_id == uuid_user_id) + .where(Flow.user_id == uuid_user_id) + .where(Flow.id != uuid_flow_id) + ) + # sort flows by the specified column and direction + if order_params is not None: + sort_col = getattr(Flow, order_params.get("column", "updated_at"), Flow.updated_at) + sort_dir = SORT_DISPATCHER.get(order_params.get("direction", "desc"), desc) + stmt = stmt.order_by(sort_dir(sort_col)) + + flows = (await session.exec(stmt)).all() + return [Data(data=dict(flow._mapping)) for flow in flows] # noqa: SLF001 + except Exception as e: + msg = f"Error listing flows: {e}" + raise ValueError(msg) from e + + +async def list_flows_by_folder_id( + *, user_id: str | None = None, folder_id: str | None = None, order_params: dict | None = None +) -> list[Data]: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not folder_id: + msg = "Folder ID is required" + raise ValueError(msg) + + if order_params is None: + order_params = {"column": "updated_at", "direction": "desc"} + + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id + uuid_folder_id = UUID(folder_id) if isinstance(folder_id, str) else folder_id + stmt = ( + select(Flow.id, Flow.name, Flow.updated_at) + 
.where(Flow.user_id == uuid_user_id) + .where(Flow.folder_id == uuid_folder_id) + ) + if order_params is not None: + sort_col = getattr(Flow, order_params.get("column", "updated_at"), Flow.updated_at) + sort_dir = SORT_DISPATCHER.get(order_params.get("direction", "desc"), desc) + stmt = stmt.order_by(sort_dir(sort_col)) + + flows = (await session.exec(stmt)).all() + return [Data(data=dict(flow._mapping)) for flow in flows] # noqa: SLF001 + except Exception as e: + msg = f"Error listing flows: {e}" + raise ValueError(msg) from e + + +async def get_flow_by_id_or_name( + *, + user_id: str | None = None, + flow_id: str | None = None, + flow_name: str | None = None, +) -> Data | None: + if not user_id: + msg = "Session is invalid" + raise ValueError(msg) + if not (flow_id or flow_name): + msg = "Flow ID or Flow Name is required" + raise ValueError(msg) + + # set user provided flow id or flow name. + # if both are provided, flow_id is used. + attr, val = None, None + if flow_name: + attr = "name" + val = flow_name + if flow_id: + attr = "id" + val = flow_id + if not (attr and val): + msg = "Flow id or Name is required" + raise ValueError(msg) + try: + async with session_scope() as session: + uuid_user_id = UUID(user_id) if isinstance(user_id, str) else user_id # type: ignore[assignment] + uuid_flow_id_or_name = val # type: ignore[assignment] + if isinstance(val, str) and attr == "id": + uuid_flow_id_or_name = UUID(val) # type: ignore[assignment] + stmt = select(Flow).where(Flow.user_id == uuid_user_id).where(getattr(Flow, attr) == uuid_flow_id_or_name) + flow = (await session.exec(stmt)).first() + return flow.to_data() if flow else None + + except Exception as e: + msg = f"Error getting flow by id: {e}" + raise ValueError(msg) from e + + async def load_flow( user_id: str, flow_id: str | None = None, flow_name: str | None = None, tweaks: dict | None = None ) -> Graph: diff --git a/src/backend/base/langflow/helpers/user.py b/src/backend/base/langflow/helpers/user.py index 
268b0dac08b4..34417acd508d 100644 --- a/src/backend/base/langflow/helpers/user.py +++ b/src/backend/base/langflow/helpers/user.py @@ -1,15 +1,15 @@ from uuid import UUID from fastapi import HTTPException +from lfx.services.deps import session_scope_readonly from sqlmodel import select from langflow.services.database.models.flow.model import Flow from langflow.services.database.models.user.model import User, UserRead -from langflow.services.deps import session_scope async def get_user_by_flow_id_or_endpoint_name(flow_id_or_name: str) -> UserRead | None: - async with session_scope() as session: + async with session_scope_readonly() as session: try: flow_id = UUID(flow_id_or_name) flow = await session.get(Flow, flow_id) diff --git a/src/backend/base/langflow/initial_setup/setup.py b/src/backend/base/langflow/initial_setup/setup.py index 953159f93428..06080987979a 100644 --- a/src/backend/base/langflow/initial_setup/setup.py +++ b/src/backend/base/langflow/initial_setup/setup.py @@ -702,7 +702,6 @@ async def delete_starter_projects(session, folder_id) -> None: flows = await get_all_flows_similar_to_project(session, folder_id) for flow in flows: await session.delete(flow) - await session.commit() async def folder_exists(session, folder_name): @@ -716,7 +715,7 @@ async def get_or_create_starter_folder(session): new_folder = FolderCreate(name=STARTER_FOLDER_NAME, description=STARTER_FOLDER_DESCRIPTION) db_folder = Folder.model_validate(new_folder, from_attributes=True) session.add(db_folder) - await session.commit() + await session.flush() await session.refresh(db_folder) return db_folder stmt = select(Folder).where(Folder.name == STARTER_FOLDER_NAME) @@ -1034,8 +1033,8 @@ async def upsert_flow_from_file(file_content: AnyStr, filename: str, session: As # Ensure that the flow is associated with an existing default folder if existing.folder_id is None: - folder_id = await get_or_create_default_folder(session, user_id) - existing.folder_id = folder_id + folder = await 
get_or_create_default_folder(session, user_id) + existing.folder_id = folder.id if isinstance(existing.id, str): try: @@ -1094,7 +1093,8 @@ async def create_or_update_starter_projects(all_types_dict: dict) -> None: # 1. Delete all existing starter projects successfully_updated_projects = 0 await delete_starter_projects(session, new_folder.id) - await copy_profile_pictures() + # Profile pictures are now served directly from the package installation directory + # No need to copy them to config_dir # 2. Update all starter projects with the latest component versions (this modifies the actual file data) for project_path, project in starter_projects: @@ -1245,7 +1245,7 @@ async def get_or_create_default_folder(session: AsyncSession, user_id: UUID) -> legacy_folder.description = DEFAULT_FOLDER_DESCRIPTION session.add(legacy_folder) try: - await session.commit() + await session.flush() await session.refresh(legacy_folder) return FolderRead.model_validate(legacy_folder, from_attributes=True) except sa.exc.IntegrityError: @@ -1257,7 +1257,7 @@ async def get_or_create_default_folder(session: AsyncSession, user_id: UUID) -> try: folder_obj = Folder(user_id=user_id, name=DEFAULT_FOLDER_NAME, description=DEFAULT_FOLDER_DESCRIPTION) session.add(folder_obj) - await session.commit() + await session.flush() await session.refresh(folder_obj) except sa.exc.IntegrityError as e: # Another worker may have created the folder concurrently. 
@@ -1294,7 +1294,7 @@ async def sync_flows_from_fs(): setattr(flow, field_name, new_value) if folder_id := update_data.get("folder_id"): flow.folder_id = UUID(folder_id) - await session.commit() + await session.flush() await session.refresh(flow) except Exception: # noqa: BLE001 await logger.aexception( diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json index fff9580895f2..bbcdbb49e997 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompt Chaining.json @@ -362,7 +362,7 @@ "legacy": false, "lf_version": "1.5.0", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -410,7 +410,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n 
name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n 
DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. 
Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -632,7 +632,7 @@ "legacy": false, "lf_version": "1.5.0", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "cae45e2d53f6", "dependencies": { "dependencies": [ { @@ -706,7 +706,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom 
lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n 
\"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom 
lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id or self.graph.session_id or \"\"\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n 
return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json 
b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json index f65b36a724b8..969f0f19c1a5 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Basic Prompting.json @@ -117,7 +117,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -165,7 +165,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n 
info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n 
icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. 
Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -584,7 +584,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "cae45e2d53f6", "dependencies": { "dependencies": [ { @@ -658,7 +658,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom 
lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n 
\"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom 
lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id or self.graph.session_id or \"\"\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n 
return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json 
b/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json index 250ded6d972e..870c3c48e5a3 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Blog Writer.json @@ -352,7 +352,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "3dd28ea591b9", + "code_hash": "3d80f125b734", "dependencies": { "dependencies": [ { @@ -400,7 +400,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.io.text import TextComponent\nfrom lfx.io import MultilineInput, Output\nfrom lfx.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" + "value": "from lfx.base.io.text import TextComponent\nfrom lfx.io import MultilineInput, Output\nfrom lfx.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/text-input-and-output\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" }, "input_value": { "_input_type": "MultilineInput", @@ -477,7 +477,7 @@ "legacy": 
false, "lf_version": "1.4.2", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "cae45e2d53f6", "dependencies": { "dependencies": [ { @@ -551,7 +551,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. 
If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = 
self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return 
\"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. 
If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id or self.graph.session_id or 
\"\"\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n 
clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -777,7 +777,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "17514953c7e8", + "code_hash": "3cda25c3f7b5", "dependencies": { "dependencies": [ { @@ -827,7 +827,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n 
display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. 
Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" + "value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n 
name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in 
input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, "input_data": { "_input_type": 
"HandleInput", @@ -973,7 +973,7 @@ "legacy": false, "lf_version": "1.4.2", "metadata": { - "code_hash": "cdb7d379306e", + "code_hash": "47d3ccb92d71", "dependencies": { "dependencies": [ { @@ -1085,7 +1085,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import importlib\nimport re\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom langchain_community.document_loaders import RecursiveUrlLoader\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.data import safe_convert\nfrom lfx.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SliderInput, TableInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.utils.request_utils import get_user_agent\n\n# Constants\nDEFAULT_TIMEOUT = 30\nDEFAULT_MAX_DEPTH = 1\nDEFAULT_FORMAT = \"Text\"\n\n\nURL_REGEX = re.compile(\n r\"^(https?:\\/\\/)?\" r\"(www\\.)?\" r\"([a-zA-Z0-9.-]+)\" r\"(\\.[a-zA-Z]{2,})?\" r\"(:\\d+)?\" r\"(\\/[^\\s]*)?$\",\n re.IGNORECASE,\n)\n\nUSER_AGENT = None\n# Check if langflow is installed using importlib.util.find_spec(name))\nif importlib.util.find_spec(\"langflow\"):\n langflow_installed = True\n USER_AGENT = get_user_agent()\nelse:\n langflow_installed = False\n USER_AGENT = \"lfx\"\n\n\nclass URLComponent(Component):\n \"\"\"A component that loads and parses content from web pages recursively.\n\n This component allows fetching content from one or more URLs, with options to:\n - Control crawl depth\n - Prevent crawling outside the root domain\n - Use async loading for better performance\n - Extract either raw HTML or clean text\n - Configure request headers and timeouts\n \"\"\"\n\n display_name = \"URL\"\n description = \"Fetch content from one or more web pages, following links recursively.\"\n documentation: str = \"https://docs.langflow.org/components-data#url\"\n icon = \"layout-template\"\n name 
= \"URLComponent\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs to crawl recursively, by clicking the '+' button.\",\n is_list=True,\n tool_mode=True,\n placeholder=\"Enter a URL...\",\n list_add_label=\"Add URL\",\n input_types=[],\n ),\n SliderInput(\n name=\"max_depth\",\n display_name=\"Depth\",\n info=(\n \"Controls how many 'clicks' away from the initial page the crawler will go:\\n\"\n \"- depth 1: only the initial page\\n\"\n \"- depth 2: initial page + all pages linked directly from it\\n\"\n \"- depth 3: initial page + direct links + links found on those direct link pages\\n\"\n \"Note: This is about link traversal, not URL path depth.\"\n ),\n value=DEFAULT_MAX_DEPTH,\n range_spec=RangeSpec(min=1, max=5, step=1),\n required=False,\n min_label=\" \",\n max_label=\" \",\n min_label_icon=\"None\",\n max_label_icon=\"None\",\n # slider_input=True\n ),\n BoolInput(\n name=\"prevent_outside\",\n display_name=\"Prevent Outside\",\n info=(\n \"If enabled, only crawls URLs within the same domain as the root URL. \"\n \"This helps prevent the crawler from going to external websites.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"use_async\",\n display_name=\"Use Async\",\n info=(\n \"If enabled, uses asynchronous loading which can be significantly faster \"\n \"but might use more system resources.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n DropdownInput(\n name=\"format\",\n display_name=\"Output Format\",\n info=\"Output Format. 
Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.\",\n options=[\"Text\", \"HTML\"],\n value=DEFAULT_FORMAT,\n advanced=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n info=\"Timeout for the request in seconds.\",\n value=DEFAULT_TIMEOUT,\n required=False,\n advanced=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": USER_AGENT}],\n advanced=True,\n input_types=[\"DataFrame\"],\n ),\n BoolInput(\n name=\"filter_text_html\",\n display_name=\"Filter Text/HTML\",\n info=\"If enabled, filters out text/css content type from the results.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"continue_on_failure\",\n display_name=\"Continue on Failure\",\n info=\"If enabled, continues crawling even if some requests fail.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"check_response_status\",\n display_name=\"Check Response Status\",\n info=\"If enabled, checks the response status of the request.\",\n value=False,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"autoset_encoding\",\n display_name=\"Autoset Encoding\",\n info=\"If enabled, automatically sets the encoding of the request.\",\n value=True,\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Extracted Pages\", name=\"page_results\", method=\"fetch_content\"),\n Output(display_name=\"Raw Content\", name=\"raw_results\", method=\"fetch_content_as_message\", tool_mode=False),\n ]\n\n @staticmethod\n def validate_url(url: str) -> bool:\n \"\"\"Validates if the given string matches URL 
pattern.\n\n Args:\n url: The URL string to validate\n\n Returns:\n bool: True if the URL is valid, False otherwise\n \"\"\"\n return bool(URL_REGEX.match(url))\n\n def ensure_url(self, url: str) -> str:\n \"\"\"Ensures the given string is a valid URL.\n\n Args:\n url: The URL string to validate and normalize\n\n Returns:\n str: The normalized URL\n\n Raises:\n ValueError: If the URL is invalid\n \"\"\"\n url = url.strip()\n if not url.startswith((\"http://\", \"https://\")):\n url = \"https://\" + url\n\n if not self.validate_url(url):\n msg = f\"Invalid URL: {url}\"\n raise ValueError(msg)\n\n return url\n\n def _create_loader(self, url: str) -> RecursiveUrlLoader:\n \"\"\"Creates a RecursiveUrlLoader instance with the configured settings.\n\n Args:\n url: The URL to load\n\n Returns:\n RecursiveUrlLoader: Configured loader instance\n \"\"\"\n headers_dict = {header[\"key\"]: header[\"value\"] for header in self.headers if header[\"value\"] is not None}\n extractor = (lambda x: x) if self.format == \"HTML\" else (lambda x: BeautifulSoup(x, \"lxml\").get_text())\n\n return RecursiveUrlLoader(\n url=url,\n max_depth=self.max_depth,\n prevent_outside=self.prevent_outside,\n use_async=self.use_async,\n extractor=extractor,\n timeout=self.timeout,\n headers=headers_dict,\n check_response_status=self.check_response_status,\n continue_on_failure=self.continue_on_failure,\n base_url=url, # Add base_url to ensure consistent domain crawling\n autoset_encoding=self.autoset_encoding, # Enable automatic encoding detection\n exclude_dirs=[], # Allow customization of excluded directories\n link_regex=None, # Allow customization of link filtering\n )\n\n def fetch_url_contents(self) -> list[dict]:\n \"\"\"Load documents from the configured URLs.\n\n Returns:\n List[Data]: List of Data objects containing the fetched content\n\n Raises:\n ValueError: If no valid URLs are provided or if there's an error loading documents\n \"\"\"\n try:\n urls = list({self.ensure_url(url) for url 
in self.urls if url.strip()})\n logger.debug(f\"URLs: {urls}\")\n if not urls:\n msg = \"No valid URLs provided.\"\n raise ValueError(msg)\n\n all_docs = []\n for url in urls:\n logger.debug(f\"Loading documents from {url}\")\n\n try:\n loader = self._create_loader(url)\n docs = loader.load()\n\n if not docs:\n logger.warning(f\"No documents found for {url}\")\n continue\n\n logger.debug(f\"Found {len(docs)} documents from {url}\")\n all_docs.extend(docs)\n\n except requests.exceptions.RequestException as e:\n logger.exception(f\"Error loading documents from {url}: {e}\")\n continue\n\n if not all_docs:\n msg = \"No documents were successfully loaded from any URL\"\n raise ValueError(msg)\n\n # data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]\n data = [\n {\n \"text\": safe_convert(doc.page_content, clean_data=True),\n \"url\": doc.metadata.get(\"source\", \"\"),\n \"title\": doc.metadata.get(\"title\", \"\"),\n \"description\": doc.metadata.get(\"description\", \"\"),\n \"content_type\": doc.metadata.get(\"content_type\", \"\"),\n \"language\": doc.metadata.get(\"language\", \"\"),\n }\n for doc in all_docs\n ]\n except Exception as e:\n error_msg = e.message if hasattr(e, \"message\") else e\n msg = f\"Error loading documents: {error_msg!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n return data\n\n def fetch_content(self) -> DataFrame:\n \"\"\"Convert the documents to a DataFrame.\"\"\"\n return DataFrame(data=self.fetch_url_contents())\n\n def fetch_content_as_message(self) -> Message:\n \"\"\"Convert the documents to a Message.\"\"\"\n url_contents = self.fetch_url_contents()\n return Message(text=\"\\n\\n\".join([x[\"text\"] for x in url_contents]), data={\"data\": url_contents})\n" + "value": "import importlib\nimport re\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom langchain_community.document_loaders import RecursiveUrlLoader\n\nfrom lfx.custom.custom_component.component import Component\nfrom 
lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.data import safe_convert\nfrom lfx.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SliderInput, TableInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.utils.request_utils import get_user_agent\n\n# Constants\nDEFAULT_TIMEOUT = 30\nDEFAULT_MAX_DEPTH = 1\nDEFAULT_FORMAT = \"Text\"\n\n\nURL_REGEX = re.compile(\n r\"^(https?:\\/\\/)?\" r\"(www\\.)?\" r\"([a-zA-Z0-9.-]+)\" r\"(\\.[a-zA-Z]{2,})?\" r\"(:\\d+)?\" r\"(\\/[^\\s]*)?$\",\n re.IGNORECASE,\n)\n\nUSER_AGENT = None\n# Check if langflow is installed using importlib.util.find_spec(name))\nif importlib.util.find_spec(\"langflow\"):\n langflow_installed = True\n USER_AGENT = get_user_agent()\nelse:\n langflow_installed = False\n USER_AGENT = \"lfx\"\n\n\nclass URLComponent(Component):\n \"\"\"A component that loads and parses content from web pages recursively.\n\n This component allows fetching content from one or more URLs, with options to:\n - Control crawl depth\n - Prevent crawling outside the root domain\n - Use async loading for better performance\n - Extract either raw HTML or clean text\n - Configure request headers and timeouts\n \"\"\"\n\n display_name = \"URL\"\n description = \"Fetch content from one or more web pages, following links recursively.\"\n documentation: str = \"https://docs.langflow.org/url\"\n icon = \"layout-template\"\n name = \"URLComponent\"\n\n inputs = [\n MessageTextInput(\n name=\"urls\",\n display_name=\"URLs\",\n info=\"Enter one or more URLs to crawl recursively, by clicking the '+' button.\",\n is_list=True,\n tool_mode=True,\n placeholder=\"Enter a URL...\",\n list_add_label=\"Add URL\",\n input_types=[],\n ),\n SliderInput(\n name=\"max_depth\",\n display_name=\"Depth\",\n info=(\n \"Controls how many 'clicks' away from the initial page the crawler will go:\\n\"\n \"- depth 1: only the initial page\\n\"\n \"- 
depth 2: initial page + all pages linked directly from it\\n\"\n \"- depth 3: initial page + direct links + links found on those direct link pages\\n\"\n \"Note: This is about link traversal, not URL path depth.\"\n ),\n value=DEFAULT_MAX_DEPTH,\n range_spec=RangeSpec(min=1, max=5, step=1),\n required=False,\n min_label=\" \",\n max_label=\" \",\n min_label_icon=\"None\",\n max_label_icon=\"None\",\n # slider_input=True\n ),\n BoolInput(\n name=\"prevent_outside\",\n display_name=\"Prevent Outside\",\n info=(\n \"If enabled, only crawls URLs within the same domain as the root URL. \"\n \"This helps prevent the crawler from going to external websites.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"use_async\",\n display_name=\"Use Async\",\n info=(\n \"If enabled, uses asynchronous loading which can be significantly faster \"\n \"but might use more system resources.\"\n ),\n value=True,\n required=False,\n advanced=True,\n ),\n DropdownInput(\n name=\"format\",\n display_name=\"Output Format\",\n info=\"Output Format. 
Use 'Text' to extract the text from the HTML or 'HTML' for the raw HTML content.\",\n options=[\"Text\", \"HTML\"],\n value=DEFAULT_FORMAT,\n advanced=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n info=\"Timeout for the request in seconds.\",\n value=DEFAULT_TIMEOUT,\n required=False,\n advanced=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": USER_AGENT}],\n advanced=True,\n input_types=[\"DataFrame\"],\n ),\n BoolInput(\n name=\"filter_text_html\",\n display_name=\"Filter Text/HTML\",\n info=\"If enabled, filters out text/css content type from the results.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"continue_on_failure\",\n display_name=\"Continue on Failure\",\n info=\"If enabled, continues crawling even if some requests fail.\",\n value=True,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"check_response_status\",\n display_name=\"Check Response Status\",\n info=\"If enabled, checks the response status of the request.\",\n value=False,\n required=False,\n advanced=True,\n ),\n BoolInput(\n name=\"autoset_encoding\",\n display_name=\"Autoset Encoding\",\n info=\"If enabled, automatically sets the encoding of the request.\",\n value=True,\n required=False,\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Extracted Pages\", name=\"page_results\", method=\"fetch_content\"),\n Output(display_name=\"Raw Content\", name=\"raw_results\", method=\"fetch_content_as_message\", tool_mode=False),\n ]\n\n @staticmethod\n def validate_url(url: str) -> bool:\n \"\"\"Validates if the given string matches URL 
pattern.\n\n Args:\n url: The URL string to validate\n\n Returns:\n bool: True if the URL is valid, False otherwise\n \"\"\"\n return bool(URL_REGEX.match(url))\n\n def ensure_url(self, url: str) -> str:\n \"\"\"Ensures the given string is a valid URL.\n\n Args:\n url: The URL string to validate and normalize\n\n Returns:\n str: The normalized URL\n\n Raises:\n ValueError: If the URL is invalid\n \"\"\"\n url = url.strip()\n if not url.startswith((\"http://\", \"https://\")):\n url = \"https://\" + url\n\n if not self.validate_url(url):\n msg = f\"Invalid URL: {url}\"\n raise ValueError(msg)\n\n return url\n\n def _create_loader(self, url: str) -> RecursiveUrlLoader:\n \"\"\"Creates a RecursiveUrlLoader instance with the configured settings.\n\n Args:\n url: The URL to load\n\n Returns:\n RecursiveUrlLoader: Configured loader instance\n \"\"\"\n headers_dict = {header[\"key\"]: header[\"value\"] for header in self.headers if header[\"value\"] is not None}\n extractor = (lambda x: x) if self.format == \"HTML\" else (lambda x: BeautifulSoup(x, \"lxml\").get_text())\n\n return RecursiveUrlLoader(\n url=url,\n max_depth=self.max_depth,\n prevent_outside=self.prevent_outside,\n use_async=self.use_async,\n extractor=extractor,\n timeout=self.timeout,\n headers=headers_dict,\n check_response_status=self.check_response_status,\n continue_on_failure=self.continue_on_failure,\n base_url=url, # Add base_url to ensure consistent domain crawling\n autoset_encoding=self.autoset_encoding, # Enable automatic encoding detection\n exclude_dirs=[], # Allow customization of excluded directories\n link_regex=None, # Allow customization of link filtering\n )\n\n def fetch_url_contents(self) -> list[dict]:\n \"\"\"Load documents from the configured URLs.\n\n Returns:\n List[Data]: List of Data objects containing the fetched content\n\n Raises:\n ValueError: If no valid URLs are provided or if there's an error loading documents\n \"\"\"\n try:\n urls = list({self.ensure_url(url) for url 
in self.urls if url.strip()})\n logger.debug(f\"URLs: {urls}\")\n if not urls:\n msg = \"No valid URLs provided.\"\n raise ValueError(msg)\n\n all_docs = []\n for url in urls:\n logger.debug(f\"Loading documents from {url}\")\n\n try:\n loader = self._create_loader(url)\n docs = loader.load()\n\n if not docs:\n logger.warning(f\"No documents found for {url}\")\n continue\n\n logger.debug(f\"Found {len(docs)} documents from {url}\")\n all_docs.extend(docs)\n\n except requests.exceptions.RequestException as e:\n logger.exception(f\"Error loading documents from {url}: {e}\")\n continue\n\n if not all_docs:\n msg = \"No documents were successfully loaded from any URL\"\n raise ValueError(msg)\n\n # data = [Data(text=doc.page_content, **doc.metadata) for doc in all_docs]\n data = [\n {\n \"text\": safe_convert(doc.page_content, clean_data=True),\n \"url\": doc.metadata.get(\"source\", \"\"),\n \"title\": doc.metadata.get(\"title\", \"\"),\n \"description\": doc.metadata.get(\"description\", \"\"),\n \"content_type\": doc.metadata.get(\"content_type\", \"\"),\n \"language\": doc.metadata.get(\"language\", \"\"),\n }\n for doc in all_docs\n ]\n except Exception as e:\n error_msg = e.message if hasattr(e, \"message\") else e\n msg = f\"Error loading documents: {error_msg!s}\"\n logger.exception(msg)\n raise ValueError(msg) from e\n return data\n\n def fetch_content(self) -> DataFrame:\n \"\"\"Convert the documents to a DataFrame.\"\"\"\n return DataFrame(data=self.fetch_url_contents())\n\n def fetch_content_as_message(self) -> Message:\n \"\"\"Convert the documents to a Message.\"\"\"\n url_contents = self.fetch_url_contents()\n return Message(text=\"\\n\\n\".join([x[\"text\"] for x in url_contents]), data={\"data\": url_contents})\n" }, "continue_on_failure": { "_input_type": "BoolInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json b/src/backend/base/langflow/initial_setup/starter_projects/Custom Component 
Generator.json index 61e033900cb2..e69b8ab92b79 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Custom Component Generator.json @@ -237,7 +237,7 @@ "legacy": false, "lf_version": "1.6.0", "metadata": { - "code_hash": "227e053b4704", + "code_hash": "efd064ef48ff", "dependencies": { "dependencies": [ { @@ -299,7 +299,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any, cast\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import data_to_text\nfrom lfx.inputs.inputs import DropdownInput, HandleInput, IntInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.memory import aget_messages, astore_message\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\nfrom lfx.utils.component_utils import set_current_fields, set_field_display\nfrom lfx.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass MemoryComponent(Component):\n display_name = \"Message History\"\n description = \"Stores or retrieves stored chat messages from Langflow tables or an external memory.\"\n documentation: str = \"https://docs.langflow.org/components-helpers#message-history\"\n icon = \"message-square-more\"\n name = \"Memory\"\n default_keys = [\"mode\", \"memory\", \"session_id\", \"context_id\"]\n mode_config = {\n \"Store\": [\"message\", \"memory\", \"sender\", \"sender_name\", \"session_id\", \"context_id\"],\n \"Retrieve\": [\"n_messages\", \"order\", \"template\", \"memory\", \"session_id\", \"context_id\"],\n }\n\n inputs = [\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Retrieve\", \"Store\"],\n value=\"Retrieve\",\n info=\"Operation mode: Store messages or Retrieve messages.\",\n 
real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The chat message to be stored.\",\n tool_mode=True,\n dynamic=True,\n show=False,\n ),\n HandleInput(\n name=\"memory\",\n display_name=\"External Memory\",\n input_types=[\"Memory\"],\n info=\"Retrieve messages from an external memory. If empty, it will use the Langflow tables.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"sender_type\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER, \"Machine and User\"],\n value=\"Machine and User\",\n info=\"Filter by sender type.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender\",\n display_name=\"Sender\",\n info=\"The sender of the message. Might be Machine or User. \"\n \"If empty, the current sender parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Filter by sender name.\",\n advanced=True,\n show=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Messages\",\n value=100,\n info=\"Number of messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n DropdownInput(\n name=\"order\",\n display_name=\"Order\",\n options=[\"Ascending\", \"Descending\"],\n value=\"Ascending\",\n info=\"Order of the messages.\",\n advanced=True,\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. 
\"\n \"It can contain the keys {text}, {sender} or any other key in the message data.\",\n value=\"{sender_name}: {text}\",\n advanced=True,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Message\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True),\n Output(display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True),\n ]\n\n def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:\n \"\"\"Dynamically show only the relevant output based on the selected output type.\"\"\"\n if field_name == \"mode\":\n # Start with empty outputs\n frontend_node[\"outputs\"] = []\n if field_value == \"Store\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Stored Messages\",\n name=\"stored_messages\",\n method=\"store_message\",\n hidden=True,\n dynamic=True,\n )\n ]\n if field_value == \"Retrieve\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Messages\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True\n ),\n Output(\n display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True\n ),\n ]\n return frontend_node\n\n async def store_message(self) -> Message:\n message = Message(text=self.message) if isinstance(self.message, str) else self.message\n\n message.context_id = self.context_id or message.context_id\n message.session_id = self.session_id or message.session_id\n message.sender = self.sender or message.sender or MESSAGE_SENDER_AI\n message.sender_name = self.sender_name or message.sender_name or MESSAGE_SENDER_NAME_AI\n\n stored_messages: list[Message] = []\n\n if self.memory:\n self.memory.context_id = message.context_id\n self.memory.session_id = message.session_id\n lc_message = message.to_lc_message()\n await self.memory.aadd_messages([lc_message])\n\n stored_messages = await self.memory.aget_messages() or []\n\n stored_messages = [Message.from_lc_message(m) for m 
in stored_messages] if stored_messages else []\n\n if message.sender:\n stored_messages = [m for m in stored_messages if m.sender == message.sender]\n else:\n await astore_message(message, flow_id=self.graph.flow_id)\n stored_messages = (\n await aget_messages(\n session_id=message.session_id,\n context_id=message.context_id,\n sender_name=message.sender_name,\n sender=message.sender,\n )\n or []\n )\n\n if not stored_messages:\n msg = \"No messages were stored. Please ensure that the session ID and sender are properly set.\"\n raise ValueError(msg)\n\n stored_message = stored_messages[0]\n self.status = stored_message\n return stored_message\n\n async def retrieve_messages(self) -> Data:\n sender_type = self.sender_type\n sender_name = self.sender_name\n session_id = self.session_id\n context_id = self.context_id\n n_messages = self.n_messages\n order = \"DESC\" if self.order == \"Descending\" else \"ASC\"\n\n if sender_type == \"Machine and User\":\n sender_type = None\n\n if self.memory and not hasattr(self.memory, \"aget_messages\"):\n memory_name = type(self.memory).__name__\n err_msg = f\"External Memory object ({memory_name}) must have 'aget_messages' method.\"\n raise AttributeError(err_msg)\n # Check if n_messages is None or 0\n if n_messages == 0:\n stored = []\n elif self.memory:\n # override session_id\n self.memory.session_id = session_id\n self.memory.context_id = context_id\n\n stored = await self.memory.aget_messages()\n # langchain memories are supposed to return messages in ascending order\n\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages first\n\n if order == \"DESC\":\n stored = stored[::-1] # Then reverse if needed\n\n stored = [Message.from_lc_message(m) for m in stored]\n if sender_type:\n expected_type = MESSAGE_SENDER_AI if sender_type == MESSAGE_SENDER_AI else MESSAGE_SENDER_USER\n stored = [m for m in stored if m.type == expected_type]\n else:\n # For internal memory, we always fetch the last N messages by ordering 
by DESC\n stored = await aget_messages(\n sender=sender_type,\n sender_name=sender_name,\n session_id=session_id,\n context_id=context_id,\n limit=10000,\n order=order,\n )\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages\n\n # self.status = stored\n return cast(\"Data\", stored)\n\n async def retrieve_messages_as_text(self) -> Message:\n stored_text = data_to_text(self.template, await self.retrieve_messages())\n # self.status = stored_text\n return Message(text=stored_text)\n\n async def retrieve_messages_dataframe(self) -> DataFrame:\n \"\"\"Convert the retrieved messages into a DataFrame.\n\n Returns:\n DataFrame: A DataFrame containing the message data.\n \"\"\"\n messages = await self.retrieve_messages()\n return DataFrame(messages)\n\n def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any, # noqa: ARG002\n field_name: str | None = None, # noqa: ARG002\n ) -> dotdict:\n return set_current_fields(\n build_config=build_config,\n action_fields=self.mode_config,\n selected_action=build_config[\"mode\"][\"value\"],\n default_fields=self.default_keys,\n func=set_field_display,\n )\n" + "value": "from typing import Any, cast\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import data_to_text\nfrom lfx.inputs.inputs import DropdownInput, HandleInput, IntInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.memory import aget_messages, astore_message\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\nfrom lfx.utils.component_utils import set_current_fields, set_field_display\nfrom lfx.utils.constants import MESSAGE_SENDER_AI, MESSAGE_SENDER_NAME_AI, MESSAGE_SENDER_USER\n\n\nclass MemoryComponent(Component):\n display_name = \"Message History\"\n description = \"Stores or retrieves stored chat messages from Langflow tables or an 
external memory.\"\n documentation: str = \"https://docs.langflow.org/message-history\"\n icon = \"message-square-more\"\n name = \"Memory\"\n default_keys = [\"mode\", \"memory\", \"session_id\", \"context_id\"]\n mode_config = {\n \"Store\": [\"message\", \"memory\", \"sender\", \"sender_name\", \"session_id\", \"context_id\"],\n \"Retrieve\": [\"n_messages\", \"order\", \"template\", \"memory\", \"session_id\", \"context_id\"],\n }\n\n inputs = [\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Retrieve\", \"Store\"],\n value=\"Retrieve\",\n info=\"Operation mode: Store messages or Retrieve messages.\",\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"message\",\n display_name=\"Message\",\n info=\"The chat message to be stored.\",\n tool_mode=True,\n dynamic=True,\n show=False,\n ),\n HandleInput(\n name=\"memory\",\n display_name=\"External Memory\",\n input_types=[\"Memory\"],\n info=\"Retrieve messages from an external memory. If empty, it will use the Langflow tables.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"sender_type\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER, \"Machine and User\"],\n value=\"Machine and User\",\n info=\"Filter by sender type.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender\",\n display_name=\"Sender\",\n info=\"The sender of the message. Might be Machine or User. \"\n \"If empty, the current sender parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Filter by sender name.\",\n advanced=True,\n show=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Messages\",\n value=100,\n info=\"Number of messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. 
If empty, the current session ID parameter will be used.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n DropdownInput(\n name=\"order\",\n display_name=\"Order\",\n options=[\"Ascending\", \"Descending\"],\n value=\"Ascending\",\n info=\"Order of the messages.\",\n advanced=True,\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {sender} or any other key in the message data.\",\n value=\"{sender_name}: {text}\",\n advanced=True,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Message\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True),\n Output(display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True),\n ]\n\n def update_outputs(self, frontend_node: dict, field_name: str, field_value: Any) -> dict:\n \"\"\"Dynamically show only the relevant output based on the selected output type.\"\"\"\n if field_name == \"mode\":\n # Start with empty outputs\n frontend_node[\"outputs\"] = []\n if field_value == \"Store\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Stored Messages\",\n name=\"stored_messages\",\n method=\"store_message\",\n hidden=True,\n dynamic=True,\n )\n ]\n if field_value == \"Retrieve\":\n frontend_node[\"outputs\"] = [\n Output(\n display_name=\"Messages\", name=\"messages_text\", method=\"retrieve_messages_as_text\", dynamic=True\n ),\n Output(\n display_name=\"Dataframe\", name=\"dataframe\", method=\"retrieve_messages_dataframe\", dynamic=True\n ),\n ]\n return frontend_node\n\n async def store_message(self) -> Message:\n message = Message(text=self.message) if isinstance(self.message, str) else self.message\n\n 
message.context_id = self.context_id or message.context_id\n message.session_id = self.session_id or message.session_id\n message.sender = self.sender or message.sender or MESSAGE_SENDER_AI\n message.sender_name = self.sender_name or message.sender_name or MESSAGE_SENDER_NAME_AI\n\n stored_messages: list[Message] = []\n\n if self.memory:\n self.memory.context_id = message.context_id\n self.memory.session_id = message.session_id\n lc_message = message.to_lc_message()\n await self.memory.aadd_messages([lc_message])\n\n stored_messages = await self.memory.aget_messages() or []\n\n stored_messages = [Message.from_lc_message(m) for m in stored_messages] if stored_messages else []\n\n if message.sender:\n stored_messages = [m for m in stored_messages if m.sender == message.sender]\n else:\n await astore_message(message, flow_id=self.graph.flow_id)\n stored_messages = (\n await aget_messages(\n session_id=message.session_id,\n context_id=message.context_id,\n sender_name=message.sender_name,\n sender=message.sender,\n )\n or []\n )\n\n if not stored_messages:\n msg = \"No messages were stored. 
Please ensure that the session ID and sender are properly set.\"\n raise ValueError(msg)\n\n stored_message = stored_messages[0]\n self.status = stored_message\n return stored_message\n\n async def retrieve_messages(self) -> Data:\n sender_type = self.sender_type\n sender_name = self.sender_name\n session_id = self.session_id\n context_id = self.context_id\n n_messages = self.n_messages\n order = \"DESC\" if self.order == \"Descending\" else \"ASC\"\n\n if sender_type == \"Machine and User\":\n sender_type = None\n\n if self.memory and not hasattr(self.memory, \"aget_messages\"):\n memory_name = type(self.memory).__name__\n err_msg = f\"External Memory object ({memory_name}) must have 'aget_messages' method.\"\n raise AttributeError(err_msg)\n # Check if n_messages is None or 0\n if n_messages == 0:\n stored = []\n elif self.memory:\n # override session_id\n self.memory.session_id = session_id\n self.memory.context_id = context_id\n\n stored = await self.memory.aget_messages()\n # langchain memories are supposed to return messages in ascending order\n\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages first\n\n if order == \"DESC\":\n stored = stored[::-1] # Then reverse if needed\n\n stored = [Message.from_lc_message(m) for m in stored]\n if sender_type:\n expected_type = MESSAGE_SENDER_AI if sender_type == MESSAGE_SENDER_AI else MESSAGE_SENDER_USER\n stored = [m for m in stored if m.type == expected_type]\n else:\n # For internal memory, we always fetch the last N messages by ordering by DESC\n stored = await aget_messages(\n sender=sender_type,\n sender_name=sender_name,\n session_id=session_id,\n context_id=context_id,\n limit=10000,\n order=order,\n )\n if n_messages:\n stored = stored[-n_messages:] # Get last N messages\n\n # self.status = stored\n return cast(\"Data\", stored)\n\n async def retrieve_messages_as_text(self) -> Message:\n stored_text = data_to_text(self.template, await self.retrieve_messages())\n # self.status = 
stored_text\n return Message(text=stored_text)\n\n async def retrieve_messages_dataframe(self) -> DataFrame:\n \"\"\"Convert the retrieved messages into a DataFrame.\n\n Returns:\n DataFrame: A DataFrame containing the message data.\n \"\"\"\n messages = await self.retrieve_messages()\n return DataFrame(messages)\n\n def update_build_config(\n self,\n build_config: dotdict,\n field_value: Any, # noqa: ARG002\n field_name: str | None = None, # noqa: ARG002\n ) -> dotdict:\n return set_current_fields(\n build_config=build_config,\n action_fields=self.mode_config,\n selected_action=build_config[\"mode\"][\"value\"],\n default_fields=self.default_keys,\n func=set_field_display,\n )\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -1957,7 +1957,7 @@ "legacy": false, "lf_version": "1.6.0", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -2007,7 +2007,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n 
advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import 
BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. 
Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -2237,7 +2237,7 @@ "key": "ChatOutput", "legacy": false, "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "cae45e2d53f6", "dependencies": { "dependencies": [ { @@ -2313,7 +2313,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom 
lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n 
\"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom 
lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id or self.graph.session_id or \"\"\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n 
return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json 
b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json index 75541d187c29..7011788d8bf0 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Document Q&A.json @@ -147,7 +147,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "0014a5b41817", + "code_hash": "7a26c54d89ed", "dependencies": { "dependencies": [ { @@ -195,7 +195,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-input\"\n icon = \"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The 
session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=self.session_id,\n context_id=self.context_id,\n files=files,\n )\n if self.session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" + "value": "from lfx.base.data.utils import IMG_FILE_TYPES, TEXT_FILE_TYPES\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.inputs.inputs import BoolInput\nfrom lfx.io import (\n DropdownInput,\n FileInput,\n MessageTextInput,\n MultilineInput,\n Output,\n)\nfrom lfx.schema.message import Message\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_USER,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatInput(ChatComponent):\n display_name = \"Chat Input\"\n description = \"Get chat inputs from the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = 
\"MessagesSquare\"\n name = \"ChatInput\"\n minimized = True\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Text\",\n value=\"\",\n info=\"Message to be passed as input.\",\n input_types=[],\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_USER,\n info=\"Type of sender.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_USER,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. 
Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n FileInput(\n name=\"files\",\n display_name=\"Files\",\n file_types=TEXT_FILE_TYPES + IMG_FILE_TYPES,\n info=\"Files to be sent with the message.\",\n advanced=True,\n is_list=True,\n temp_file=True,\n ),\n ]\n outputs = [\n Output(display_name=\"Chat Message\", name=\"message\", method=\"message_response\"),\n ]\n\n async def message_response(self) -> Message:\n # Ensure files is a list and filter out empty/None values\n files = self.files if self.files else []\n if files and not isinstance(files, list):\n files = [files]\n # Filter out None/empty values\n files = [f for f in files if f is not None and f != \"\"]\n\n session_id = self.session_id or self.graph.session_id or \"\"\n message = await Message.create(\n text=self.input_value,\n sender=self.sender,\n sender_name=self.sender_name,\n session_id=session_id,\n context_id=self.context_id,\n files=files,\n )\n if session_id and isinstance(message, Message) and self.should_store_message:\n stored_message = await self.send_message(\n message,\n )\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -411,7 +411,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "4848ad3e35d5", + "code_hash": "cae45e2d53f6", "dependencies": { "dependencies": [ { @@ -485,7 +485,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom 
lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n 
\"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.base.io.chat import ChatComponent\nfrom lfx.helpers.data import safe_convert\nfrom 
lfx.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.schema.properties import Source\nfrom lfx.template.field.base import Output\nfrom lfx.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/chat-input-and-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"context_id\",\n display_name=\"Context ID\",\n info=\"The context ID of the chat. Adds an extra layer to the local memory.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. 
If left empty, it will be dynamically set to the Data's text key.\",\n ),\n BoolInput(\n name=\"clean_data\",\n display_name=\"Basic Clean Data\",\n value=True,\n advanced=True,\n info=\"Whether to clean data before converting to string.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, _, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message) and not self.is_connected_to_chat_input():\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id or self.graph.session_id or \"\"\n message.context_id = self.context_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if message.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n 
return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, "context_id": { "_input_type": "MessageTextInput", @@ -1207,7 +1207,7 @@ "legacy": false, "lf_version": "1.4.3", "metadata": { - "code_hash": "85abc1094130", + "code_hash": 
"9ff3bde1cf43", "dependencies": { "dependencies": [ { @@ -1275,7 +1275,7 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Enhanced file component with Docling support and process isolation.\n\nNotes:\n-----\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom typing import Any\n\nfrom lfx.base.data.base_file import BaseFileComponent\nfrom lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput\nfrom lfx.io import BoolInput, FileInput, IntInput, Output\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame # noqa: TC001\nfrom lfx.schema.message import Message\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"Read File\"\n description = \"Loads content from one or more files.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n *TEXT_FILE_TYPES,\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"jpg\",\n \"jpeg\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"webp\",\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = 
deepcopy(BaseFileComponent.get_base_inputs())\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Note that advanced document processing can consume significant resources.\"\n ),\n show=True,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"None\", \"easyocr\"],\n value=\"easyocr\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n 
info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n if field_name == \"path\":\n paths = self._path_value(build_config)\n\n # If all files can be processed by docling, do so\n allow_advanced = all(not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")) for file_path in paths)\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = False\n\n # Docling Processing\n elif field_name == \"advanced_mode\":\n for f in (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\"):\n if f in build_config:\n build_config[f][\"show\"] = bool(field_value)\n if f == \"pipeline\":\n build_config[f][\"advanced\"] = not bool(field_value)\n\n elif field_name == \"pipeline\":\n if field_value == \"standard\":\n build_config[\"ocr_engine\"][\"show\"] = True\n build_config[\"ocr_engine\"][\"value\"] = \"easyocr\"\n 
else:\n build_config[\"ocr_engine\"][\"show\"] = False\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\", \"pipeline\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced_dataframe\", method=\"load_files_dataframe\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"advanced_markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n 
frontend_node[\"outputs\"].append(Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"))\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpg\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"