Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
b453a72
feat: add category column to variable model and validation logic
ogabrielluiz Mar 14, 2025
dcf8d0c
feat: add LLM loading and utility functions
ogabrielluiz Mar 14, 2025
ee37cc8
feat: enhance variable management with category support and LLM settings
ogabrielluiz Mar 14, 2025
b1b862a
feat: implement async LLM generation and loading in Component
ogabrielluiz Mar 14, 2025
5699903
test: add unit and integration tests for LLM loading functionality
ogabrielluiz Mar 14, 2025
a9b0d26
feat: add hook to retrieve variables by category and enhance global v…
ogabrielluiz Mar 14, 2025
4890a45
feat: add LLM settings page and integrate into settings navigation
ogabrielluiz Mar 14, 2025
f183436
feat: enhance validation for LLM settings in DatabaseVariableService
ogabrielluiz Mar 14, 2025
2dda874
feat: add async method to retrieve global LLM instance
ogabrielluiz Mar 14, 2025
205f9b1
feat: add nullable category column to variable table in migration script
ogabrielluiz Apr 7, 2025
0fcb3d8
fix: update down_revision in migration script for category column add…
ogabrielluiz Apr 16, 2025
9fc688c
Merge branch 'main' into global-llm
ogabrielluiz Sep 17, 2025
7408646
refactor: remove unused LLM attribute from Component class
ogabrielluiz Sep 17, 2025
0dddf96
feat: add abstract method for retrieving variables by category
ogabrielluiz Sep 17, 2025
320f8ad
feat: add CATEGORY_KB to valid categories in constants
ogabrielluiz Sep 17, 2025
30effce
feat: add non-nullable category column to variable table
ogabrielluiz Sep 17, 2025
9ff9226
chore: clean up package-lock.json by removing deprecated and unused d…
ogabrielluiz Sep 17, 2025
a9a9e02
feat: replace LLMSettingsPage with KBSettingsPage for Knowledge Base …
ogabrielluiz Sep 17, 2025
7825f52
feat: implement vector store factory for Knowledge Bases
ogabrielluiz Sep 17, 2025
6c19fa1
feat: add provider-aware metadata adapters for Knowledge Bases
ogabrielluiz Sep 17, 2025
45a6cfa
feat: enhance metadata extraction for Knowledge Bases with user-aware…
ogabrielluiz Sep 17, 2025
6a42bc5
feat: refactor vector store integration for Knowledge Bases
ogabrielluiz Sep 17, 2025
ba9a795
feat: add unit tests for knowledge bases functionality
ogabrielluiz Sep 17, 2025
51da64d
feat: add unit tests for knowledge bases functionality
ogabrielluiz Sep 17, 2025
4c4e15e
Merge branch 'main' into global-llm
ogabrielluiz Sep 18, 2025
d33858b
feat: enhance OpenSearch vector store integration with new adapter an…
ogabrielluiz Sep 18, 2025
dd95e47
fix: improve OpenSearch metadata adapter functionality and error hand…
ogabrielluiz Sep 18, 2025
07866f5
refactor: enhance OpenSearch with classmethods
ogabrielluiz Sep 18, 2025
74bf0d2
chore: add blank line for improved readability in test file
ogabrielluiz Sep 18, 2025
de4cdf2
refactor: update OpenSearch vector store tests to use new adapter
ogabrielluiz Sep 18, 2025
0227bd8
refactor: update OpenSearch vector store tests to use real adapter
ogabrielluiz Sep 18, 2025
d62f78e
Merge branch 'main' of https://github.com/langflow-ai/langflow into g…
deon-sanchez Oct 16, 2025
4a658ca
chore: update component index
github-actions[bot] Oct 16, 2025
a8ee772
Merge branch 'main' into global-llm
deon-sanchez Oct 16, 2025
0e605b4
chore: update component index
github-actions[bot] Oct 16, 2025
7739c56
Update src/backend/base/langflow/alembic/versions/e5fc330efa7c_add_ca…
erichare Oct 16, 2025
1bb4bb0
Update src/frontend/src/pages/SettingsPage/KBSettingsPage.tsx
erichare Oct 16, 2025
b65362b
Update src/backend/tests/unit/base/knowledge_bases/test_database_fact…
erichare Oct 16, 2025
7d3fcb2
Update src/frontend/src/pages/SettingsPage/KBSettingsPage.tsx
erichare Oct 16, 2025
11d58c2
chore: update component index
github-actions[bot] Oct 16, 2025
ebaa39d
Merge branch 'main' into global-llm
erichare Oct 16, 2025
c8062cc
Update src/backend/tests/unit/base/knowledge_bases/test_metadata_adap…
erichare Oct 16, 2025
763880d
Update src/backend/tests/unit/base/knowledge_bases/test_database_fact…
erichare Oct 16, 2025
2dcc710
chore: update component index
github-actions[bot] Oct 16, 2025
5b224c7
Update vector_store_factory.py
erichare Oct 16, 2025
db4fd7c
chore: update component index
github-actions[bot] Oct 16, 2025
8b8f97c
Update KBSettingsPage.tsx
erichare Oct 16, 2025
d328c49
[autofix.ci] apply automated fixes
autofix-ci[bot] Oct 16, 2025
069d157
Update e5fc330efa7c_add_category_column_in_variable.py
erichare Oct 16, 2025
d6422c5
chore: update component index
github-actions[bot] Oct 16, 2025
99fce7f
Update e5fc330efa7c_add_category_column_in_variable.py
erichare Oct 16, 2025
86c7b8b
chore: update component index
github-actions[bot] Oct 16, 2025
54f8f19
Merge branch 'main' into global-llm
erichare Oct 16, 2025
0734ced
chore: update component index
github-actions[bot] Oct 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Merge multiple heads.

Revision ID: c263b157d227
Revises: d37bc4322900, e5fc330efa7c
Create Date: 2025-10-16 10:30:00.000000

"""

from collections.abc import Sequence

# revision identifiers, used by Alembic.
revision: str = "c263b157d227" # pragma: allowlist secret
down_revision: str | Sequence[str] | None = ("d37bc4322900", "e5fc330efa7c") # pragma: allowlist secret
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
"""Merge multiple heads - no changes needed."""


def downgrade() -> None:
"""Downgrade merge - no changes needed."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Add category column in variable.

Revision ID: e5fc330efa7c
Revises: 0882f9657f22
Create Date: 2025-03-14 15:12:39.234016

"""

from collections.abc import Sequence

import sqlalchemy as sa
import sqlmodel
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "e5fc330efa7c"
down_revision: str | None = "0882f9657f22"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add a non-nullable ``category`` column (default ``'global'``) to ``variable``.

    Idempotent: if the column already exists but is nullable, NULL values are
    backfilled with ``'global'`` and the column is tightened to NOT NULL.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)  # type: ignore[arg-type]
    # ### commands auto generated by Alembic - please adjust! ###
    columns = inspector.get_columns("variable")
    column_names = [column["name"] for column in columns]

    with op.batch_alter_table("variable", schema=None) as batch_op:
        if "category" not in column_names:
            # Add new column as non-nullable with a server default so existing
            # rows are populated without a separate backfill step.
            batch_op.add_column(
                sa.Column("category", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default="global")
            )
        else:
            # Check if column exists but is nullable - need to make it non-nullable
            category_column = next((col for col in columns if col["name"] == "category"), None)
            if category_column and category_column.get("nullable", True):
                # First update any NULL values to 'global'. op.execute runs
                # immediately, before the batched ALTER is applied on exit.
                op.execute("UPDATE variable SET category = 'global' WHERE category IS NULL")
                # Then make the column non-nullable. existing_type is required
                # for SQLite's batch "move and copy" strategy to recreate the
                # column with the correct type.
                batch_op.alter_column(
                    "category",
                    existing_type=sqlmodel.sql.sqltypes.AutoString(),
                    nullable=False,
                    server_default="global",
                )

    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the ``category`` column from ``variable`` if it is present."""
    # Inspect the live schema so the drop is idempotent.
    bind = op.get_bind()
    existing_columns = {col["name"] for col in sa.inspect(bind).get_columns("variable")}  # type: ignore[arg-type]
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table("variable", schema=None) as batch_op:
        if "category" in existing_columns:
            batch_op.drop_column("category")

    # ### end Alembic commands ###
117 changes: 41 additions & 76 deletions src/backend/base/langflow/api/v1/knowledge_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

import pandas as pd
from fastapi import APIRouter, HTTPException
from langchain_chroma import Chroma
from lfx.log import logger
from pydantic import BaseModel

from langflow.api.utils import CurrentActiveUser
from langflow.services.deps import get_settings_service
from langflow.base.knowledge_bases.metadata_adapters import extract_metadata
from langflow.base.knowledge_bases.vector_store_factory import build_kb_vector_store
from langflow.services.deps import get_settings_service, session_scope

router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases")

Expand Down Expand Up @@ -201,38 +202,11 @@ def calculate_text_metrics(df: pd.DataFrame, text_columns: list[str]) -> tuple[i
return total_words, total_characters


def get_kb_metadata(kb_path: Path) -> dict:
"""Extract metadata from a knowledge base directory."""
metadata: dict[str, float | int | str] = {
"chunks": 0,
"words": 0,
"characters": 0,
"avg_chunk_size": 0.0,
"embedding_provider": "Unknown",
"embedding_model": "Unknown",
}
async def get_kb_metadata(kb_path: Path, user_id: str) -> dict:
"""Extract metadata from a knowledge base directory using user-aware provider configuration."""
from uuid import UUID

try:
# First check embedding metadata file for accurate provider and model info
metadata_file = kb_path / "embedding_metadata.json"
if metadata_file.exists():
try:
with metadata_file.open("r", encoding="utf-8") as f:
embedding_metadata = json.load(f)
if isinstance(embedding_metadata, dict):
if "embedding_provider" in embedding_metadata:
metadata["embedding_provider"] = embedding_metadata["embedding_provider"]
if "embedding_model" in embedding_metadata:
metadata["embedding_model"] = embedding_metadata["embedding_model"]
except (OSError, json.JSONDecodeError) as _:
logger.exception("Error reading embedding metadata file '%s'", metadata_file)

# Fallback to detection if not found in metadata file
if metadata["embedding_provider"] == "Unknown":
metadata["embedding_provider"] = detect_embedding_provider(kb_path)
if metadata["embedding_model"] == "Unknown":
metadata["embedding_model"] = detect_embedding_model(kb_path)

# Read schema for text column information
schema_data = None
schema_file = kb_path / "schema.json"
Expand All @@ -245,48 +219,39 @@ def get_kb_metadata(kb_path: Path) -> dict:
except (ValueError, TypeError, OSError) as _:
logger.exception("Error reading schema file '%s'", schema_file)

# Create vector store
chroma = Chroma(
persist_directory=str(kb_path),
collection_name=kb_path.name,
)

# Access the raw collection
collection = chroma._collection

# Fetch all documents and metadata
results = collection.get(include=["documents", "metadatas"])

# Convert to pandas DataFrame
source_chunks = pd.DataFrame(
{
"document": results["documents"],
"metadata": results["metadatas"],
}
)

# Process the source data for metadata
try:
metadata["chunks"] = len(source_chunks)

# Get text columns and calculate metrics
text_columns = get_text_columns(source_chunks, schema_data)
if text_columns:
words, characters = calculate_text_metrics(source_chunks, text_columns)
metadata["words"] = words
metadata["characters"] = characters

# Calculate average chunk size
if int(metadata["chunks"]) > 0:
metadata["avg_chunk_size"] = round(int(characters) / int(metadata["chunks"]), 1)

except (OSError, ValueError, TypeError) as _:
logger.exception("Error processing Chroma DB '%s'", kb_path.name)

except (OSError, ValueError, TypeError) as _:
logger.exception("Error processing knowledge base directory '%s'", kb_path)

return metadata
# Create vector store using user's configuration from database
async with session_scope() as session:
vector_store = await build_kb_vector_store(
kb_path=kb_path,
collection_name=kb_path.name,
embedding_function=None, # Not needed for metadata operations
user_id=UUID(user_id),
session=session,
)

# Use the enhanced metadata extraction with provider-specific adapters
return extract_metadata(
vector_store=vector_store,
kb_path=kb_path,
schema_data=schema_data,
)

except (OSError, ValueError, ImportError, AttributeError) as e:
logger.exception("Error getting metadata for KB '%s': %s", kb_path.name, e)

# Return minimal metadata on error
return {
"chunks": 0,
"words": 0,
"characters": 0,
"avg_chunk_size": 0.0,
"embedding_provider": "Unknown",
"embedding_model": "Unknown",
"provider": "unknown",
"collection_info": {},
"provider_specific": {},
"supports_embeddings": False,
}


@router.get("", status_code=HTTPStatus.OK)
Expand All @@ -312,7 +277,7 @@ async def list_knowledge_bases(current_user: CurrentActiveUser) -> list[Knowledg
size = get_directory_size(kb_dir)

# Get metadata from KB files
metadata = get_kb_metadata(kb_dir)
metadata = await get_kb_metadata(kb_dir, current_user.id)

kb_info = KnowledgeBaseInfo(
id=kb_dir.name,
Expand Down Expand Up @@ -357,7 +322,7 @@ async def get_knowledge_base(kb_name: str, current_user: CurrentActiveUser) -> K
size = get_directory_size(kb_path)

# Get metadata from KB files
metadata = get_kb_metadata(kb_path)
metadata = await get_kb_metadata(kb_path, current_user.id)

return KnowledgeBaseInfo(
id=kb_name,
Expand Down
26 changes: 25 additions & 1 deletion src/backend/base/langflow/api/v1/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from langflow.api.utils import CurrentActiveUser, DbSession
from langflow.services.database.models.variable.model import VariableCreate, VariableRead, VariableUpdate
from langflow.services.deps import get_variable_service
from langflow.services.variable.constants import CREDENTIAL_TYPE
from langflow.services.variable.constants import CREDENTIAL_TYPE, VALID_CATEGORIES
from langflow.services.variable.service import DatabaseVariableService

router = APIRouter(prefix="/variables", tags=["Variables"])
Expand Down Expand Up @@ -39,6 +39,7 @@ async def create_variable(
value=variable.value,
default_fields=variable.default_fields or [],
type_=variable.type or CREDENTIAL_TYPE,
category=variable.category,
session=session,
)
except Exception as e:
Expand All @@ -64,6 +65,29 @@ async def read_variables(
raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/category/{category}", response_model=list[VariableRead], status_code=200)
async def read_variables_by_category(
*,
session: DbSession,
category: str,
current_user: CurrentActiveUser,
):
"""Read all variables for a specific category."""
variable_service = get_variable_service()
if not isinstance(variable_service, DatabaseVariableService):
msg = "Variable service is not an instance of DatabaseVariableService"
raise TypeError(msg)

# Validate category
if category not in VALID_CATEGORIES:
raise HTTPException(status_code=400, detail=f"Invalid category. Must be one of: {', '.join(VALID_CATEGORIES)}")

try:
return await variable_service.get_by_category(user_id=current_user.id, category=category, session=session)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) from e


@router.patch("/{variable_id}", response_model=VariableRead, status_code=200)
async def update_variable(
*,
Expand Down
Empty file.
109 changes: 109 additions & 0 deletions src/backend/base/langflow/llm/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from uuid import UUID

from langchain.chat_models import init_chat_model
from langchain_core.language_models import BaseChatModel
from sqlmodel.ext.asyncio.session import AsyncSession

from langflow.llm.utils import load_llm_settings_from_db
from langflow.services.settings.llm import LLMSettings
from langflow.services.variable.service import DatabaseVariableService

# Cache to store initialized chat models, keyed by
# (provider, model, api_key, base_url).
# NOTE(review): the cache is unbounded and its keys hold the raw API key in
# memory for the process lifetime - confirm this is acceptable for
# long-running deployments.
_CHAT_MODEL_CACHE: dict[tuple[str, str, str | None, str | None], BaseChatModel] = {}


async def load_llm(llm_settings: LLMSettings) -> BaseChatModel:
    """Initialize (or fetch from cache) a chat model for the given settings.

    Delegates to the shared cache-aware loader, which uses LangChain's
    universal ``init_chat_model`` to build the model for the configured
    provider, model name, and API key.

    Args:
        llm_settings: Configuration settings for the LLM.

    Returns:
        The cached or freshly initialized chat model instance.

    Raises:
        ValueError: If the API key is not provided in the settings.
    """
    model = await _fetch_llm_instance(llm_settings)
    return model


async def load_llm_for_user(
    user_id: UUID | str,
    variable_service: DatabaseVariableService,
    session: AsyncSession,
    override_settings: LLMSettings | None = None,
) -> BaseChatModel:
    """Load the chat model configured for a specific user.

    Settings come from the user's saved database configuration unless
    ``override_settings`` is supplied, in which case those take precedence.

    Args:
        user_id: The user whose settings should be resolved.
        variable_service: Service used to read the user's stored variables.
        session: Async database session for the settings lookup.
        override_settings: Explicit settings that bypass the database.

    Returns:
        The cached or freshly initialized chat model instance.

    Raises:
        ValueError: If no API key is available in the resolved settings.
    """
    # Prefer caller-supplied settings; otherwise fall back to the database.
    if override_settings:
        settings = override_settings
    else:
        settings = await load_llm_settings_from_db(
            user_id=user_id,
            variable_service=variable_service,
            session=session,
        )
    return await _fetch_llm_instance(settings)


async def _fetch_llm_instance(llm_settings: LLMSettings) -> BaseChatModel:
    """Internal function to fetch or initialize an LLM instance.

    Instances are cached by (provider, model, api_key, base_url) so repeated
    calls with the same configuration reuse a single model.

    Args:
        llm_settings: Configuration settings for the LLM.

    Returns:
        An initialized chat model instance.

    Raises:
        ValueError: If the API key is not provided in the settings.
    """
    if not llm_settings.api_key:
        msg = "API key is required to initialize the global chat model"
        raise ValueError(msg)

    # base_url is optional on LLMSettings; resolve it once and reuse it for
    # both the cache key and initialization (the original mixed getattr here
    # with a hasattr/ternary below, duplicating the same lookup).
    base_url = getattr(llm_settings, "base_url", None)

    # Create a cache key based on the model configuration
    cache_key = (
        llm_settings.provider,
        llm_settings.model,
        llm_settings.api_key,
        base_url,
    )

    # Return cached model if it exists
    if cache_key in _CHAT_MODEL_CACHE:
        return _CHAT_MODEL_CACHE[cache_key]

    # Initialize and cache the model.
    # NOTE(review): the cache is unbounded and keys include the raw API key.
    model = init_chat_model(
        model=llm_settings.model,
        model_provider=llm_settings.provider,
        api_key=llm_settings.api_key,
        base_url=base_url,
    )

    _CHAT_MODEL_CACHE[cache_key] = model
    return model
Loading