Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
8c1dbab
feat: Make knowledge bases user-stored
erichare Aug 20, 2025
e5a66ca
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
9c0f659
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Aug 20, 2025
37eca57
Fix ruff error
erichare Aug 20, 2025
d19b199
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
53f446f
Reuse code
erichare Aug 20, 2025
33e3c81
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
d454e7c
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Aug 20, 2025
c9e9a3a
Don't show options by default
erichare Aug 20, 2025
79ddd12
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
707f79c
Pass in the Langflow API key if set
erichare Aug 20, 2025
9adc180
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
3da9551
Update files.py
erichare Aug 20, 2025
b18d0cc
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
bbe4af6
Properly handle secret retrieval
erichare Aug 20, 2025
7f72a4a
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 20, 2025
0147d8d
Merge branch 'main' into feat-kb-enhancements
erichare Aug 21, 2025
ab96b5f
Update src/backend/base/langflow/base/data/kb_utils.py
erichare Aug 21, 2025
6493ab4
Update src/backend/base/langflow/base/data/kb_utils.py
erichare Aug 21, 2025
d3fb1ed
Update src/backend/base/langflow/components/data/kb_ingest.py
erichare Aug 21, 2025
81c386c
Merge branch 'main' into feat-kb-enhancements
erichare Aug 21, 2025
49b9397
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
78a5ebe
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Aug 21, 2025
ffeb134
Feedback from review
erichare Aug 21, 2025
c36f16b
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
4529cf7
Fix other uses of incorrect user
erichare Aug 21, 2025
0353be6
Merge branch 'main' into feat-kb-enhancements
erichare Aug 21, 2025
83dabaa
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
02eaab1
[autofix.ci] apply automated fixes (attempt 2/3)
autofix-ci[bot] Aug 21, 2025
9d579a6
Feedback from review 2
erichare Aug 21, 2025
ea3f816
Merge branch 'feat-kb-enhancements' of https://github.com/langflow-ai…
erichare Aug 21, 2025
8862920
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
f90efa4
Update kb_ingest.py
erichare Aug 21, 2025
8ca107f
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
8afeae6
Merge branch 'main' into feat-kb-enhancements
erichare Aug 21, 2025
21e3c36
Update tests
erichare Aug 21, 2025
0b70b7b
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
c19a561
Update kb_ingest.py
erichare Aug 21, 2025
7bb5fa8
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
aa1064b
Merge branch 'main' into feat-kb-enhancements
erichare Aug 21, 2025
cfdd2f7
Fix mypy issues
erichare Aug 21, 2025
2bdbb11
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
f517dc7
Update kb_utils.py
erichare Aug 21, 2025
efd1e18
Update test_kb_ingest.py
erichare Aug 21, 2025
368fdbd
Fix tests
erichare Aug 21, 2025
f15a9b1
[autofix.ci] apply automated fixes
autofix-ci[bot] Aug 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: Make knowledge bases user-stored
  • Loading branch information
erichare committed Aug 20, 2025
commit 8c1dbab60def95107eb5c1b5989444b482606fb9
25 changes: 16 additions & 9 deletions src/backend/base/langflow/api/v1/knowledge_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from loguru import logger
from pydantic import BaseModel

from langflow.api.utils import CurrentActiveUser
from langflow.services.deps import get_settings_service

router = APIRouter(tags=["Knowledge Bases"], prefix="/knowledge_bases")
Expand Down Expand Up @@ -290,17 +291,19 @@ def get_kb_metadata(kb_path: Path) -> dict:

@router.get("", status_code=HTTPStatus.OK)
@router.get("/", status_code=HTTPStatus.OK)
async def list_knowledge_bases() -> list[KnowledgeBaseInfo]:
async def list_knowledge_bases(current_user: CurrentActiveUser) -> list[KnowledgeBaseInfo]:
"""List all available knowledge bases."""
try:
kb_root_path = get_kb_root_path()
kb_user = current_user.username
kb_path = kb_root_path / kb_user

if not kb_root_path.exists():
if not kb_path.exists():
return []

knowledge_bases = []

for kb_dir in kb_root_path.iterdir():
for kb_dir in kb_path.iterdir():
if not kb_dir.is_dir() or kb_dir.name.startswith("."):
continue

Expand Down Expand Up @@ -340,11 +343,12 @@ async def list_knowledge_bases() -> list[KnowledgeBaseInfo]:


@router.get("/{kb_name}", status_code=HTTPStatus.OK)
async def get_knowledge_base(kb_name: str) -> KnowledgeBaseInfo:
async def get_knowledge_base(kb_name: str, current_user: CurrentActiveUser) -> KnowledgeBaseInfo:
"""Get detailed information about a specific knowledge base."""
try:
kb_root_path = get_kb_root_path()
kb_path = kb_root_path / kb_name
kb_user = current_user.username
kb_path = kb_root_path / kb_user / kb_name

if not kb_path.exists() or not kb_path.is_dir():
raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
Expand Down Expand Up @@ -374,11 +378,12 @@ async def get_knowledge_base(kb_name: str) -> KnowledgeBaseInfo:


@router.delete("/{kb_name}", status_code=HTTPStatus.OK)
async def delete_knowledge_base(kb_name: str) -> dict[str, str]:
async def delete_knowledge_base(kb_name: str, current_user: CurrentActiveUser) -> dict[str, str]:
"""Delete a specific knowledge base."""
try:
kb_root_path = get_kb_root_path()
kb_path = kb_root_path / kb_name
kb_user = current_user.username
kb_path = kb_root_path / kb_user / kb_name

if not kb_path.exists() or not kb_path.is_dir():
raise HTTPException(status_code=404, detail=f"Knowledge base '{kb_name}' not found")
Expand All @@ -396,15 +401,17 @@ async def delete_knowledge_base(kb_name: str) -> dict[str, str]:

@router.delete("", status_code=HTTPStatus.OK)
@router.delete("/", status_code=HTTPStatus.OK)
async def delete_knowledge_bases_bulk(request: BulkDeleteRequest) -> dict[str, object]:
async def delete_knowledge_bases_bulk(request: BulkDeleteRequest, current_user: CurrentActiveUser) -> dict[str, object]:
"""Delete multiple knowledge bases."""
try:
kb_root_path = get_kb_root_path()
kb_user = current_user.username
kb_path = kb_root_path / kb_user
deleted_count = 0
not_found_kbs = []

for kb_name in request.kb_names:
kb_path = kb_root_path / kb_name
kb_path = kb_path / kb_name

Comment thread
erichare marked this conversation as resolved.
if not kb_path.exists() or not kb_path.is_dir():
not_found_kbs.append(kb_name)
Expand Down
125 changes: 103 additions & 22 deletions src/backend/base/langflow/components/data/kb_ingest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import contextlib
import hashlib
import json
import re
Expand All @@ -16,12 +17,14 @@

from langflow.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES
from langflow.custom import Component
from langflow.custom.custom_component.custom_component import get_variable_service
Comment thread
erichare marked this conversation as resolved.
Outdated
from langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, Output, SecretStrInput, StrInput, TableInput
from langflow.schema.data import Data
from langflow.schema.dotdict import dotdict # noqa: TC001
from langflow.schema.table import EditMode
from langflow.services.auth.utils import decrypt_api_key, encrypt_api_key
from langflow.services.deps import get_settings_service
from langflow.services.auth.utils import create_user_longterm_token, decrypt_api_key, encrypt_api_key, get_current_user
from langflow.services.database.models.user.crud import get_user_by_id
from langflow.services.deps import get_session, get_settings_service

HUGGINGFACE_MODEL_NAMES = ["sentence-transformers/all-MiniLM-L6-v2", "sentence-transformers/all-mpnet-base-v2"]
COHERE_MODEL_NAMES = ["embed-english-v3.0", "embed-multilingual-v3.0"]
Expand Down Expand Up @@ -76,7 +79,7 @@
display_name="API Key",
info="Provider API key for embedding model",
required=True,
load_from_db=True,
load_from_db=False,
),
},
},
Expand Down Expand Up @@ -157,6 +160,13 @@
advanced=True,
required=False,
),
SecretStrInput(
name="langflow_api_key",
display_name="Langflow API Key",
info="Langflow API key for authentication when saving the knowledge base.",
required=False,
advanced=True,
),
BoolInput(
name="allow_duplicates",
display_name="Allow Duplicates",
Expand Down Expand Up @@ -329,22 +339,47 @@

return metadata

def _create_vector_store(
async def _get_current_user(self, db):
"""Get the current user based on the provided API key or create a new user."""
if self.langflow_api_key:
current_user = await get_current_user(
token="",
query_param=self.langflow_api_key,
header_param="",
db=db,
)
else:
user_id, _ = await create_user_longterm_token(db)
current_user = await get_user_by_id(db, user_id)

# Fail if the user is not found
if not current_user:
msg = "User not found. Please provide a valid Langflow API key or ensure the user exists."
raise ValueError(msg)

return current_user

async def _create_vector_store(
self, df_source: pd.DataFrame, config_list: list[dict[str, Any]], embedding_model: str, api_key: str
) -> None:
"""Create vector store following Local DB component pattern."""
try:
# Get the current user
async for db in get_session():
current_user = await self._get_current_user(db)

# Set up vector store directory
base_dir = self._get_kb_root()
kb_user = current_user.username

vector_store_dir = base_dir / self.knowledge_base
vector_store_dir = base_dir / kb_user / self.knowledge_base
vector_store_dir.mkdir(parents=True, exist_ok=True)

# Create embeddings model
embedding_function = self._build_embeddings(embedding_model, api_key)

# Convert DataFrame to Data objects (following Local DB pattern)
data_objects = self._convert_df_to_data_objects(df_source, config_list)
data_objects = await self._convert_df_to_data_objects(df_source, config_list)

# Create vector store
chroma = Chroma(
Expand All @@ -367,16 +402,22 @@
except (OSError, ValueError, RuntimeError) as e:
self.log(f"Error creating vector store: {e}")

def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:
async def _convert_df_to_data_objects(self, df_source: pd.DataFrame, config_list: list[dict[str, Any]]) -> list[Data]:

Check failure on line 405 in src/backend/base/langflow/components/data/kb_ingest.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.13)

Ruff (E501)

src/backend/base/langflow/components/data/kb_ingest.py:405:121: E501 Line too long (122 > 120)
"""Convert DataFrame to Data objects for vector store."""
data_objects: list[Data] = []

# Get the current user
async for db in get_session():
current_user = await self._get_current_user(db)

# Set up vector store directory
base_dir = self._get_kb_root()
kb_user = current_user.username
kb_path = base_dir / kb_user / self.knowledge_base

# If we don't allow duplicates, we need to get the existing hashes
chroma = Chroma(
persist_directory=str(base_dir / self.knowledge_base),
persist_directory=str(kb_path),
collection_name=self.knowledge_base,
)

Expand Down Expand Up @@ -469,7 +510,7 @@
# ---------------------------------------------------------------------
# OUTPUT METHODS
# ---------------------------------------------------------------------
def build_kb_info(self) -> Data:
async def build_kb_info(self) -> Data:
"""Main ingestion routine → returns a dict with KB metadata."""
try:
# Get source DataFrame
Expand All @@ -479,9 +520,14 @@
config_list = self._validate_column_config(df_source)
column_metadata = self._build_column_metadata(config_list, df_source)

# Prepare KB folder (using File Component patterns)
# Get the current user
async for db in get_session():
current_user = await self._get_current_user(db)

# Set up vector store directory
kb_root = self._get_kb_root()
kb_path = kb_root / self.knowledge_base
kb_user = current_user.username
kb_path = kb_root / kb_user / self.knowledge_base

# Read the embedding info from the knowledge base folder
metadata_path = kb_path / "embedding_metadata.json"
Expand All @@ -506,7 +552,7 @@
)

# Create vector store following Local DB component pattern
self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)
await self._create_vector_store(df_source, config_list, embedding_model=embedding_model, api_key=api_key)

# Save KB files (using File Component storage patterns)
self._save_kb_files(kb_path, config_list)
Expand All @@ -532,21 +578,48 @@
self.status = f"❌ KB ingestion failed: {e}"
return Data(data={"error": str(e), "kb_name": self.knowledge_base})

def _get_knowledge_bases(self) -> list[str]:
async def _get_knowledge_bases(self) -> list[str]:
"""Retrieve a list of available knowledge bases.

Returns:
A list of knowledge base names.
"""
# Return the list of directories in the knowledge base root path
kb_root_path = self._get_kb_root()
if not KNOWLEDGE_BASES_ROOT_PATH.exists():
return []

# Get the current user
async for db in get_session():
current_user = await self._get_current_user(db)

if not kb_root_path.exists():
# Set up vector store directory
kb_user = current_user.username
kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user

if not kb_path.exists():
return []

return [str(d.name) for d in kb_root_path.iterdir() if not d.name.startswith(".") and d.is_dir()]
return [str(d.name) for d in kb_path.iterdir() if not d.name.startswith(".") and d.is_dir()]

async def _get_api_key_variable(self, field_value: dict[str, Any]):
async for db in get_session():
Comment thread
erichare marked this conversation as resolved.
Outdated
current_user = await self._get_current_user(db)
variable_service = get_variable_service()

def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:
# Process the api_key field variable
return await variable_service.get_variable(
user_id=current_user.id,
name=field_value["03_api_key"],
field="",
session=db,
)
return None

async def update_build_config(
self,
build_config: dotdict,
field_value: Any,
field_name: str | None = None,
) -> dotdict:
"""Update build configuration based on provider selection."""
# Create a new knowledge base
if field_name == "knowledge_base":
Expand All @@ -556,28 +629,36 @@
msg = f"Invalid knowledge base name: {field_value['01_new_kb_name']}"
raise ValueError(msg)

api_key = field_value.get("03_api_key", None)
with contextlib.suppress(Exception):
# If the API key is a variable, resolve it
api_key = await self._get_api_key_variable(field_value)

# We need to test the API Key one time against the embedding model
embed_model = self._build_embeddings(
embedding_model=field_value["02_embedding_model"], api_key=field_value["03_api_key"]
embedding_model=field_value["02_embedding_model"], api_key=api_key
)

# Try to generate a dummy embedding to validate the API key
embed_model.embed_query("test")

Comment thread
erichare marked this conversation as resolved.
# Create the new knowledge base directory
kb_path = KNOWLEDGE_BASES_ROOT_PATH / field_value["01_new_kb_name"]
async for db in get_session():
current_user = await self._get_current_user(db)
kb_user = current_user.username
kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / field_value["01_new_kb_name"]
kb_path.mkdir(parents=True, exist_ok=True)

# Save the embedding metadata
build_config["knowledge_base"]["value"] = field_value["01_new_kb_name"]
self._save_embedding_metadata(
kb_path=kb_path,
embedding_model=field_value["02_embedding_model"],
api_key=field_value["03_api_key"],
api_key=api_key,
)

# Update the knowledge base options dynamically
build_config["knowledge_base"]["options"] = self._get_knowledge_bases()
build_config["knowledge_base"]["options"] = await self._get_knowledge_bases()
if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]:
build_config["knowledge_base"]["value"] = None

Expand Down
Loading
Loading