9 changes: 7 additions & 2 deletions cognee-mcp/pyproject.toml

@@ -1,12 +1,14 @@
 [project]
 name = "cognee-mcp"
-version = "0.2.3"
+version = "0.3.0"
 description = "A MCP server project"
 readme = "README.md"
 requires-python = ">=3.10"

 dependencies = [
-    "cognee[postgres,codegraph,gemini,huggingface]==0.1.39",
+    # For local cognee repo usage, remove the comment below and add the absolute path to cognee
+    #"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
+    "cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
     "fastmcp>=1.0",
     "mcp==1.5.0",
     "uv>=0.6.3",
@@ -28,5 +30,8 @@ dev = [
     "debugpy>=1.8.12",
 ]

+[tool.hatch.metadata]
+allow-direct-references = true
+
 [project.scripts]
 cognee = "src:main"
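
The new `[tool.hatch.metadata]` block is what makes the commented-out local-path dependency usable: hatchling rejects `@ file:` direct references in `project.dependencies` unless `allow-direct-references` is enabled. A minimal sketch of the local-development variant of this file (the checkout path is a placeholder and must be adjusted):

```toml
[project]
name = "cognee-mcp"
version = "0.3.0"

dependencies = [
    # Published release (default):
    # "cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
    # Local checkout instead (hypothetical path; point it at your clone):
    "cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
]

[tool.hatch.metadata]
# Required by hatchling for "@ file:" direct references.
allow-direct-references = true
```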
31 changes: 25 additions & 6 deletions cognee-mcp/src/server.py

@@ -9,6 +9,9 @@
 from contextlib import redirect_stdout
 import mcp.types as types
 from mcp.server import FastMCP
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.methods import get_default_user
 from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
 from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
@@ -28,7 +31,6 @@ async def cognify_task(
         """Build knowledge graph from the input text"""
         # NOTE: MCP uses stdout to communicate, we must redirect all output
         # going to stdout ( like the print function ) to stderr.
-        # As cognify is an async background job the output had to be redirected again.
         with redirect_stdout(sys.stderr):
             logger.info("Cognify process starting.")
             if graph_model_file and graph_model_name:
@@ -55,8 +57,8 @@ async def cognify_task(

     text = (
         f"Background process launched due to MCP timeout limitations.\n"
-        f"Average completion time is around 4 minutes.\n"
-        f"For current cognify status you can check the log file at: {log_file}"
+        f"To check current cognify status use the cognify_status tool\n"
+        f"or check the log file at: {log_file}"
     )

     return [
@@ -72,7 +74,6 @@ async def codify(repo_path: str) -> list:
     async def codify_task(repo_path: str):
         # NOTE: MCP uses stdout to communicate, we must redirect all output
         # going to stdout ( like the print function ) to stderr.
-        # As codify is an async background job the output had to be redirected again.
         with redirect_stdout(sys.stderr):
             logger.info("Codify process starting.")
             results = []
@@ -88,8 +89,8 @@ async def codify_task(repo_path: str):

     text = (
         f"Background process launched due to MCP timeout limitations.\n"
-        f"Average completion time is around 4 minutes.\n"
-        f"For current codify status you can check the log file at: {log_file}"
+        f"To check current codify status use the codify_status tool\n"
+        f"or you can check the log file at: {log_file}"
     )

     return [
@@ -138,6 +139,24 @@ async def prune():
     return [types.TextContent(type="text", text="Pruned")]


+@mcp.tool()
+async def cognify_status():
+    """Get status of cognify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        return [types.TextContent(type="text", text=str(status))]
+
+
+@mcp.tool()
+async def codify_status():
+    """Get status of codify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        return [types.TextContent(type="text", text=str(status))]
+
+
 def node_to_string(node):
     node_data = ", ".join(
         [f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
     )
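
The two status tools wrap the same lookup, so pipeline state can also be queried outside the MCP server. A minimal standalone sketch using only the functions imported above, assuming an installed and configured cognee environment (the "main_dataset" name mirrors what cognify uses):

```python
import asyncio

from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status


async def check_status(dataset_name: str):
    # Resolve the per-user dataset id, then ask the pipeline layer for its status.
    user = await get_default_user()
    dataset_id = await get_unique_dataset_id(dataset_name, user)
    return await get_pipeline_status([dataset_id])


# "main_dataset" backs cognify_status; "codebase" backs codify_status.
print(asyncio.run(check_status("main_dataset")))
```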
4,513 changes: 0 additions & 4,513 deletions cognee-mcp/uv.lock

This file was deleted.

3 changes: 2 additions & 1 deletion cognee/api/v1/cognify/code_graph_pipeline.py

@@ -13,6 +13,7 @@
 from cognee.modules.users.methods import get_default_user
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.ingestion import ingest_data
 from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
@@ -64,7 +65,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
         ),
     ]

-    dataset_id = uuid5(NAMESPACE_OID, "codebase")
+    dataset_id = await get_unique_dataset_id("codebase", user)

     if include_docs:
         non_code_pipeline_run = run_tasks(
1 change: 1 addition & 0 deletions cognee/modules/data/methods/__init__.py

@@ -7,6 +7,7 @@
 from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
 from .get_data import get_data
+from .get_unique_dataset_id import get_unique_dataset_id

 # Delete
 from .delete_dataset import delete_dataset
14 changes: 9 additions & 5 deletions cognee/modules/data/methods/create_dataset.py

@@ -4,8 +4,13 @@
 from sqlalchemy.orm import joinedload
 from cognee.modules.data.models import Dataset

+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.models import User
+

-async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
+async def create_dataset(dataset_name: str, user: User, session: AsyncSession) -> Dataset:
+    owner_id = user.id
+
     dataset = (
         await session.scalars(
             select(Dataset)
@@ -16,10 +21,9 @@ async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
     ).first()

     if dataset is None:
-        # Dataset id should be generated based on dataset_name and owner_id so multiple users can use the same dataset_name
-        dataset = Dataset(
-            id=uuid5(NAMESPACE_OID, f"{dataset_name}{str(owner_id)}"), name=dataset_name, data=[]
-        )
+        # Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name
+        dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user)
+        dataset = Dataset(id=dataset_id, name=dataset_name, data=[])
         dataset.owner_id = owner_id

         session.add(dataset)
6 changes: 6 additions & 0 deletions cognee/modules/data/methods/get_unique_dataset_id.py

@@ -0,0 +1,6 @@
+from uuid import UUID, uuid5, NAMESPACE_OID
+from cognee.modules.users.models import User
+
+
+async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+    return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
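
This new helper makes dataset ids deterministic per (name, owner) pair: the same user always resolves a dataset name to the same UUID, while two users sharing a dataset name get distinct ids. A small stdlib-only illustration of that property, with hypothetical `uuid4` values standing in for real `User.id` fields:

```python
from uuid import NAMESPACE_OID, uuid4, uuid5

# Hypothetical user ids standing in for User.id values.
alice_id, bob_id = uuid4(), uuid4()


def dataset_id(name: str, user_id) -> str:
    # Same formula as get_unique_dataset_id: uuid5 over dataset name + owner id.
    return str(uuid5(NAMESPACE_OID, f"{name}{str(user_id)}"))


assert dataset_id("codebase", alice_id) == dataset_id("codebase", alice_id)  # stable per user
assert dataset_id("codebase", alice_id) != dataset_id("codebase", bob_id)    # distinct across users
```

This is also why `run_code_graph_pipeline` changed above: the old `uuid5(NAMESPACE_OID, "codebase")` produced one global id regardless of user, so every user's code graph landed in the same dataset.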
3 changes: 2 additions & 1 deletion cognee/modules/pipelines/operations/pipeline.py

@@ -5,6 +5,7 @@

 from cognee.modules.data.methods import get_datasets, get_datasets_by_name
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
 from cognee.modules.pipelines.operations.run_tasks import run_tasks
 from cognee.modules.pipelines.models import PipelineRunStatus
@@ -93,7 +94,7 @@ async def run_pipeline(
     elif isinstance(dataset, str):
         check_dataset_name(dataset)
         # Generate id based on unique dataset_id formula
-        dataset_id = uuid5(NAMESPACE_OID, f"{dataset}{str(user.id)}")
+        dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)

         if not data:
             data: list[Data] = await get_dataset_data(dataset_id=dataset_id)
2 changes: 1 addition & 1 deletion cognee/tasks/ingestion/ingest_data.py

@@ -104,7 +104,7 @@ async def store_data_to_dataset(
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
-        dataset = await create_dataset(dataset_name, user.id, session)
+        dataset = await create_dataset(dataset_name, user, session)

         # Check to see if data should be updated
         data_point = (
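
Every caller now passes the full `User` object instead of `user.id`, since `create_dataset` needs the user to derive the dataset id. A hedged sketch of the migrated call pattern, mirroring `store_data_to_dataset` (the `db_engine` argument stands in for whatever `get_relational_engine()` returns there):

```python
from cognee.modules.data.methods.create_dataset import create_dataset
from cognee.modules.users.methods import get_default_user


async def ensure_dataset(db_engine, dataset_name: str):
    user = await get_default_user()
    async with db_engine.get_async_session() as session:
        # New signature: pass the User itself, not user.id.
        return await create_dataset(dataset_name, user, session)
```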
7 changes: 3 additions & 4 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml

@@ -57,6 +57,7 @@ dependencies = [
     "sentry-sdk[fastapi]>=2.9.0,<3",
     "structlog>=25.2.0,<26",
     "onnxruntime<=1.21.1",
+    "pylance==0.22.0",
 ]

 [project.optional-dependencies]
@@ -122,7 +123,6 @@ dev = [
     "ruff>=0.9.2,<1.0.0",
     "tweepy==4.14.0",
     "gitpython>=3.1.43,<4",
-    "pylance==0.22.0",
     "mkdocs-material>=9.5.42,<10",
     "mkdocs-minify-plugin>=0.8.0,<0.9",
     "mkdocstrings[python]>=0.26.2,<0.27",