9 changes: 7 additions & 2 deletions cognee-mcp/pyproject.toml

@@ -1,12 +1,14 @@
 [project]
 name = "cognee-mcp"
-version = "0.2.3"
+version = "0.3.0"
 description = "A MCP server project"
 readme = "README.md"
 requires-python = ">=3.10"

 dependencies = [
-    "cognee[postgres,codegraph,gemini,huggingface]==0.1.39",
+    # For local cognee repo usage, remove the comment below and add the absolute path to cognee
+    #"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
+    "cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
     "fastmcp>=1.0",
     "mcp==1.5.0",
     "uv>=0.6.3",
@@ -28,5 +30,8 @@ dev = [
     "debugpy>=1.8.12",
 ]

+[tool.hatch.metadata]
+allow-direct-references = true
+
 [project.scripts]
 cognee = "src:main"
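
The new `[tool.hatch.metadata]` block is what makes the commented-out local-path dependency usable: hatchling rejects `@ file:` direct references in `project.dependencies` unless `allow-direct-references` is enabled. A minimal sketch of the local-development variant of this file (the checkout path is a placeholder and must be adjusted):

```toml
[project]
name = "cognee-mcp"
version = "0.3.0"

dependencies = [
    # Published release (default):
    # "cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
    # Local checkout instead (hypothetical path; point it at your clone):
    "cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
]

[tool.hatch.metadata]
# Required by hatchling for "@ file:" direct references.
allow-direct-references = true
```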
31 changes: 25 additions & 6 deletions cognee-mcp/src/server.py

@@ -9,6 +9,9 @@
 from contextlib import redirect_stdout
 import mcp.types as types
 from mcp.server import FastMCP
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.methods import get_default_user
 from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
 from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
@@ -28,7 +31,6 @@ async def cognify_task(
         """Build knowledge graph from the input text"""
         # NOTE: MCP uses stdout to communicate, we must redirect all output
         # going to stdout ( like the print function ) to stderr.
-        # As cognify is an async background job the output had to be redirected again.
         with redirect_stdout(sys.stderr):
             logger.info("Cognify process starting.")
             if graph_model_file and graph_model_name:
@@ -55,8 +57,8 @@ async def cognify_task(

     text = (
         f"Background process launched due to MCP timeout limitations.\n"
-        f"Average completion time is around 4 minutes.\n"
-        f"For current cognify status you can check the log file at: {log_file}"
+        f"To check current cognify status use the cognify_status tool\n"
+        f"or check the log file at: {log_file}"
     )

     return [
@@ -72,7 +74,6 @@ async def codify(repo_path: str) -> list:
     async def codify_task(repo_path: str):
         # NOTE: MCP uses stdout to communicate, we must redirect all output
         # going to stdout ( like the print function ) to stderr.
-        # As codify is an async background job the output had to be redirected again.
         with redirect_stdout(sys.stderr):
             logger.info("Codify process starting.")
             results = []
@@ -88,8 +89,8 @@ async def codify_task(repo_path: str):

     text = (
         f"Background process launched due to MCP timeout limitations.\n"
-        f"Average completion time is around 4 minutes.\n"
-        f"For current codify status you can check the log file at: {log_file}"
+        f"To check current codify status use the codify_status tool\n"
+        f"or you can check the log file at: {log_file}"
     )

     return [
@@ -138,6 +139,24 @@ async def prune():
     return [types.TextContent(type="text", text="Pruned")]


+@mcp.tool()
+async def cognify_status():
+    """Get status of cognify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        return [types.TextContent(type="text", text=str(status))]
+
+
+@mcp.tool()
+async def codify_status():
+    """Get status of codify pipeline"""
+    with redirect_stdout(sys.stderr):
+        user = await get_default_user()
+        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        return [types.TextContent(type="text", text=str(status))]
+
+
 def node_to_string(node):
     node_data = ", ".join(
         [f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
     )
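
The two status tools wrap the same lookup, so pipeline state can also be queried outside the MCP server. A minimal standalone sketch using only the functions imported above, assuming an installed and configured cognee environment (the "main_dataset" name mirrors what cognify uses):

```python
import asyncio

from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status


async def check_status(dataset_name: str):
    # Resolve the per-user dataset id, then ask the pipeline layer for its status.
    user = await get_default_user()
    dataset_id = await get_unique_dataset_id(dataset_name, user)
    return await get_pipeline_status([dataset_id])


# "main_dataset" backs cognify_status; "codebase" backs codify_status.
print(asyncio.run(check_status("main_dataset")))
```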
4,513 changes: 0 additions & 4,513 deletions cognee-mcp/uv.lock

This file was deleted.

3 changes: 2 additions & 1 deletion cognee/api/v1/cognify/code_graph_pipeline.py

@@ -13,6 +13,7 @@
 from cognee.modules.users.methods import get_default_user
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.ingestion import ingest_data
 from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
@@ -64,7 +65,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
         ),
     ]

-    dataset_id = uuid5(NAMESPACE_OID, "codebase")
+    dataset_id = await get_unique_dataset_id("codebase", user)

     if include_docs:
         non_code_pipeline_run = run_tasks(
1 change: 1 addition & 0 deletions cognee/modules/data/methods/__init__.py

@@ -7,6 +7,7 @@
 from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
 from .get_data import get_data
+from .get_unique_dataset_id import get_unique_dataset_id

 # Delete
 from .delete_dataset import delete_dataset
14 changes: 9 additions & 5 deletions cognee/modules/data/methods/create_dataset.py

@@ -4,8 +4,13 @@
 from sqlalchemy.orm import joinedload
 from cognee.modules.data.models import Dataset

+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
+from cognee.modules.users.models import User
+

-async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
+async def create_dataset(dataset_name: str, user: User, session: AsyncSession) -> Dataset:
+    owner_id = user.id
+
     dataset = (
         await session.scalars(
             select(Dataset)
@@ -16,10 +21,9 @@ async def create_dataset(dataset_name: str, owner_id: UUID, session: AsyncSession) -> Dataset:
     ).first()

     if dataset is None:
-        # Dataset id should be generated based on dataset_name and owner_id so multiple users can use the same dataset_name
-        dataset = Dataset(
-            id=uuid5(NAMESPACE_OID, f"{dataset_name}{str(owner_id)}"), name=dataset_name, data=[]
-        )
+        # Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name
+        dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user)
+        dataset = Dataset(id=dataset_id, name=dataset_name, data=[])
         dataset.owner_id = owner_id

         session.add(dataset)
6 changes: 6 additions & 0 deletions cognee/modules/data/methods/get_unique_dataset_id.py

@@ -0,0 +1,6 @@
+from uuid import UUID, uuid5, NAMESPACE_OID
+from cognee.modules.users.models import User
+
+
+async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+    return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
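
This new helper makes dataset ids deterministic per (name, owner) pair: the same user always resolves a dataset name to the same UUID, while two users sharing a dataset name get distinct ids. A small stdlib-only illustration of that property, with hypothetical `uuid4` values standing in for real `User.id` fields:

```python
from uuid import NAMESPACE_OID, uuid4, uuid5

# Hypothetical user ids standing in for User.id values.
alice_id, bob_id = uuid4(), uuid4()


def dataset_id(name: str, user_id) -> str:
    # Same formula as get_unique_dataset_id: uuid5 over dataset name + owner id.
    return str(uuid5(NAMESPACE_OID, f"{name}{str(user_id)}"))


assert dataset_id("codebase", alice_id) == dataset_id("codebase", alice_id)  # stable per user
assert dataset_id("codebase", alice_id) != dataset_id("codebase", bob_id)    # distinct across users
```

This is also why `run_code_graph_pipeline` changed above: the old `uuid5(NAMESPACE_OID, "codebase")` produced one global id regardless of user, so every user's code graph landed in the same dataset.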
3 changes: 2 additions & 1 deletion cognee/modules/pipelines/operations/pipeline.py

@@ -5,6 +5,7 @@

 from cognee.modules.data.methods import get_datasets, get_datasets_by_name
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
+from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
 from cognee.modules.pipelines.operations.run_tasks import run_tasks
 from cognee.modules.pipelines.models import PipelineRunStatus
@@ -93,7 +94,7 @@ async def run_pipeline(
     elif isinstance(dataset, str):
         check_dataset_name(dataset)
         # Generate id based on unique dataset_id formula
-        dataset_id = uuid5(NAMESPACE_OID, f"{dataset}{str(user.id)}")
+        dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)

         if not data:
             data: list[Data] = await get_dataset_data(dataset_id=dataset_id)
2 changes: 1 addition & 1 deletion cognee/tasks/ingestion/ingest_data.py

@@ -104,7 +104,7 @@ async def store_data_to_dataset(
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
-        dataset = await create_dataset(dataset_name, user.id, session)
+        dataset = await create_dataset(dataset_name, user, session)

         # Check to see if data should be updated
         data_point = (
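
Every caller now passes the full `User` object instead of `user.id`, since `create_dataset` needs the user to derive the dataset id. A hedged sketch of the migrated call pattern, mirroring `store_data_to_dataset` (the `db_engine` argument stands in for whatever `get_relational_engine()` returns there):

```python
from cognee.modules.data.methods.create_dataset import create_dataset
from cognee.modules.users.methods import get_default_user


async def ensure_dataset(db_engine, dataset_name: str):
    user = await get_default_user()
    async with db_engine.get_async_session() as session:
        # New signature: pass the User itself, not user.id.
        return await create_dataset(dataset_name, user, session)
```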
7 changes: 3 additions & 4 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml

@@ -57,6 +57,7 @@ dependencies = [
     "sentry-sdk[fastapi]>=2.9.0,<3",
     "structlog>=25.2.0,<26",
     "onnxruntime<=1.21.1",
+    "pylance==0.22.0",
 ]

 [project.optional-dependencies]
@@ -122,7 +123,6 @@ dev = [
     "ruff>=0.9.2,<1.0.0",
     "tweepy==4.14.0",
     "gitpython>=3.1.43,<4",
-    "pylance==0.22.0",
     "mkdocs-material>=9.5.42,<10",
     "mkdocs-minify-plugin>=0.8.0,<0.9",
     "mkdocstrings[python]>=0.26.2,<0.27",