Merged
Changes from 6 commits
4 changes: 2 additions & 2 deletions README.md
@@ -28,7 +28,7 @@



Build dynamic Agent memory using scalable, modular ECL (Extract, Cognify, Load) pipelines.
Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Extract, Cognify, Load) pipelines.

More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github.com/topoteretes/cognee/tree/main/evals)

@@ -55,7 +55,7 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
## Features

- Interconnect and retrieve your past conversations, documents, images and audio transcriptions
- Reduce hallucinations, developer effort, and cost.
- Replaces RAG systems and reduces developer effort and cost.
- Load data to graph and vector databases using only Pydantic
- Manipulate your data while ingesting from 30+ data sources

93 changes: 50 additions & 43 deletions cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -29,6 +29,7 @@

logger = get_logger("Neo4jAdapter", level=ERROR)

BASE_LABEL = "__Node__"

class Neo4jAdapter(GraphDBInterface):
"""
@@ -48,6 +49,12 @@ def __init__(
graph_database_url,
auth=(graph_database_username, graph_database_password),
max_connection_lifetime=120,
notifications_min_severity="OFF",
)
# Create contraint/index
self.query(
Collaborator:

GA tests are passing, but I just found out that the indexing was actually never awaited, it's a minor thing. I can fix it after we merge.

("CREATE CONSTRAINT IF NOT EXISTS FOR "
f"(n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;")
)
Contributor:

⚠️ Potential issue

Fix async/await and trailing whitespace issues.

Two issues to address:

  1. The constraint creation uses a synchronous call to self.query() but query() is an async method
  2. Trailing whitespace on line 56

Apply this diff to fix both issues:

-            notifications_min_severity="OFF",
-        )
-        # Create contraint/index
-        self.query(
-            ("CREATE CONSTRAINT IF NOT EXISTS FOR " 
-            f"(n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;")
+            notifications_min_severity="OFF",
         )
+        # Create constraint/index asynchronously in first query call
+        self._constraint_created = False

Then add this method to handle lazy constraint creation:

async def _ensure_constraint(self):
    if not self._constraint_created:
        await self.query(
            f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;"
        )
        self._constraint_created = True

Call await self._ensure_constraint() at the beginning of methods that modify nodes.

🧰 Tools
🪛 Pylint (3.3.7)

[convention] 56-56: Trailing whitespace

(C0303)

🤖 Prompt for AI Agents
In cognee/infrastructure/databases/graph/neo4j_driver/adapter.py around lines 52
to 58, the call to self.query() is incorrectly synchronous while query() is an
async method, and there is trailing whitespace on line 56. To fix this, convert
the constraint creation call to use await with an async method, remove the
trailing whitespace, and implement the provided async _ensure_constraint()
method to lazily create the constraint. Then, call await
self._ensure_constraint() at the start of any methods that modify nodes to
ensure the constraint is created before modifications.
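To make the suggested pattern concrete, here is a minimal, self-contained sketch of the once-only async setup the bot describes; the class, the print-based query stub, and the method bodies are illustrative stand-ins, not the adapter's real implementation:

import asyncio

BASE_LABEL = "__Node__"

class LazyConstraintAdapter:
    """Toy stand-in for Neo4jAdapter, showing only the lazy-constraint idea."""

    def __init__(self):
        self._constraint_created = False

    async def query(self, cypher: str, params: dict | None = None):
        # Stand-in for the real async Neo4j call; just shows what would be sent.
        print("running:", " ".join(cypher.split()), params or {})

    async def _ensure_constraint(self):
        # Runs the CREATE CONSTRAINT statement once, on the first write.
        if not self._constraint_created:
            await self.query(
                f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;"
            )
            self._constraint_created = True

    async def add_node(self, node_id: str):
        await self._ensure_constraint()
        await self.query(f"MERGE (n:`{BASE_LABEL}` {{id: $id}})", {"id": node_id})

asyncio.run(LazyConstraintAdapter().add_node("node-1"))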


@asynccontextmanager
@@ -103,8 +110,8 @@ async def has_node(self, node_id: str) -> bool:
- bool: True if the node exists, otherwise False.
"""
results = self.query(
"""
MATCH (n)
f"""
MATCH (n:`{BASE_LABEL}`)
WHERE n.id = $node_id
RETURN COUNT(n) > 0 AS node_exists
""",
@@ -129,7 +136,7 @@ async def add_node(self, node: DataPoint):
serialized_properties = self.serialize_properties(node.model_dump())

query = dedent(
"""MERGE (node {id: $node_id})
f"""MERGE (node: `{BASE_LABEL}`{{id: $node_id}})
ON CREATE SET node += $properties, node.updated_at = timestamp()
ON MATCH SET node += $properties, node.updated_at = timestamp()
WITH node, $node_label AS label
@@ -161,9 +168,9 @@ async def add_nodes(self, nodes: list[DataPoint]) -> None:

- None: None
"""
query = """
query = f"""
UNWIND $nodes AS node
MERGE (n {id: node.node_id})
MERGE (n: `{BASE_LABEL}`{{id: node.node_id}})
ON CREATE SET n += node.properties, n.updated_at = timestamp()
ON MATCH SET n += node.properties, n.updated_at = timestamp()
WITH n, node.label AS label
@@ -215,9 +222,9 @@ async def extract_nodes(self, node_ids: List[str]):

A list of nodes represented as dictionaries.
"""
query = """
query = f"""
UNWIND $node_ids AS id
MATCH (node {id: id})
MATCH (node: `{BASE_LABEL}`{{id: id}})
RETURN node"""

params = {"node_ids": node_ids}
@@ -240,7 +247,7 @@ async def delete_node(self, node_id: str):

The result of the query execution, typically indicating success or failure.
"""
query = "MATCH (node {id: $node_id}) DETACH DELETE node"
query = f"MATCH (node: `{BASE_LABEL}`{{id: $node_id}}) DETACH DELETE node"
params = {"node_id": node_id}

return await self.query(query, params)
@@ -259,9 +266,9 @@ async def delete_nodes(self, node_ids: list[str]) -> None:

- None: None
"""
query = """
query = f"""
UNWIND $node_ids AS id
MATCH (node {id: id})
MATCH (node: `{BASE_LABEL}`{{id: id}})
DETACH DELETE node"""

params = {"node_ids": node_ids}
@@ -284,16 +291,15 @@ async def has_edge(self, from_node: UUID, to_node: UUID, edge_label: str) -> boo

- bool: True if the edge exists, otherwise False.
"""
query = """
MATCH (from_node)-[relationship]->(to_node)
WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id AND type(relationship) = $edge_label
query = f"""
MATCH (from_node: `{BASE_LABEL}`)-[relationship:`{edge_label}`]->(to_node: `{BASE_LABEL}`)
WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id
RETURN COUNT(relationship) > 0 AS edge_exists
"""

params = {
"from_node_id": str(from_node),
"to_node_id": str(to_node),
"edge_label": edge_label,
}
Comment on lines +294 to 303
Contributor:

⚠️ Potential issue

Security concern: Edge label should be parameterized to prevent Cypher injection.

The edge label is now interpolated directly into the query string instead of being passed as a parameter. This could lead to Cypher injection vulnerabilities if the edge label comes from untrusted sources.

Consider parameterizing the edge label or validating it against a whitelist of allowed labels before interpolation.

🤖 Prompt for AI Agents
In cognee/infrastructure/databases/graph/neo4j_driver/adapter.py around lines
294 to 303, the edge label is directly interpolated into the Cypher query
string, which risks Cypher injection. To fix this, avoid direct string
interpolation of the edge label. Instead, validate the edge label against a
predefined whitelist of allowed labels before including it in the query, or
refactor the query to use parameterized inputs if supported by the Neo4j driver.
This ensures only safe, expected edge labels are used in the query.
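One concrete shape for that guard, sketched here under the assumption that cognee's edge labels are plain identifier-style strings (the helper name and regex are illustrative, not existing code):

import re

# Hypothetical guard: only identifier-style labels may be interpolated into Cypher.
_SAFE_LABEL = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

def validate_label(label: str) -> str:
    if not _SAFE_LABEL.fullmatch(label):
        raise ValueError(f"Unsafe edge label: {label!r}")
    return label

has_edge could then interpolate validate_label(edge_label) rather than the raw value, and the same check would cover the relationship names interpolated in add_edge, get_predecessors, and get_successors further down.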


edge_exists = await self.query(query, params)
@@ -366,9 +372,9 @@ async def add_edge(

query = dedent(
f"""\
MATCH (from_node {{id: $from_node}}),
(to_node {{id: $to_node}})
MERGE (from_node)-[r:{relationship_name}]->(to_node)
MATCH (from_node :`{BASE_LABEL}`{{id: $from_node}}),
(to_node :`{BASE_LABEL}`{{id: $to_node}})
MERGE (from_node)-[r:`{relationship_name}`]->(to_node)
Comment on lines +375 to +377
Contributor:

⚠️ Potential issue

Security concern: Relationship name should be parameterized to prevent Cypher injection.

The relationship name is interpolated directly into the query string, creating a potential Cypher injection vulnerability.

Consider parameterizing the relationship name or validating it against a whitelist before interpolation:

-            MERGE (from_node)-[r:`{relationship_name}`]->(to_node)
+            CALL apoc.merge.relationship(from_node, $relationship_name, {}, $properties, to_node) YIELD rel AS r

And update the params to include the relationship name:

         params = {
             "from_node": str(from_node),
             "to_node": str(to_node),
-            "relationship_name": relationship_name,
+            "relationship_name": relationship_name,
             "properties": serialized_properties,
         }

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In cognee/infrastructure/databases/graph/neo4j_driver/adapter.py around lines
375 to 377, the relationship name is directly interpolated into the Cypher query
string, which poses a Cypher injection risk. To fix this, validate the
relationship name against a predefined whitelist of allowed names before
including it in the query. Alternatively, if possible, parameterize the
relationship name safely. Update the query construction to use the validated or
parameterized relationship name and ensure the parameters dictionary includes
this validated value instead of direct string interpolation.

ON CREATE SET r += $properties, r.updated_at = timestamp()
ON MATCH SET r += $properties, r.updated_at = timestamp()
RETURN r
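Worth noting that the add_edges hunk below already takes the parameterized route via apoc.merge.relationship. A hedged sketch of what add_edge's body could look like if it did the same, assuming the APOC plugin is available on the server (this is illustrative, not the code the PR merged):

# Illustrative rewrite of the query built inside add_edge.
query = dedent(
    f"""\
    MATCH (from_node:`{BASE_LABEL}` {{id: $from_node}}),
          (to_node:`{BASE_LABEL}` {{id: $to_node}})
    CALL apoc.merge.relationship(
        from_node, $relationship_name, {{}}, $properties, to_node
    ) YIELD rel AS r
    SET r.updated_at = timestamp()
    RETURN r
    """
)
params = {
    "from_node": str(from_node),
    "to_node": str(to_node),
    "relationship_name": relationship_name,
    "properties": serialized_properties,
}
return await self.query(query, params)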
@@ -400,17 +406,17 @@ async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) ->

- None: None
"""
query = """
query = f"""
UNWIND $edges AS edge
MATCH (from_node {id: edge.from_node})
MATCH (to_node {id: edge.to_node})
MATCH (from_node: `{BASE_LABEL}`{{id: edge.from_node}})
MATCH (to_node: `{BASE_LABEL}`{{id: edge.to_node}})
CALL apoc.merge.relationship(
from_node,
edge.relationship_name,
{
{{
source_node_id: edge.from_node,
target_node_id: edge.to_node
},
}},
edge.properties,
to_node
) YIELD rel
@@ -451,8 +457,8 @@ async def get_edges(self, node_id: str):

A list of edges connecting to the specified node, represented as tuples of details.
"""
query = """
MATCH (n {id: $node_id})-[r]-(m)
query = f"""
MATCH (n: `{BASE_LABEL}`{{id: $node_id}})-[r]-(m)
RETURN n, r, m
"""

@@ -525,24 +531,23 @@ async def get_predecessors(self, node_id: str, edge_label: str = None) -> list[s
- list[str]: A list of predecessor node IDs.
"""
if edge_label is not None:
query = """
MATCH (node)<-[r]-(predecessor)
WHERE node.id = $node_id AND type(r) = $edge_label
query = f"""
MATCH (node: `{BASE_LABEL}`)<-[r:`{edge_label}`]-(predecessor)
WHERE node.id = $node_id
Comment on lines +534 to +536
Contributor:

⚠️ Potential issue

Security concern: Edge label should be parameterized to prevent Cypher injection.

The edge label is interpolated directly into the query string, creating a potential Cypher injection vulnerability.

Consider using parameterized queries or validating the edge label against a whitelist before interpolation.

🤖 Prompt for AI Agents
In cognee/infrastructure/databases/graph/neo4j_driver/adapter.py around lines
534 to 536, the edge_label is directly interpolated into the Cypher query
string, which risks Cypher injection. To fix this, avoid direct string
interpolation of edge_label; instead, validate edge_label against a predefined
whitelist of allowed labels before including it in the query. Alternatively,
refactor the query to use parameterized queries if supported by the Neo4j
driver, ensuring edge_label is safely handled without direct string insertion.

RETURN predecessor
"""

results = await self.query(
query,
dict(
node_id=node_id,
edge_label=edge_label,
),
)

return [result["predecessor"] for result in results]
else:
query = """
MATCH (node)<-[r]-(predecessor)
query = f"""
MATCH (node: `{BASE_LABEL}`)<-[r]-(predecessor)
WHERE node.id = $node_id
RETURN predecessor
"""
@@ -572,9 +577,9 @@ async def get_successors(self, node_id: str, edge_label: str = None) -> list[str
- list[str]: A list of successor node IDs.
"""
if edge_label is not None:
query = """
MATCH (node)-[r]->(successor)
WHERE node.id = $node_id AND type(r) = $edge_label
query = f"""
MATCH (node: `{BASE_LABEL}`)-[r:`{edge_label}`]->(successor)
WHERE node.id = $node_id
Comment on lines +580 to +582
Contributor:

⚠️ Potential issue

Security concern: Edge label should be parameterized to prevent Cypher injection.

The edge label is interpolated directly into the query string, creating a potential Cypher injection vulnerability.

Consider using parameterized queries or validating the edge label against a whitelist before interpolation.

🤖 Prompt for AI Agents
In cognee/infrastructure/databases/graph/neo4j_driver/adapter.py around lines
580 to 582, the edge label is directly interpolated into the Cypher query
string, which poses a Cypher injection risk. To fix this, avoid direct string
interpolation of the edge label; instead, validate the edge label against a
predefined whitelist of allowed labels before including it in the query.
Alternatively, restructure the query to use parameterized inputs for the edge
label if supported by the Neo4j driver, ensuring no untrusted input is directly
embedded in the query string.

RETURN successor
"""

@@ -588,8 +593,8 @@ async def get_successors(self, node_id: str, edge_label: str = None) -> list[str

return [result["successor"] for result in results]
else:
query = """
MATCH (node)-[r]->(successor)
query = f"""
MATCH (node: `{BASE_LABEL}`)-[r]->(successor)
WHERE node.id = $node_id
RETURN successor
"""
@@ -634,8 +639,8 @@ async def get_node(self, node_id: str) -> Optional[Dict[str, Any]]:
- Optional[Dict[str, Any]]: The requested node as a dictionary, or None if it does
not exist.
"""
query = """
MATCH (node {id: $node_id})
query = f"""
MATCH (node: `{BASE_LABEL}`{{id: $node_id}})
RETURN node
"""
results = await self.query(query, {"node_id": node_id})
@@ -655,9 +660,9 @@ async def get_nodes(self, node_ids: List[str]) -> List[Dict[str, Any]]:

- List[Dict[str, Any]]: A list of nodes represented as dictionaries.
"""
query = """
query = f"""
UNWIND $node_ids AS id
MATCH (node {id: id})
MATCH (node:`{BASE_LABEL}` {{id: id}})
RETURN node
"""
results = await self.query(query, {"node_ids": node_ids})
@@ -677,13 +682,13 @@ async def get_connections(self, node_id: UUID) -> list:

- list: A list of connections represented as tuples of details.
"""
predecessors_query = """
MATCH (node)<-[relation]-(neighbour)
predecessors_query = f"""
MATCH (node:`{BASE_LABEL}`)<-[relation]-(neighbour)
WHERE node.id = $node_id
RETURN neighbour, relation, node
"""
successors_query = """
MATCH (node)-[relation]->(neighbour)
successors_query = f"""
MATCH (node:`{BASE_LABEL}`)-[relation]->(neighbour)
WHERE node.id = $node_id
RETURN node, relation, neighbour
"""
@@ -723,6 +728,7 @@ async def remove_connection_to_predecessors_of(

- None: None
"""
# Not understanding
query = f"""
UNWIND $node_ids AS id
MATCH (node:`{id}`)-[r:{edge_label}]->(predecessor)
@@ -751,6 +757,7 @@ async def remove_connection_to_successors_of(

- None: None
"""
# Not understanding
query = f"""
UNWIND $node_ids AS id
MATCH (node:`{id}`)<-[r:{edge_label}]-(successor)
@@ -57,9 +57,9 @@ async def get_num_connected_components(adapter: Neo4jAdapter, graph_name: str):
found.
"""
query = f"""
CALL gds.wcc.stream('{graph_name}')
YIELD componentId
RETURN count(DISTINCT componentId) AS num_connected_components;
CALL gds.wcc.stats('{graph_name}')
YIELD componentCount
RETURN componentCount AS num_connected_components;
"""

result = await adapter.query(query)
@@ -181,9 +181,9 @@ async def get_avg_clustering(adapter: Neo4jAdapter, graph_name: str):
The average clustering coefficient as a float, or 0 if no results are available.
"""
query = f"""
CALL gds.localClusteringCoefficient.stream('{graph_name}')
YIELD localClusteringCoefficient
RETURN avg(localClusteringCoefficient) AS avg_clustering;
CALL gds.localClusteringCoefficient.stats('{graph_name}')
YIELD averageClusteringCoefficient
RETURN averageClusteringCoefficient AS avg_clustering;
"""

result = await adapter.query(query)
6 changes: 2 additions & 4 deletions cognee/modules/users/methods/get_authenticated_user.py
@@ -21,17 +21,15 @@ async def get_authenticated_user(authorization: str = Header(...)) -> SimpleName
token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"]
)

if payload["tenant_id"]:
if payload.get("tenant_id"):
# SimpleNamespace lets us access dictionary elements like attributes
auth_data = SimpleNamespace(
id=UUID(payload["user_id"]),
tenant_id=UUID(payload["tenant_id"]),
roles=payload["roles"],
)
else:
auth_data = SimpleNamespace(
id=UUID(payload["user_id"]), tenant_id=None, roles=payload["roles"]
)
auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])

return auth_data
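A quick standalone illustration of what the switch to payload.get() changes: a decoded token that carries no tenant_id no longer raises a KeyError on the old payload["tenant_id"] lookup, and the fallback branch now also defaults roles to an empty list. The payload below is a toy dict, not a real cognee JWT:

from types import SimpleNamespace
from uuid import UUID

# Toy decoded-JWT payload with no tenant_id and no roles claim.
payload = {"user_id": "00000000-0000-0000-0000-000000000001"}

if payload.get("tenant_id"):
    auth_data = SimpleNamespace(
        id=UUID(payload["user_id"]),
        tenant_id=UUID(payload["tenant_id"]),
        roles=payload["roles"],
    )
else:
    auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])

print(auth_data.tenant_id, auth_data.roles)  # -> None []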

21 changes: 14 additions & 7 deletions examples/python/relational_database_migration_example.py
@@ -1,7 +1,6 @@
import asyncio
import cognee
import os
import logging

from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.api.v1.visualize.visualize import visualize_graph
@@ -10,13 +9,12 @@
)

from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user

from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
)
from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
create_db_and_tables as create_vector_db_and_tables,
)

# Prerequisites:
@@ -25,17 +23,23 @@
# LLM_API_KEY = "your_key_here"
# 3. Fill all relevant MIGRATION_DB information for the database you want to migrate to graph / Cognee

# NOTE: If you don't have a DB you want to migrate you can try it out with our
# test database at the following location:
# MIGRATION_DB_PATH="/{path_to_your_local_cognee}/cognee/tests/test_data"
# MIGRATION_DB_NAME="migration_database.sqlite"
# MIGRATION_DB_PROVIDER="sqlite"


async def main():
engine = get_migration_relational_engine()

# Clean all data stored in Cognee
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)

# Needed to create principals table
# Create tables for databases
# Needed to create appropriate tables only on the Cognee side
await create_relational_db_and_tables()
await create_pgvector_db_and_tables()
await create_vector_db_and_tables()

print("\nExtracting schema of database to migrate.")
schema = await engine.extract_schema()
@@ -57,8 +61,11 @@ async def main():
await visualize_graph(destination_file_path)
print(f"Visualization can be found at: {destination_file_path}")

# Make sure to set top_k at a high value for a broader search, the default value is only 10!
search_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_text="What kind of data do you contain?"
query_type=SearchType.GRAPH_COMPLETION,
query_text="What kind of data do you contain?",
top_k=1000,
)
print(f"Search results: {search_results}")
