diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py
index 978205d2f2..c727230d71 100644
--- a/cognee/api/v1/cognify/cognify_v2.py
+++ b/cognee/api/v1/cognify/cognify_v2.py
@@ -25,7 +25,7 @@
 )
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
-from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics
+from cognee.modules.data.methods import store_descriptive_metrics
 from cognee.tasks.storage.index_graph_edges import index_graph_edges
 from cognee.tasks.summarization import summarize_text
 
diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py
index 30acc1b952..dfb955cd77 100644
--- a/cognee/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognee/infrastructure/databases/graph/graph_db_interface.py
@@ -54,3 +54,7 @@ async def delete_graph(
     @abstractmethod
     async def get_graph_data(self):
         raise NotImplementedError
+
+    @abstractmethod
+    async def get_graph_metrics(self):
+        raise NotImplementedError
diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
index a5c1f3eb31..4f6f1180c9 100644
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -530,3 +530,17 @@ async def get_filtered_graph_data(self, attribute_filters):
         ]
 
         return (nodes, edges)
+
+    async def get_graph_metrics(self):
+        return {
+            "num_nodes": -1,
+            "num_edges": -1,
+            "mean_degree": -1,
+            "edge_density": -1,
+            "num_connected_components": -1,
+            "sizes_of_connected_components": -1,
+            "num_selfloops": -1,
+            "diameter": -1,
+            "avg_shortest_path_length": -1,
+            "avg_clustering": -1,
+        }
diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py
index ddc1707d31..018799a08f 100644
--- a/cognee/infrastructure/databases/graph/networkx/adapter.py
+++ b/cognee/infrastructure/databases/graph/networkx/adapter.py
@@ -385,3 +385,17 @@ async def get_filtered_graph_data(
         ]
 
         return filtered_nodes, filtered_edges
+
+    async def get_graph_metrics(self):
+        return {
+            "num_nodes": -1,
+            "num_edges": -1,
+            "mean_degree": -1,
+            "edge_density": -1,
+            "num_connected_components": -1,
+            "sizes_of_connected_components": -1,
+            "num_selfloops": -1,
+            "diameter": -1,
+            "avg_shortest_path_length": -1,
+            "avg_clustering": -1,
+        }
diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py
index c32db1d2f0..57ac00c1a0 100644
--- a/cognee/modules/data/methods/__init__.py
+++ b/cognee/modules/data/methods/__init__.py
@@ -11,3 +11,5 @@
 # Delete
 from .delete_dataset import delete_dataset
 from .delete_data import delete_data
+
+from .store_descriptive_metrics import store_descriptive_metrics
diff --git a/cognee/tasks/storage/descriptive_metrics.py b/cognee/modules/data/methods/store_descriptive_metrics.py
similarity index 50%
rename from cognee/tasks/storage/descriptive_metrics.py
rename to cognee/modules/data/methods/store_descriptive_metrics.py
index f7a854e53d..8e5c3b5986 100644
--- a/cognee/tasks/storage/descriptive_metrics.py
+++ b/cognee/modules/data/methods/store_descriptive_metrics.py
@@ -1,5 +1,4 @@
 from cognee.infrastructure.engine import DataPoint
-from cognee.modules.data.processing.document_types import Document
 from cognee.infrastructure.databases.relational import get_relational_engine
 from sqlalchemy import select
 from sqlalchemy.sql import func
@@ -24,25 +23,28 @@ async def fetch_token_count(db_engine) -> int:
     return token_count_sum
 
 
-async def calculate_graph_metrics(graph_data):
-    nodes, edges = graph_data
-    graph_metrics = {
-        "num_nodes": len(nodes),
-        "num_edges": len(edges),
-    }
-    return graph_metrics
-
-
 async def store_descriptive_metrics(data_points: list[DataPoint]):
     db_engine = get_relational_engine()
     graph_engine = await get_graph_engine()
-    graph_data = await graph_engine.get_graph_data()
+    graph_metrics = await graph_engine.get_graph_metrics()
 
-    token_count_sum = await fetch_token_count(db_engine)
-    graph_metrics = await calculate_graph_metrics(graph_data)
-
-    table_name = "graph_metrics_table"
-    metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics
+    async with db_engine.get_async_session() as session:
+        metrics = GraphMetrics(
+            id=uuid.uuid4(),
+            num_tokens=await fetch_token_count(db_engine),
+            num_nodes=graph_metrics["num_nodes"],
+            num_edges=graph_metrics["num_edges"],
+            mean_degree=graph_metrics["mean_degree"],
+            edge_density=graph_metrics["edge_density"],
+            num_connected_components=graph_metrics["num_connected_components"],
+            sizes_of_connected_components=graph_metrics["sizes_of_connected_components"],
+            num_selfloops=graph_metrics["num_selfloops"],
+            diameter=graph_metrics["diameter"],
+            avg_shortest_path_length=graph_metrics["avg_shortest_path_length"],
+            avg_clustering=graph_metrics["avg_clustering"],
+        )
+
+        session.add(metrics)
+        await session.commit()
 
-    await db_engine.insert_data(table_name, metrics_dict)
     return data_points
diff --git a/cognee/modules/data/models/GraphMetrics.py b/cognee/modules/data/models/GraphMetrics.py
index 2103214c8a..d86a2048b0 100644
--- a/cognee/modules/data/models/GraphMetrics.py
+++ b/cognee/modules/data/models/GraphMetrics.py
@@ -7,7 +7,7 @@
 
 
 class GraphMetrics(Base):
-    __tablename__ = "graph_metrics_table"
+    __tablename__ = "graph_metrics"
 
     # TODO: Change ID to reflect unique id of graph database
     id = Column(UUID, primary_key=True, default=uuid4)
diff --git a/cognee/modules/data/models/__init__.py b/cognee/modules/data/models/__init__.py
index bd5774f888..51d6ad1d5d 100644
--- a/cognee/modules/data/models/__init__.py
+++ b/cognee/modules/data/models/__init__.py
@@ -1,3 +1,4 @@
 from .Data import Data
 from .Dataset import Dataset
 from .DatasetData import DatasetData
+from .GraphMetrics import GraphMetrics