Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
458eeac
Count the number of tokens in documents
alekszievr Jan 28, 2025
51eadef
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1071-input-t…
alekszievr Jan 28, 2025
ba608a4
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1071-input-t…
alekszievr Jan 28, 2025
f6663ab
save token count to relational db
alekszievr Jan 28, 2025
9182be8
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1132-add-num…
alekszievr Jan 28, 2025
72dfec4
Add metrics to metric table
alekszievr Jan 28, 2025
9bd5917
Merge branch 'dev' into feat/cog-1071-input-token-counting
dexters1 Jan 29, 2025
227d94e
Merge branch 'feat/cog-1071-input-token-counting' into feat/cog-1132-…
alekszievr Jan 29, 2025
22b6459
Store list as json instead of array in relational db table
alekszievr Jan 29, 2025
9764441
Merge branch 'dev' into feat/cog-1132-add-num-tokens-to-metric-table
alekszievr Jan 29, 2025
100e7d7
Sum in sql instead of python
alekszievr Jan 29, 2025
c182d47
Unify naming
alekszievr Jan 29, 2025
44fa2cd
Return data_points in descriptive metric calculation task
alekszievr Jan 29, 2025
06030ff
Graph metrics getter template in graph db interface and adapters
alekszievr Jan 29, 2025
67d9908
Calculate descriptive metrics in networkx adapter
alekszievr Jan 29, 2025
252ac7f
neo4j metrics
alekszievr Jan 29, 2025
48a51a3
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Jan 30, 2025
9a94db8
remove _table from table name
alekszievr Jan 30, 2025
57fb338
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Jan 31, 2025
e8dcef1
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Feb 1, 2025
b0f6ba7
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Feb 3, 2025
05138fa
Use modules for adding to db instead of infrastructure
alekszievr Feb 3, 2025
f064f52
Merge branch 'feat/cog-1082-metrics-in-graphdb-interface' into feat/c…
alekszievr Feb 3, 2025
c9ee1bc
Merge branch 'feat/cog-1082-metrics-in-networkx-adapter' into feat/co…
alekszievr Feb 3, 2025
af8e798
Merge branch 'dev' into feat/cog-1082-metrics-in-networkx-adapter
alekszievr Feb 3, 2025
406057f
Merge branch 'feat/cog-1082-metrics-in-networkx-adapter' into feat/co…
alekszievr Feb 3, 2025
d93b5f5
minor fixes
alekszievr Feb 3, 2025
c13fdec
minor cleanup
alekszievr Feb 3, 2025
f2ad1d4
Merge branch 'dev' into feat/cog-1082-metrics-in-neo4j-adapter
alekszievr Feb 3, 2025
3e67828
Remove graph metric calculation from the default cognify pipeline
alekszievr Feb 4, 2025
34ce4f8
descriptive metrics tests
alekszievr Feb 5, 2025
1bc55f9
networkx metrics test
alekszievr Feb 5, 2025
c102f26
all descriptive metrics tests
alekszievr Feb 5, 2025
92ae1d0
Merge branch 'dev' into test/metrics_in_adapters
alekszievr Feb 5, 2025
eddfef0
remove neo4j metrics test due to lack of gds plugin
alekszievr Feb 5, 2025
eb63421
Merge branch 'dev' into test/metrics_in_adapters
borisarzentar Feb 6, 2025
e842de6
Merge branch 'dev' into test/metrics_in_adapters
alekszievr Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
neo4j metrics
  • Loading branch information
alekszievr committed Jan 30, 2025
commit 252ac7ff88c265c4455363f5df774957c3b8b980
2 changes: 1 addition & 1 deletion cognee/api/v1/cognify/cognify_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ async def get_default_tasks(
task_config={"batch_size": 10},
),
Task(add_data_points, only_root=True, task_config={"batch_size": 10}),
Task(store_descriptive_metrics, include_optional=True),
Task(store_descriptive_metrics),
]
except Exception as error:
send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id)
Expand Down
161 changes: 149 additions & 12 deletions cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,16 +523,153 @@ async def get_filtered_graph_data(self, attribute_filters):

return (nodes, edges)

async def get_graph_metrics(self):
return {
"num_nodes": -1,
"num_edges": -1,
"mean_degree": -1,
"edge_density": -1,
"num_connected_components": -1,
"sizes_of_connected_components": -1,
"num_selfloops": -1,
"diameter": -1,
"avg_shortest_path_length": -1,
"avg_clustering": -1,
async def graph_exists(self, graph_name="myGraph"):
    """Report whether a GDS in-memory graph called ``graph_name`` is listed.

    Runs ``gds.graph.list()`` through ``self.query`` and looks for
    ``graph_name`` among the collected ``graphNames``. An empty query
    result is treated as "no graphs exist".

    Args:
        graph_name: Name of the projection to look for.

    Returns:
        True if the name is present in the listed graph names, else False.
    """
    rows = await self.query(
        "CALL gds.graph.list() YIELD graphName RETURN collect(graphName) AS graphNames;"
    )
    if not rows:
        # Nothing came back, so there is no catalog entry to match against.
        return False
    return graph_name in rows[0]["graphNames"]

async def project_entire_graph(self, graph_name="myGraph"):
    """
    Projects all node labels and all relationship types into an in-memory GDS graph.

    Returns immediately when ``self.graph_exists(graph_name)`` reports that a
    projection with this name is already present. Otherwise the labels and
    relationship types are read via ``db.labels()`` and
    ``db.relationshipTypes()`` and passed to ``gds.graph.project``.

    Args:
        graph_name: Name to give the projected graph.

    Raises:
        ValueError: If the database reports no node labels or no
            relationship types.
    """
    if await self.graph_exists(graph_name):
        return

    def _cypher_string(value):
        # Labels and relationship types are spliced into the query text as
        # single-quoted literals; escape backslashes and quotes so a name
        # such as O'Brien cannot terminate the literal early.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def _cypher_list(values):
        return "[" + ", ".join(_cypher_string(value) for value in values) + "]"

    node_labels_query = "CALL db.labels() YIELD label RETURN collect(label) AS labels;"
    node_labels_result = await self.query(node_labels_query)
    node_labels = node_labels_result[0]["labels"] if node_labels_result else []

    relationship_types_query = "CALL db.relationshipTypes() YIELD relationshipType RETURN collect(relationshipType) AS relationships;"
    relationship_types_result = await self.query(relationship_types_query)
    relationship_types = (
        relationship_types_result[0]["relationships"] if relationship_types_result else []
    )

    if not node_labels or not relationship_types:
        raise ValueError("No node labels or relationship types found in the database.")

    node_labels_str = _cypher_list(node_labels)
    relationship_types_str = _cypher_list(relationship_types)

    query = f"""
    CALL gds.graph.project(
        {_cypher_string(graph_name)},
        {node_labels_str},
        {relationship_types_str}
    ) YIELD graphName;
    """

    await self.query(query)

async def drop_graph(self, graph_name="myGraph"):
    """Drop the GDS in-memory graph ``graph_name`` if it is currently listed.

    The existence check must be awaited: ``self.graph_exists`` is a
    coroutine function, and an un-awaited coroutine object is always truthy,
    which would make the drop run unconditionally (and leave the coroutine
    never executed).

    Args:
        graph_name: Name of the projection to remove.
    """
    if await self.graph_exists(graph_name):
        drop_query = f"CALL gds.graph.drop('{graph_name}');"
        await self.query(drop_query)

async def get_graph_metrics(self, include_optional=False):
    """Compute descriptive metrics for the graph stored in Neo4j.

    The GDS projection ``myGraph`` is dropped and rebuilt exactly once per
    call so the connected-component query runs against current data. The
    component sizes are streamed once and the component count is derived
    from them, instead of re-projecting and re-running WCC per metric.

    Args:
        include_optional: When True, count self-loops via a query. Diameter,
            average shortest path length and average clustering are not
            implemented for neo4j; they log a warning and report -1. When
            False, all four optional metrics are reported as -1.

    Returns:
        A dict with the keys ``num_nodes``, ``num_edges``, ``mean_degree``,
        ``edge_density``, ``num_connected_components``,
        ``sizes_of_connected_components``, ``num_selfloops``, ``diameter``,
        ``avg_shortest_path_length`` and ``avg_clustering``.
    """
    # NOTE(review): assumes each result is a list whose first record holds the
    # full collection under "nodes" / "elements" -- confirm against
    # get_model_independent_graph_data.
    nodes, edges = await self.get_model_independent_graph_data()
    graph_name = "myGraph"
    await self.drop_graph(graph_name)
    await self.project_entire_graph(graph_name)
    # NOTE(review): the projection is left in the GDS catalog after this call;
    # consider dropping it once the metrics are computed.

    async def _get_mean_degree():
        query = """
        MATCH (n)
        OPTIONAL MATCH (n)-[r]-()
        WITH n, COUNT(r) AS degree
        RETURN avg(degree) AS mean_degree;
        """
        result = await self.query(query)
        return result[0]["mean_degree"] if result else 0

    async def _get_edge_density():
        query = """
        MATCH (n)
        WITH count(n) AS num_nodes
        MATCH ()-[r]->()
        WITH num_nodes, count(r) AS num_edges
        RETURN CASE
            WHEN num_nodes < 2 THEN 0
            ELSE num_edges * 1.0 / (num_nodes * (num_nodes - 1))
        END AS edge_density;
        """
        result = await self.query(query)
        return result[0]["edge_density"] if result else 0

    async def _get_size_of_connected_components():
        # Uses the projection built above; one row per component.
        query = f"""
        CALL gds.wcc.stream('{graph_name}')
        YIELD componentId
        RETURN componentId, count(*) AS size
        ORDER BY size DESC;
        """
        result = await self.query(query)
        return [record["size"] for record in result] if result else []

    async def _count_self_loops():
        query = """
        MATCH (n)-[r]->(n)
        RETURN count(r) AS self_loop_count;
        """
        result = await self.query(query)
        return result[0]["self_loop_count"] if result else 0

    async def _get_diameter():
        logging.warning("Diameter calculation is not implemented for neo4j.")
        return -1

    async def _get_avg_shortest_path_length():
        logging.warning(
            "Average shortest path length calculation is not implemented for neo4j."
        )
        return -1

    async def _get_avg_clustering():
        logging.warning("Average clustering calculation is not implemented for neo4j.")
        return -1

    mean_degree = await _get_mean_degree()
    edge_density = await _get_edge_density()
    component_sizes = await _get_size_of_connected_components()

    mandatory_metrics = {
        # Guard empty results the same way the query helpers above do.
        "num_nodes": len(nodes[0]["nodes"]) if nodes else 0,
        "num_edges": len(edges[0]["elements"]) if edges else 0,
        "mean_degree": mean_degree,
        "edge_density": edge_density,
        # One size entry per component, so the count is the list length.
        "num_connected_components": len(component_sizes),
        "sizes_of_connected_components": component_sizes,
    }

    if include_optional:
        optional_metrics = {
            "num_selfloops": await _count_self_loops(),
            "diameter": await _get_diameter(),
            "avg_shortest_path_length": await _get_avg_shortest_path_length(),
            "avg_clustering": await _get_avg_clustering(),
        }
    else:
        optional_metrics = {
            "num_selfloops": -1,
            "diameter": -1,
            "avg_shortest_path_length": -1,
            "avg_clustering": -1,
        }

    return mandatory_metrics | optional_metrics
1 change: 0 additions & 1 deletion cognee/tasks/storage/descriptive_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ async def store_descriptive_metrics(data_points: list[DataPoint], include_option

token_count_sum = await fetch_token_count(db_engine)
graph_metrics = await graph_engine.get_graph_metrics(include_optional)

table_name = "graph_metrics_table"
metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics

Expand Down
Loading