Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
458eeac
Count the number of tokens in documents
alekszievr Jan 28, 2025
51eadef
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1071-input-t…
alekszievr Jan 28, 2025
ba608a4
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1071-input-t…
alekszievr Jan 28, 2025
f6663ab
save token count to relational db
alekszievr Jan 28, 2025
9182be8
Merge branch 'COG-970-refactor-tokenizing' into feat/cog-1132-add-num…
alekszievr Jan 28, 2025
72dfec4
Add metrics to metric table
alekszievr Jan 28, 2025
9bd5917
Merge branch 'dev' into feat/cog-1071-input-token-counting
dexters1 Jan 29, 2025
227d94e
Merge branch 'feat/cog-1071-input-token-counting' into feat/cog-1132-…
alekszievr Jan 29, 2025
22b6459
Store list as json instead of array in relational db table
alekszievr Jan 29, 2025
9764441
Merge branch 'dev' into feat/cog-1132-add-num-tokens-to-metric-table
alekszievr Jan 29, 2025
100e7d7
Sum in sql instead of python
alekszievr Jan 29, 2025
c182d47
Unify naming
alekszievr Jan 29, 2025
44fa2cd
Return data_points in descriptive metric calculation task
alekszievr Jan 29, 2025
06030ff
Graph metrics getter template in graph db interface and adapters
alekszievr Jan 29, 2025
67d9908
Calculate descriptive metrics in networkx adapter
alekszievr Jan 29, 2025
252ac7f
neo4j metrics
alekszievr Jan 29, 2025
48a51a3
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Jan 30, 2025
9a94db8
remove _table from table name
alekszievr Jan 30, 2025
57fb338
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Jan 31, 2025
e8dcef1
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Feb 1, 2025
b0f6ba7
Merge branch 'dev' into feat/cog-1082-metrics-in-graphdb-interface
alekszievr Feb 3, 2025
05138fa
Use modules for adding to db instead of infrastructure
alekszievr Feb 3, 2025
f064f52
Merge branch 'feat/cog-1082-metrics-in-graphdb-interface' into feat/c…
alekszievr Feb 3, 2025
c9ee1bc
Merge branch 'feat/cog-1082-metrics-in-networkx-adapter' into feat/co…
alekszievr Feb 3, 2025
af8e798
Merge branch 'dev' into feat/cog-1082-metrics-in-networkx-adapter
alekszievr Feb 3, 2025
406057f
Merge branch 'feat/cog-1082-metrics-in-networkx-adapter' into feat/co…
alekszievr Feb 3, 2025
d93b5f5
minor fixes
alekszievr Feb 3, 2025
c13fdec
minor cleanup
alekszievr Feb 3, 2025
f2ad1d4
Merge branch 'dev' into feat/cog-1082-metrics-in-neo4j-adapter
alekszievr Feb 3, 2025
3e67828
Remove graph metric calculation from the default cognify pipeline
alekszievr Feb 4, 2025
34ce4f8
descriptive metrics tests
alekszievr Feb 5, 2025
1bc55f9
networkx metrics test
alekszievr Feb 5, 2025
c102f26
all descriptive metrics tests
alekszievr Feb 5, 2025
92ae1d0
Merge branch 'dev' into test/metrics_in_adapters
alekszievr Feb 5, 2025
eddfef0
remove neo4j metrics test due to lack of gds plugin
alekszievr Feb 5, 2025
eb63421
Merge branch 'dev' into test/metrics_in_adapters
borisarzentar Feb 6, 2025
e842de6
Merge branch 'dev' into test/metrics_in_adapters
alekszievr Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
descriptive metrics tests
  • Loading branch information
alekszievr committed Feb 5, 2025
commit 34ce4f847cd1479b35db3c7e807f1ea9df2aed9e
13 changes: 13 additions & 0 deletions cognee/tests/tasks/descriptive_metrics/metric_consistency_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from cognee.tests.tasks.descriptive_metrics.networkx_metrics_test import get_networkx_metrics
from cognee.tests.tasks.descriptive_metrics.neo4j_metrics_test import get_neo4j_metrics
import asyncio


async def check_graph_metrics_consistency_across_adapters():
    """Assert that the neo4j and networkx adapters report identical graph metrics.

    Both metric getters are called with ``include_optional=False`` and their
    results are compared for equality.

    Raises:
        AssertionError: If the two results differ. The message carries both
            results, because this check is also run as a plain script (see the
            ``__main__`` guard), where a bare ``assert`` would fail without
            showing what differed.
    """
    neo4j_metrics = await get_neo4j_metrics(include_optional=False)
    networkx_metrics = await get_networkx_metrics(include_optional=False)
    assert networkx_metrics == neo4j_metrics, (
        "Graph metrics differ between adapters: "
        f"networkx={networkx_metrics!r}, neo4j={neo4j_metrics!r}"
    )


if __name__ == "__main__":
    # Script entry point: drive the async consistency check to completion.
    consistency_check = check_graph_metrics_consistency_across_adapters()
    asyncio.run(consistency_check)
25 changes: 25 additions & 0 deletions cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from cognee.tests.unit.interfaces.graph.get_graph_from_model_test import (
Document,
DocumentChunk,
Entity,
EntityType,
)
from cognee.tasks.storage.add_data_points import add_data_points


async def create_disconnected_test_graph():
    """Build two groups of data points that do not reference each other and store them.

    The first group is a document, a chunk of that document, one entity type
    and two entities of that type; the chunk also lists itself in ``contains``.
    The second group is a separate document, chunk, entity type and a single
    entity. Both chunks are handed to ``add_data_points``.
    """
    # First group.
    first_document = Document(path="test/path")
    first_chunk = DocumentChunk(
        part_of=first_document, text="This is a chunk of text", contains=[]
    )
    person_type = EntityType(name="Person")
    alice = Entity(name="Alice", is_type=person_type)
    alice_twin = Entity(name="Alice2", is_type=person_type)
    # The chunk containing itself is a deliberate self-loop, present so that
    # the self-loop counting functionality gets exercised.
    for member in (alice, alice_twin, first_chunk):
        first_chunk.contains.append(member)

    # Second group, built from its own objects only.
    second_document = Document(path="test/path2")
    second_chunk = DocumentChunk(
        part_of=second_document, text="This is a chunk of text", contains=[]
    )
    second_person_type = EntityType(name="Person")
    bob = Entity(name="Bob", is_type=second_person_type)
    second_chunk.contains.append(bob)

    chunks_to_store = [first_chunk, second_chunk]
    await add_data_points(chunks_to_store)
42 changes: 42 additions & 0 deletions cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.graph import get_graph_engine
import cognee
import asyncio
import pytest


async def get_neo4j_metrics(include_optional=True):
    """Populate a fresh neo4j graph with the test data and return its metrics.

    Args:
        include_optional: Forwarded unchanged to the engine's
            ``get_graph_metrics`` call.

    Returns:
        Whatever ``graph_engine.get_graph_metrics`` returns for the test graph.
    """
    # Clear the engine factory cache before switching provider
    # (presumably so a previously created engine is not reused).
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("neo4j")

    engine = await get_graph_engine()
    # Start from an empty graph, then load the fixture data.
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)


@pytest.mark.asyncio
async def test_neo4j_metrics():
    """Check the metrics reported by the neo4j adapter against fixed expected values."""
    metrics = await get_neo4j_metrics(include_optional=True)

    num_nodes = metrics["num_nodes"]
    assert num_nodes == 9, f"Expected 9 nodes, got {num_nodes}"

    num_edges = metrics["num_edges"]
    assert num_edges == 9, f"Expected 9 edges, got {num_edges}"

    mean_degree = metrics["mean_degree"]
    assert mean_degree == 2, f"Expected mean degree is 2, got {mean_degree}"

    edge_density = metrics["edge_density"]
    assert edge_density == 0.125, f"Expected edge density is 0.125, got {edge_density}"

    num_components = metrics["num_connected_components"]
    assert num_components == 2, f"Expected 2 connected components, got {num_components}"

    component_sizes = metrics["sizes_of_connected_components"]
    assert component_sizes == [5, 4], (
        f"Expected connected components of size [5, 4], got {component_sizes}"
    )

    num_selfloops = metrics["num_selfloops"]
    assert num_selfloops == 1, f"Expected 1 self-loop, got {num_selfloops}"


if __name__ == "__main__":
    # Script entry point: run the async neo4j metrics test directly.
    neo4j_check = test_neo4j_metrics()
    asyncio.run(neo4j_check)
53 changes: 53 additions & 0 deletions cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.graph import get_graph_engine
import cognee
import asyncio


async def get_networkx_metrics(include_optional=True):
    """Populate a fresh networkx graph with the test data and return its metrics.

    Args:
        include_optional: Forwarded unchanged to the engine's
            ``get_graph_metrics`` call.

    Returns:
        Whatever ``graph_engine.get_graph_metrics`` returns for the test graph.
    """
    # Clear the engine factory cache before switching provider
    # (presumably so a previously created engine is not reused).
    create_graph_engine.cache_clear()
    cognee.config.set_graph_database_provider("networkx")

    engine = await get_graph_engine()
    # Start from an empty graph, then load the fixture data.
    await engine.delete_graph()
    await create_disconnected_test_graph()

    return await engine.get_graph_metrics(include_optional=include_optional)


async def assert_networkx_metrics():
    """Check the metrics reported by the networkx adapter against fixed expected values.

    Optional metrics are requested as well: diameter and average shortest path
    length are expected to be ``None``, and average clustering to be 0.
    """
    metrics = await get_networkx_metrics(include_optional=True)

    num_nodes = metrics["num_nodes"]
    assert num_nodes == 9, f"Expected 9 nodes, got {num_nodes}"

    num_edges = metrics["num_edges"]
    assert num_edges == 9, f"Expected 9 edges, got {num_edges}"

    mean_degree = metrics["mean_degree"]
    assert mean_degree == 2, f"Expected mean degree is 2, got {mean_degree}"

    edge_density = metrics["edge_density"]
    assert edge_density == 0.125, f"Expected edge density is 0.125, got {edge_density}"

    num_components = metrics["num_connected_components"]
    assert num_components == 2, f"Expected 2 connected components, got {num_components}"

    component_sizes = metrics["sizes_of_connected_components"]
    assert component_sizes == [5, 4], (
        f"Expected connected components of size [5, 4], got {component_sizes}"
    )

    num_selfloops = metrics["num_selfloops"]
    assert num_selfloops == 1, f"Expected 1 self-loop, got {num_selfloops}"

    # Optional metrics.
    diameter = metrics["diameter"]
    assert diameter is None, (
        f"Diameter should be None for disconnected graphs, got {diameter}"
    )

    avg_path_length = metrics["avg_shortest_path_length"]
    assert avg_path_length is None, (
        f"Average shortest path should be None for disconnected graphs, got {avg_path_length}"
    )

    avg_clustering = metrics["avg_clustering"]
    assert avg_clustering == 0, f"Expected 0 average clustering, got {avg_clustering}"


if __name__ == "__main__":
    # Script entry point: run the async networkx metrics assertions directly.
    networkx_check = assert_networkx_metrics()
    asyncio.run(networkx_check)