Add descriptive graph metrics tests and a CI workflow for the networkx test.

Files added by this patch:
  * .github/workflows/test_descriptive_graph_metrics.yml
      Runs networkx_metrics_test.py through reusable_python_example.yml on
      workflow_dispatch and on pull_request (labeled, synchronize) events.
  * cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
      create_disconnected_test_graph() builds the shared fixture: two
      DocumentChunk-rooted groups of data points, one of which contains an
      intentional self-loop, stored via add_data_points().
  * cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
  * cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
      Each selects its graph database provider, clears the graph, loads the
      fixture and asserts the expected metric values (9 nodes, 9 edges,
      2 connected components of sizes [5, 4], 1 self-loop, ...).
  * cognee/tests/tasks/descriptive_metrics/metric_consistency_test.py
      Asserts that both providers return equal metrics when called with
      include_optional=False.

diff --git a/.github/workflows/test_descriptive_graph_metrics.yml b/.github/workflows/test_descriptive_graph_metrics.yml
new file mode 100644
index 0000000000..91165fec64
--- /dev/null
+++ b/.github/workflows/test_descriptive_graph_metrics.yml
@@ -0,0 +1,28 @@
+name: test | descriptive graph metrics
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled, synchronize]
+
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run_networkx_metrics_test:
+    uses: ./.github/workflows/reusable_python_example.yml
+    with:
+      example-location: ./cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
+    secrets:
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
diff --git a/cognee/tests/tasks/descriptive_metrics/metric_consistency_test.py b/cognee/tests/tasks/descriptive_metrics/metric_consistency_test.py
new file mode 100644
index 0000000000..e9afc02890
--- /dev/null
+++ b/cognee/tests/tasks/descriptive_metrics/metric_consistency_test.py
@@ -0,0 +1,13 @@
+from cognee.tests.tasks.descriptive_metrics.networkx_metrics_test import get_networkx_metrics
+from cognee.tests.tasks.descriptive_metrics.neo4j_metrics_test import get_neo4j_metrics
+import asyncio
+
+
+async def check_graph_metrics_consistency_across_adapters():
+    neo4j_metrics = await get_neo4j_metrics(include_optional=False)
+    networkx_metrics = await get_networkx_metrics(include_optional=False)
+    assert networkx_metrics == neo4j_metrics
+
+
+if __name__ == "__main__":
+    asyncio.run(check_graph_metrics_consistency_across_adapters())
diff --git a/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py b/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
new file mode 100644
index 0000000000..bf4bd3a181
--- /dev/null
+++ b/cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py
@@ -0,0 +1,25 @@
+from cognee.tests.unit.interfaces.graph.get_graph_from_model_test import (
+    Document,
+    DocumentChunk,
+    Entity,
+    EntityType,
+)
+from cognee.tasks.storage.add_data_points import add_data_points
+
+
+async def create_disconnected_test_graph():
+    doc = Document(path="test/path")
+    doc_chunk = DocumentChunk(part_of=doc, text="This is a chunk of text", contains=[])
+    entity_type = EntityType(name="Person")
+    entity = Entity(name="Alice", is_type=entity_type)
+    entity2 = Entity(name="Alice2", is_type=entity_type)
+    # the following self-loop is intentional and serves the purpose of testing the self-loop counting functionality
+    doc_chunk.contains.extend([entity, entity2, doc_chunk])
+
+    doc2 = Document(path="test/path2")
+    doc_chunk2 = DocumentChunk(part_of=doc2, text="This is a chunk of text", contains=[])
+    entity_type2 = EntityType(name="Person")
+    entity3 = Entity(name="Bob", is_type=entity_type2)
+    doc_chunk2.contains.extend([entity3])
+
+    await add_data_points([doc_chunk, doc_chunk2])
diff --git a/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py b/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
new file mode 100644
index 0000000000..2388f4b6d5
--- /dev/null
+++ b/cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py
@@ -0,0 +1,42 @@
+from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
+from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
+from cognee.infrastructure.databases.graph import get_graph_engine
+import cognee
+import asyncio
+import pytest
+
+
+async def get_neo4j_metrics(include_optional=True):
+    create_graph_engine.cache_clear()
+    cognee.config.set_graph_database_provider("neo4j")
+    graph_engine = await get_graph_engine()
+    await graph_engine.delete_graph()
+    await create_disconnected_test_graph()
+    neo4j_graph_metrics = await graph_engine.get_graph_metrics(include_optional=include_optional)
+    return neo4j_graph_metrics
+
+
+@pytest.mark.asyncio
+async def test_neo4j_metrics():
+    neo4j_metrics = await get_neo4j_metrics(include_optional=True)
+    assert neo4j_metrics["num_nodes"] == 9, f"Expected 9 nodes, got {neo4j_metrics['num_nodes']}"
+    assert neo4j_metrics["num_edges"] == 9, f"Expected 9 edges, got {neo4j_metrics['num_edges']}"
+    assert neo4j_metrics["mean_degree"] == 2, (
+        f"Expected mean degree is 2, got {neo4j_metrics['mean_degree']}"
+    )
+    assert neo4j_metrics["edge_density"] == 0.125, (
+        f"Expected edge density is 0.125, got {neo4j_metrics['edge_density']}"
+    )
+    assert neo4j_metrics["num_connected_components"] == 2, (
+        f"Expected 2 connected components, got {neo4j_metrics['num_connected_components']}"
+    )
+    assert neo4j_metrics["sizes_of_connected_components"] == [5, 4], (
+        f"Expected connected components of size [5, 4], got {neo4j_metrics['sizes_of_connected_components']}"
+    )
+    assert neo4j_metrics["num_selfloops"] == 1, (
+        f"Expected 1 self-loop, got {neo4j_metrics['num_selfloops']}"
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(test_neo4j_metrics())
diff --git a/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py b/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
new file mode 100644
index 0000000000..239dd9bf85
--- /dev/null
+++ b/cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py
@@ -0,0 +1,53 @@
+from cognee.tests.tasks.descriptive_metrics.metrics_test_utils import create_disconnected_test_graph
+from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
+from cognee.infrastructure.databases.graph import get_graph_engine
+import cognee
+import asyncio
+
+
+async def get_networkx_metrics(include_optional=True):
+    create_graph_engine.cache_clear()
+    cognee.config.set_graph_database_provider("networkx")
+    graph_engine = await get_graph_engine()
+    await graph_engine.delete_graph()
+    await create_disconnected_test_graph()
+    networkx_graph_metrics = await graph_engine.get_graph_metrics(include_optional=include_optional)
+    return networkx_graph_metrics
+
+
+async def assert_networkx_metrics():
+    networkx_metrics = await get_networkx_metrics(include_optional=True)
+    assert networkx_metrics["num_nodes"] == 9, (
+        f"Expected 9 nodes, got {networkx_metrics['num_nodes']}"
+    )
+    assert networkx_metrics["num_edges"] == 9, (
+        f"Expected 9 edges, got {networkx_metrics['num_edges']}"
+    )
+    assert networkx_metrics["mean_degree"] == 2, (
+        f"Expected mean degree is 2, got {networkx_metrics['mean_degree']}"
+    )
+    assert networkx_metrics["edge_density"] == 0.125, (
+        f"Expected edge density is 0.125, got {networkx_metrics['edge_density']}"
+    )
+    assert networkx_metrics["num_connected_components"] == 2, (
+        f"Expected 2 connected components, got {networkx_metrics['num_connected_components']}"
+    )
+    assert networkx_metrics["sizes_of_connected_components"] == [5, 4], (
+        f"Expected connected components of size [5, 4], got {networkx_metrics['sizes_of_connected_components']}"
+    )
+    assert networkx_metrics["num_selfloops"] == 1, (
+        f"Expected 1 self-loop, got {networkx_metrics['num_selfloops']}"
+    )
+    assert networkx_metrics["diameter"] is None, (
+        f"Diameter should be None for disconnected graphs, got {networkx_metrics['diameter']}"
+    )
+    assert networkx_metrics["avg_shortest_path_length"] is None, (
+        f"Average shortest path should be None for disconnected graphs, got {networkx_metrics['avg_shortest_path_length']}"
+    )
+    assert networkx_metrics["avg_clustering"] == 0, (
+        f"Expected 0 average clustering, got {networkx_metrics['avg_clustering']}"
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(assert_networkx_metrics())