diff --git a/cognee/modules/search/utils/prepare_search_result.py b/cognee/modules/search/utils/prepare_search_result.py index b854a318d0..cbaf760b25 100644 --- a/cognee/modules/search/utils/prepare_search_result.py +++ b/cognee/modules/search/utils/prepare_search_result.py @@ -1,5 +1,6 @@ -from typing import List, cast -from uuid import uuid5, NAMESPACE_OID +import json +from typing import Any, List, Tuple, cast +from uuid import NAMESPACE_OID, uuid5 from cognee.modules.graph.utils import resolve_edges_to_text from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -8,9 +9,33 @@ from cognee.modules.search.utils.transform_insights_to_graph import transform_insights_to_graph +def _normalize_tuple_rows(rows: List[Tuple[Any, ...]]) -> List[Tuple[Any, ...]]: + """Convert tuple rows returned by graph queries into JSON dict tuples.""" + normalized: List[Tuple[Any, ...]] = [] + for row in rows: + normalized_row: list[Any] = [] + for column in row: + if isinstance(column, dict): + normalized_row.append(column) + elif isinstance(column, str): + try: + normalized_row.append(json.loads(column)) + except json.JSONDecodeError: + normalized_row.append({"value": column}) + else: + normalized_row.append(column) + normalized.append(tuple(normalized_row)) + return normalized + + async def prepare_search_result(search_result): results, context, datasets = search_result + if isinstance(context, list) and context and isinstance(context[0], tuple): + context = _normalize_tuple_rows(context) + if isinstance(results, list) and results and isinstance(results[0], tuple): + results = _normalize_tuple_rows(results) + graphs = None result_graph = None context_texts = {} @@ -27,6 +52,8 @@ async def prepare_search_result(search_result): isinstance(context, List) and len(context) > 0 and isinstance(context[0], tuple) + and len(context[0]) > 1 + and isinstance(context[0][1], dict) and context[0][1].get("relationship_name") ): context_graph = transform_insights_to_graph(context) @@ -35,13 +62,16 @@ async def prepare_search_result(search_result): } results = None elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge): - context_graph = transform_context_to_graph(context) + edge_context = cast(List[Edge], context) + context_graph = transform_context_to_graph(edge_context) graphs = { ", ".join([dataset.name for dataset in datasets]): context_graph, } context_texts = { - ", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text(context), + ", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text( + edge_context + ), } elif isinstance(context, str): context_texts = { @@ -53,7 +83,8 @@ async def prepare_search_result(search_result): } if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge): - result_graph = transform_context_to_graph(results) + edge_results = cast(List[Edge], results) + result_graph = transform_context_to_graph(edge_results) return { "result": result_graph or results[0] if results and len(results) == 1 else results, diff --git a/cognee/tests/test_cypher_search.py b/cognee/tests/test_cypher_search.py new file mode 100644 index 0000000000..9d9aa7297d --- /dev/null +++ b/cognee/tests/test_cypher_search.py @@ -0,0 +1,135 @@ +import json +import os +import pathlib + +import cognee +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.infrastructure.databases.relational import create_db_and_tables +from cognee.modules.search.types import SearchType + + +async def main(): + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_cypher_search") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_cypher_search") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + await create_db_and_tables() + + graph_engine = await get_graph_engine() + + now = "2025-11-05 00:00:00" + person_props = json.dumps({"name": "Alice"}) + project_props = json.dumps({"name": "Apollo"}) + + await graph_engine.query( + """ + CREATE (p:Node { + id: 'person-1', + name: 'Alice', + type: 'Person', + properties: $person_props, + created_at: timestamp($now), + updated_at: timestamp($now) + }) + """, + {"now": now, "person_props": person_props}, + ) + + await graph_engine.query( + """ + CREATE (p:Node { + id: 'project-1', + name: 'Apollo', + type: 'Project', + properties: $project_props, + created_at: timestamp($now), + updated_at: timestamp($now) + }) + """, + {"now": now, "project_props": project_props}, + ) + + await graph_engine.query( + """ + MATCH (person:Node {id: 'person-1'}), (project:Node {id: 'project-1'}) + MERGE (person)-[r:EDGE {relationship_name: 'WORKS_ON'}]->(project) + ON CREATE SET + r.created_at = timestamp($now), + r.updated_at = timestamp($now), + r.properties = '{}' + ON MATCH SET + r.updated_at = timestamp($now), + r.properties = '{}' + """, + {"now": now}, + ) + + multi_column_raw = await cognee.search( + query_type=SearchType.CYPHER, + query_text=""" + MATCH (p:Node {id: 'person-1'})-[:EDGE {relationship_name: 'WORKS_ON'}]->(proj:Node {id: 'project-1'}) + RETURN p.properties AS person_properties, proj.properties AS project_properties + """, + ) + assert isinstance(multi_column_raw, list) + assert multi_column_raw, "Search returned no rows" + + assert len(multi_column_raw) == 1 + person_raw, project_raw = multi_column_raw[0] + assert isinstance(person_raw, str) + assert isinstance(project_raw, str) + + person_props_result = json.loads(person_raw) + project_props_result = json.loads(project_raw) + assert person_props_result.get("name") == "Alice" + assert project_props_result.get("name") == "Apollo" + + single_column_raw = await cognee.search( + query_type=SearchType.CYPHER, + query_text=""" + MATCH (p:Node {id: 'person-1'})-[:EDGE {relationship_name: 'WORKS_ON'}]->(proj:Node {id: 'project-1'}) + RETURN DISTINCT proj.properties AS project_properties + """, + ) + assert isinstance(single_column_raw, list) + assert single_column_raw, "Search returned no rows" + + assert len(single_column_raw) == 1 + (project_raw,) = single_column_raw[0] + assert isinstance(project_raw, str) + + project_only_props = json.loads(project_raw) + assert project_only_props.get("name") == "Apollo" + + context_only_raw = await cognee.search( + query_type=SearchType.CYPHER, + query_text=""" + MATCH (p:Node {id: 'person-1'})-[:EDGE {relationship_name: 'WORKS_ON'}]->(proj:Node {id: 'project-1'}) + RETURN DISTINCT proj.properties AS project_properties + """, + only_context=True, + ) + assert isinstance(context_only_raw, list) + assert context_only_raw, "Context search returned no rows" + + assert len(context_only_raw) == 1 + context_entry = context_only_raw[0] + assert isinstance(context_entry, dict) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main())