Skip to content

Commit 6841c83

Browse files
committed
fix: fixes cognify duplicated edges and resets the methods to an older version
1 parent b0eb9af commit 6841c83

File tree

2 files changed

+83
-110
lines changed

2 files changed

+83
-110
lines changed

cognee/modules/graph/utils/get_graph_from_model.py

Lines changed: 67 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,8 @@
11
from datetime import datetime, timezone
2-
32
from cognee.infrastructure.engine import DataPoint
43
from cognee.modules.storage.utils import copy_model
54

def get_graph_from_model(data_point: DataPoint, include_root=True, added_nodes=None, added_edges=None):
    """Flatten a DataPoint and the DataPoints it references into nodes and edges.

    Every field of ``data_point`` is visited. The ``_metadata`` field is
    skipped. A field holding a DataPoint, or a non-empty list whose first
    element is a DataPoint, is expanded recursively and linked to
    ``data_point`` by an edge named after the field. Every other field is
    kept as a plain property of the node built for ``data_point``.

    Args:
        data_point: The root DataPoint to convert.
        include_root: When true, a copy of ``data_point`` without its
            DataPoint-valued fields is appended to the returned nodes.
        added_nodes: Dict keyed by ``str(node.id)`` of nodes that were already
            emitted. It is updated in place. A fresh dict is used when omitted.
        added_edges: Dict keyed by ``str(source) + str(target) + name`` of
            edges that were already emitted. It is updated in place. A fresh
            dict is used when omitted.

    Returns:
        A ``(nodes, edges)`` tuple. Each edge is a
        ``(source_id, target_id, relationship_name, properties)`` tuple.
    """
    # Never use dict literals as defaults: they are created once and would
    # carry "already added" state over from one unrelated call to the next.
    if added_nodes is None:
        added_nodes = {}
    if added_edges is None:
        added_edges = {}

    nodes = []
    edges = []

    data_point_properties = {}
    excluded_properties = set()

    for field_name, field_value in data_point:
        if field_name == "_metadata":
            continue

        if isinstance(field_value, DataPoint):
            excluded_properties.add(field_name)
            _add_property_graph(
                data_point, field_name, field_value, nodes, edges, added_nodes, added_edges
            )
            continue

        if isinstance(field_value, list) and len(field_value) > 0 and isinstance(field_value[0], DataPoint):
            excluded_properties.add(field_name)

            for item in field_value:
                _add_property_graph(
                    data_point,
                    field_name,
                    item,
                    nodes,
                    edges,
                    added_nodes,
                    added_edges,
                    edge_metadata={"type": "list"},
                )
            continue

        data_point_properties[field_name] = field_value

    SimpleDataPointModel = copy_model(
        type(data_point),
        include_fields={
            "_metadata": (dict, data_point._metadata),
        },
        exclude_fields=excluded_properties,
    )

    if include_root:
        nodes.append(SimpleDataPointModel(**data_point_properties))

    return nodes, edges


def _add_property_graph(
    data_point,
    field_name,
    field_value,
    nodes,
    edges,
    added_nodes,
    added_edges,
    edge_metadata=None,
):
    """Merge the graph of one referenced DataPoint into the caller's graph.

    Appends unseen nodes and edges of ``field_value``'s graph to ``nodes`` and
    ``edges``, then links ``data_point`` to each node returned by
    ``get_own_properties`` with an edge named ``field_name``. When
    ``edge_metadata`` is given, a copy of it is stored under the ``"metadata"``
    key of those linking edges.
    """
    # Recurse on copies of the tracking dicts. If the nested call shared them
    # it would mark deeper descendants as seen, and the filters below would
    # then discard those descendants instead of returning them.
    property_nodes, property_edges = get_graph_from_model(
        field_value, True, dict(added_nodes), dict(added_edges)
    )

    for node in property_nodes:
        node_key = str(node.id)

        if node_key not in added_nodes:
            nodes.append(node)
            added_nodes[node_key] = True

    for edge in property_edges:
        edge_key = str(edge[0]) + str(edge[1]) + edge[2]

        if edge_key not in added_edges:
            edges.append(edge)
            added_edges[edge_key] = True

    for property_node in get_own_properties(property_nodes, property_edges):
        edge_key = str(data_point.id) + str(property_node.id) + field_name

        if edge_key in added_edges:
            continue

        edge_properties = {
            "source_node_id": data_point.id,
            "target_node_id": property_node.id,
            "relationship_name": field_name,
            "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
        }
        if edge_metadata is not None:
            edge_properties["metadata"] = dict(edge_metadata)

        edges.append((data_point.id, property_node.id, field_name, edge_properties))
        added_edges[edge_key] = True


11196
def get_own_properties(property_nodes, property_edges):
11297
own_properties = []
11398

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,29 @@
1-
from typing import Callable
2-
31
from pydantic_core import PydanticUndefined
4-
52
from cognee.infrastructure.engine import DataPoint
63
from cognee.modules.storage.utils import copy_model
74

85

def get_model_instance_from_graph(nodes: list[DataPoint], edges: list, entity_id: str):
    """Rebuild a nested model instance from flat graph nodes and edges.

    Edges are applied in order. For each edge the source node is replaced in
    the lookup table by an instance of a copied model type that has an extra
    field named after the edge label and holding the target node. Edges whose
    properties carry ``{"metadata": {"type": "list"}}`` append the target to a
    list field; all other edges set a single-valued field.

    Args:
        nodes: Graph nodes, looked up by their ``id``.
        edges: Sequences of ``(source_id, target_id, label)`` optionally
            followed by a properties dict as a fourth element.
        entity_id: Id of the node to return after all edges are applied.

    Returns:
        The (possibly rebuilt) node stored under ``entity_id``.

    Raises:
        KeyError: If an edge or ``entity_id`` refers to an id not in ``nodes``.
    """
    node_map = {node.id: node for node in nodes}

    for edge in edges:
        source_node_id, target_node_id, edge_label = edge[0], edge[1], edge[2]
        source_node = node_map[source_node_id]
        target_node = node_map[target_node_id]
        edge_properties = edge[3] if len(edge) == 4 else {}
        edge_metadata = edge_properties.get("metadata", {})
        edge_type = edge_metadata.get("type")

        # Work on the dumped dict and assign the field explicitly. Passing the
        # dump and the new field as two ** expansions raises TypeError as soon
        # as the source already has this field (e.g. the second list edge).
        source_values = source_node.model_dump()

        if edge_type == "list":
            NewModel = copy_model(
                type(source_node),
                {edge_label: (list[type(target_node)], PydanticUndefined)},
            )
            # Accumulate targets so earlier list items are not overwritten.
            previous_targets = list(source_values.get(edge_label) or [])
            source_values[edge_label] = previous_targets + [target_node]
        else:
            NewModel = copy_model(
                type(source_node),
                {edge_label: (type(target_node), PydanticUndefined)},
            )
            source_values[edge_label] = target_node

        node_map[source_node_id] = NewModel(**source_values)

    return node_map[entity_id]

0 commit comments

Comments
 (0)