-
Notifications
You must be signed in to change notification settings - Fork 966
Neo4j base node label #903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
a036bd0
c37b5da
0dc0c3c
1a9cf90
dab3c4e
97a499b
3e021ef
86d6ea4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
|
|
||
| logger = get_logger("Neo4jAdapter", level=ERROR) | ||
|
|
||
| BASE_LABEL = "__Node__" | ||
|
|
||
| class Neo4jAdapter(GraphDBInterface): | ||
| """ | ||
|
|
@@ -48,6 +49,12 @@ def __init__( | |
| graph_database_url, | ||
| auth=(graph_database_username, graph_database_password), | ||
| max_connection_lifetime=120, | ||
| notifications_min_severity="OFF", | ||
| ) | ||
| # Create contraint/index | ||
| self.query( | ||
| ("CREATE CONSTRAINT IF NOT EXISTS FOR " | ||
| f"(n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;") | ||
| ) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix async/await and trailing whitespace issues. Two issues to address:
Apply this diff to fix both issues: - notifications_min_severity="OFF",
- )
- # Create contraint/index
- self.query(
- ("CREATE CONSTRAINT IF NOT EXISTS FOR "
- f"(n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;")
+ notifications_min_severity="OFF",
)
+ # Create constraint/index asynchronously in first query call
+ self._constraint_created = FalseThen add this method to handle lazy constraint creation: async def _ensure_constraint(self):
if not self._constraint_created:
await self.query(
f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;"
)
self._constraint_created = TrueCall 🧰 Tools🪛 Pylint (3.3.7)[convention] 56-56: Trailing whitespace (C0303) 🤖 Prompt for AI Agents |
||
|
|
||
| @asynccontextmanager | ||
|
|
@@ -103,8 +110,8 @@ async def has_node(self, node_id: str) -> bool: | |
| - bool: True if the node exists, otherwise False. | ||
| """ | ||
| results = self.query( | ||
| """ | ||
| MATCH (n) | ||
| f""" | ||
| MATCH (n:`{BASE_LABEL}`) | ||
| WHERE n.id = $node_id | ||
| RETURN COUNT(n) > 0 AS node_exists | ||
| """, | ||
|
|
@@ -129,7 +136,7 @@ async def add_node(self, node: DataPoint): | |
| serialized_properties = self.serialize_properties(node.model_dump()) | ||
|
|
||
| query = dedent( | ||
| """MERGE (node {id: $node_id}) | ||
| f"""MERGE (node: `{BASE_LABEL}`{{id: $node_id}}) | ||
| ON CREATE SET node += $properties, node.updated_at = timestamp() | ||
| ON MATCH SET node += $properties, node.updated_at = timestamp() | ||
| WITH node, $node_label AS label | ||
|
|
@@ -161,9 +168,9 @@ async def add_nodes(self, nodes: list[DataPoint]) -> None: | |
|
|
||
| - None: None | ||
| """ | ||
| query = """ | ||
| query = f""" | ||
| UNWIND $nodes AS node | ||
| MERGE (n {id: node.node_id}) | ||
| MERGE (n: `{BASE_LABEL}`{{id: node.node_id}}) | ||
| ON CREATE SET n += node.properties, n.updated_at = timestamp() | ||
| ON MATCH SET n += node.properties, n.updated_at = timestamp() | ||
| WITH n, node.label AS label | ||
|
|
@@ -215,9 +222,9 @@ async def extract_nodes(self, node_ids: List[str]): | |
|
|
||
| A list of nodes represented as dictionaries. | ||
| """ | ||
| query = """ | ||
| query = f""" | ||
| UNWIND $node_ids AS id | ||
| MATCH (node {id: id}) | ||
| MATCH (node: `{BASE_LABEL}`{{id: id}}) | ||
| RETURN node""" | ||
|
|
||
| params = {"node_ids": node_ids} | ||
|
|
@@ -240,7 +247,7 @@ async def delete_node(self, node_id: str): | |
|
|
||
| The result of the query execution, typically indicating success or failure. | ||
| """ | ||
| query = "MATCH (node {id: $node_id}) DETACH DELETE node" | ||
| query = f"MATCH (node: `{BASE_LABEL}`{{id: $node_id}}) DETACH DELETE node" | ||
| params = {"node_id": node_id} | ||
|
|
||
| return await self.query(query, params) | ||
|
|
@@ -259,9 +266,9 @@ async def delete_nodes(self, node_ids: list[str]) -> None: | |
|
|
||
| - None: None | ||
| """ | ||
| query = """ | ||
| query = f""" | ||
| UNWIND $node_ids AS id | ||
| MATCH (node {id: id}) | ||
| MATCH (node: `{BASE_LABEL}`{{id: id}}) | ||
| DETACH DELETE node""" | ||
|
|
||
| params = {"node_ids": node_ids} | ||
|
|
@@ -284,16 +291,15 @@ async def has_edge(self, from_node: UUID, to_node: UUID, edge_label: str) -> boo | |
|
|
||
| - bool: True if the edge exists, otherwise False. | ||
| """ | ||
| query = """ | ||
| MATCH (from_node)-[relationship]->(to_node) | ||
| WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id AND type(relationship) = $edge_label | ||
| query = f""" | ||
| MATCH (from_node: `{BASE_LABEL}`)-[:`{edge_label}`]->(to_node: `{BASE_LABEL}`) | ||
| WHERE from_node.id = $from_node_id AND to_node.id = $to_node_id | ||
| RETURN COUNT(relationship) > 0 AS edge_exists | ||
| """ | ||
|
|
||
| params = { | ||
| "from_node_id": str(from_node), | ||
| "to_node_id": str(to_node), | ||
| "edge_label": edge_label, | ||
| } | ||
|
Comment on lines
+294
to
303
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Security concern: Edge label should be parameterized to prevent Cypher injection. The edge label is now interpolated directly into the query string instead of being passed as a parameter. This could lead to Cypher injection vulnerabilities if the edge label comes from untrusted sources. Consider parameterizing the edge label or validating it against a whitelist of allowed labels before interpolation. 🤖 Prompt for AI Agents |
||
|
|
||
| edge_exists = await self.query(query, params) | ||
|
|
@@ -366,9 +372,9 @@ async def add_edge( | |
|
|
||
| query = dedent( | ||
| f"""\ | ||
| MATCH (from_node {{id: $from_node}}), | ||
| (to_node {{id: $to_node}}) | ||
| MERGE (from_node)-[r:{relationship_name}]->(to_node) | ||
| MATCH (from_node :`{BASE_LABEL}`{{id: $from_node}}), | ||
| (to_node :`{BASE_LABEL}`{{id: $to_node}}) | ||
| MERGE (from_node)-[r:`{relationship_name}`]->(to_node) | ||
|
Comment on lines
+375
to
+377
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Security concern: Relationship name should be parameterized to prevent Cypher injection. The relationship name is interpolated directly into the query string, creating a potential Cypher injection vulnerability. Consider parameterizing the relationship name or validating it against a whitelist before interpolation: - MERGE (from_node)-[r:`{relationship_name}`]->(to_node)
+ CALL apoc.merge.relationship(from_node, $relationship_name, {}, $properties, to_node) YIELD rel AS rAnd update the params to include the relationship name: params = {
"from_node": str(from_node),
"to_node": str(to_node),
- "relationship_name": relationship_name,
+ "relationship_name": relationship_name,
"properties": serialized_properties,
}
🤖 Prompt for AI Agents |
||
| ON CREATE SET r += $properties, r.updated_at = timestamp() | ||
| ON MATCH SET r += $properties, r.updated_at = timestamp() | ||
| RETURN r | ||
|
|
@@ -400,17 +406,17 @@ async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> | |
|
|
||
| - None: None | ||
| """ | ||
| query = """ | ||
| query = f""" | ||
| UNWIND $edges AS edge | ||
| MATCH (from_node {id: edge.from_node}) | ||
| MATCH (to_node {id: edge.to_node}) | ||
| MATCH (from_node: `{BASE_LABEL}`{{id: edge.from_node}}) | ||
| MATCH (to_node: `{BASE_LABEL}`{{id: edge.to_node}}) | ||
| CALL apoc.merge.relationship( | ||
| from_node, | ||
| edge.relationship_name, | ||
| { | ||
| {{ | ||
| source_node_id: edge.from_node, | ||
| target_node_id: edge.to_node | ||
| }, | ||
| }}, | ||
| edge.properties, | ||
| to_node | ||
| ) YIELD rel | ||
|
|
@@ -451,8 +457,8 @@ async def get_edges(self, node_id: str): | |
|
|
||
| A list of edges connecting to the specified node, represented as tuples of details. | ||
| """ | ||
| query = """ | ||
| MATCH (n {id: $node_id})-[r]-(m) | ||
| query = f""" | ||
| MATCH (n: `{BASE_LABEL}`{{id: $node_id}})-[r]-(m) | ||
| RETURN n, r, m | ||
| """ | ||
|
|
||
|
|
@@ -525,24 +531,23 @@ async def get_predecessors(self, node_id: str, edge_label: str = None) -> list[s | |
| - list[str]: A list of predecessor node IDs. | ||
| """ | ||
| if edge_label is not None: | ||
| query = """ | ||
| MATCH (node)<-[r]-(predecessor) | ||
| WHERE node.id = $node_id AND type(r) = $edge_label | ||
| query = f""" | ||
| MATCH (node: `{BASE_LABEL}`)<-[r:`{edge_label}`]-(predecessor) | ||
| WHERE node.id = $node_id | ||
|
Comment on lines
+534
to
+536
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Security concern: Edge label should be parameterized to prevent Cypher injection. The edge label is interpolated directly into the query string, creating a potential Cypher injection vulnerability. Consider using parameterized queries or validating the edge label against a whitelist before interpolation. 🤖 Prompt for AI Agents |
||
| RETURN predecessor | ||
| """ | ||
|
|
||
| results = await self.query( | ||
| query, | ||
| dict( | ||
| node_id=node_id, | ||
| edge_label=edge_label, | ||
| ), | ||
| ) | ||
|
|
||
| return [result["predecessor"] for result in results] | ||
| else: | ||
| query = """ | ||
| MATCH (node)<-[r]-(predecessor) | ||
| query = f""" | ||
| MATCH (node: `{BASE_LABEL}`)<-[r]-(predecessor) | ||
| WHERE node.id = $node_id | ||
| RETURN predecessor | ||
| """ | ||
|
|
@@ -572,9 +577,9 @@ async def get_successors(self, node_id: str, edge_label: str = None) -> list[str | |
| - list[str]: A list of successor node IDs. | ||
| """ | ||
| if edge_label is not None: | ||
| query = """ | ||
| MATCH (node)-[r]->(successor) | ||
| WHERE node.id = $node_id AND type(r) = $edge_label | ||
| query = f""" | ||
| MATCH (node: `{BASE_LABEL}`)-[r:`{edge_label}`]->(successor) | ||
| WHERE node.id = $node_id | ||
|
Comment on lines
+580
to
+582
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Security concern: Edge label should be parameterized to prevent Cypher injection. The edge label is interpolated directly into the query string, creating a potential Cypher injection vulnerability. Consider using parameterized queries or validating the edge label against a whitelist before interpolation. 🤖 Prompt for AI Agents |
||
| RETURN successor | ||
| """ | ||
|
|
||
|
|
@@ -588,8 +593,8 @@ async def get_successors(self, node_id: str, edge_label: str = None) -> list[str | |
|
|
||
| return [result["successor"] for result in results] | ||
| else: | ||
| query = """ | ||
| MATCH (node)-[r]->(successor) | ||
| query = f""" | ||
| MATCH (node: `{BASE_LABEL}`)-[r]->(successor) | ||
| WHERE node.id = $node_id | ||
| RETURN successor | ||
| """ | ||
|
|
@@ -634,8 +639,8 @@ async def get_node(self, node_id: str) -> Optional[Dict[str, Any]]: | |
| - Optional[Dict[str, Any]]: The requested node as a dictionary, or None if it does | ||
| not exist. | ||
| """ | ||
| query = """ | ||
| MATCH (node {id: $node_id}) | ||
| query = f""" | ||
| MATCH (node: `{BASE_LABEL}`{{id: $node_id}}) | ||
| RETURN node | ||
| """ | ||
| results = await self.query(query, {"node_id": node_id}) | ||
|
|
@@ -655,9 +660,9 @@ async def get_nodes(self, node_ids: List[str]) -> List[Dict[str, Any]]: | |
|
|
||
| - List[Dict[str, Any]]: A list of nodes represented as dictionaries. | ||
| """ | ||
| query = """ | ||
| query = f""" | ||
| UNWIND $node_ids AS id | ||
| MATCH (node {id: id}) | ||
| MATCH (node:`{BASE_LABEL}` {{id: id}}) | ||
| RETURN node | ||
| """ | ||
| results = await self.query(query, {"node_ids": node_ids}) | ||
|
|
@@ -677,13 +682,13 @@ async def get_connections(self, node_id: UUID) -> list: | |
|
|
||
| - list: A list of connections represented as tuples of details. | ||
| """ | ||
| predecessors_query = """ | ||
| MATCH (node)<-[relation]-(neighbour) | ||
| predecessors_query = f""" | ||
| MATCH (node:`{BASE_LABEL}`)<-[relation]-(neighbour) | ||
| WHERE node.id = $node_id | ||
| RETURN neighbour, relation, node | ||
| """ | ||
| successors_query = """ | ||
| MATCH (node)-[relation]->(neighbour) | ||
| successors_query = f""" | ||
| MATCH (node:`{BASE_LABEL}`)-[relation]->(neighbour) | ||
| WHERE node.id = $node_id | ||
| RETURN node, relation, neighbour | ||
| """ | ||
|
|
@@ -723,6 +728,7 @@ async def remove_connection_to_predecessors_of( | |
|
|
||
| - None: None | ||
| """ | ||
| # Not understanding | ||
| query = f""" | ||
| UNWIND $node_ids AS id | ||
| MATCH (node:`{id}`)-[r:{edge_label}]->(predecessor) | ||
|
|
@@ -751,6 +757,7 @@ async def remove_connection_to_successors_of( | |
|
|
||
| - None: None | ||
| """ | ||
| # Not understanding | ||
| query = f""" | ||
| UNWIND $node_ids AS id | ||
| MATCH (node:`{id}`)<-[r:{edge_label}]-(successor) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GA tests are passing, but I just found out that the indexing was actually never awaited, it's a minor thing. I can fix it after we merge.