Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: convert qdrant search results to ScoredPoint
  • Loading branch information
borisarzentar committed Nov 11, 2024
commit d733bfdf6a29b3c5d649a4df411dd1ca1850674b
24 changes: 17 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,24 +109,34 @@ import asyncio
from cognee.api.v1.search import SearchType

async def main():
await cognee.prune.prune_data() # Reset cognee data
await cognee.prune.prune_system(metadata=True) # Reset cognee system state
# Reset cognee data
await cognee.prune.prune_data()
# Reset cognee system state
await cognee.prune.prune_system(metadata=True)

text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""

await cognee.add(text) # Add text to cognee
await cognee.cognify() # Use LLMs and cognee to create knowledge graph
# Add text to cognee
await cognee.add(text)

search_results = await cognee.search( # Search cognee for insights
# Use LLMs and cognee to create knowledge graph
await cognee.cognify()

# Search cognee for insights
search_results = await cognee.search(
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
"Tell me about NLP",
)

for result_text in search_results: # Display results
# Display results
for result_text in search_results:
print(result_text)
# natural_language_processing is_a field
# natural_language_processing is_subfield_of computer_science
# natural_language_processing is_subfield_of information_retrieval

asyncio.run(main())
```
Expand Down
15 changes: 13 additions & 2 deletions cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging
from uuid import UUID
from typing import List, Dict, Optional
from qdrant_client import AsyncQdrantClient, models

from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
from cognee.infrastructure.engine import DataPoint
from ..vector_db_interface import VectorDBInterface
from ..embeddings.EmbeddingEngine import EmbeddingEngine
Expand Down Expand Up @@ -153,7 +155,7 @@ async def search(

client = self.get_qdrant_client()

result = await client.search(
results = await client.search(
collection_name = collection_name,
query_vector = models.NamedVector(
name = "text",
Expand All @@ -165,7 +167,16 @@ async def search(

await client.close()

return result
return [
ScoredResult(
id = UUID(result.id),
payload = {
**result.payload,
"id": UUID(result.id),
},
score = 1 - result.score,
) for result in results
]


async def batch_search(self, collection_name: str, query_texts: List[str], limit: int = None, with_vectors: bool = False):
Expand Down
1 change: 1 addition & 0 deletions cognee/modules/engine/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .generate_node_id import generate_node_id
from .generate_node_name import generate_node_name
from .generate_edge_name import generate_edge_name
2 changes: 2 additions & 0 deletions cognee/modules/engine/utils/generate_edge_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def generate_edge_name(name: str) -> str:
    """Normalize a relationship name into an edge identifier.

    Lowercases the name, converts spaces to underscores, and strips
    apostrophes (e.g. "Is Subfield Of" -> "is_subfield_of").
    """
    normalized = name.lower()
    normalized = normalized.replace(" ", "_")
    return normalized.replace("'", "")
Comment on lines +1 to +2
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add beartype runtime type checking

Given that this PR is specifically about testing beartype in the cognify pipeline, the function should implement runtime type checking using beartype.

Apply this diff:

+from beartype import beartype

+@beartype
 def generate_edge_name(name: str) -> str:
     return name.lower().replace(" ", "_").replace("'", "")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def generate_edge_name(name: str) -> str:
return name.lower().replace(" ", "_").replace("'", "")
from beartype import beartype
@beartype
def generate_edge_name(name: str) -> str:
return name.lower().replace(" ", "_").replace("'", "")

🛠️ Refactor suggestion

Add input validation and handle edge cases

The function should validate input and handle additional edge cases for more robust edge name generation.

Consider this improved implementation:

 def generate_edge_name(name: str) -> str:
+    if not name:
+        raise ValueError("Edge name cannot be empty")
+    # Replace multiple spaces with single underscore
+    # Handle both types of quotes and other common special characters
-    return name.lower().replace(" ", "_").replace("'", "")
+    return re.sub(r'\s+', '_', name.lower()).translate(str.maketrans("'\"", "  ", "!@#$%^&*()+={}[]|\\:;\"'<>,.?/"))

Committable suggestion skipped: line range outside the PR's diff.

2 changes: 1 addition & 1 deletion cognee/modules/engine/utils/generate_node_name.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def generate_node_name(name: str) -> str:
return name.lower().replace(" ", "_").replace("'", "")
return name.lower().replace("'", "")
6 changes: 3 additions & 3 deletions cognee/tasks/graph/extract_graph_from_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.engine.models import EntityType, Entity
from cognee.modules.engine.utils import generate_node_id, generate_node_name
from cognee.modules.engine.utils import generate_edge_name, generate_node_id, generate_node_name
from cognee.tasks.storage import add_data_points

async def extract_graph_from_data(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel]):
Expand Down Expand Up @@ -95,7 +95,7 @@ async def extract_graph_from_data(data_chunks: list[DocumentChunk], graph_model:
for edge in graph.edges:
source_node_id = generate_node_id(edge.source_node_id)
target_node_id = generate_node_id(edge.target_node_id)
relationship_name = generate_node_name(edge.relationship_name)
relationship_name = generate_edge_name(edge.relationship_name)

edge_key = str(source_node_id) + str(target_node_id) + relationship_name

Expand All @@ -105,7 +105,7 @@ async def extract_graph_from_data(data_chunks: list[DocumentChunk], graph_model:
target_node_id,
edge.relationship_name,
dict(
relationship_name = generate_node_name(edge.relationship_name),
relationship_name = generate_edge_name(edge.relationship_name),
source_node_id = source_node_id,
target_node_id = target_node_id,
),
Expand Down
2 changes: 1 addition & 1 deletion cognee/tests/test_qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ async def main():

from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
random_node = (await vector_engine.search("Entity_name", "AI"))[0]
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
random_node_name = random_node.payload["text"]

search_results = await cognee.search(SearchType.INSIGHTS, query = random_node_name)
Expand Down
2 changes: 1 addition & 1 deletion cognee/tests/test_weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def main():

from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
random_node = (await vector_engine.search("Entity_name", "AI"))[0]
random_node = (await vector_engine.search("Entity_name", "quantum computer"))[0]
random_node_name = random_node.payload["text"]

search_results = await cognee.search(SearchType.INSIGHTS, query = random_node_name)
Expand Down