Skip to content
8 changes: 8 additions & 0 deletions cognee-frontend/src/modules/chat/getHistory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { fetch } from '@/utils';

export default function getHistory() {
return fetch(
'/v1/search',
)
Comment on lines +4 to +6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider adding query parameters for pagination and filtering.

Search history could grow large. Consider implementing pagination and filtering capabilities.

Here's a suggested implementation:

+interface HistoryParams {
+  page?: number;
+  limit?: number;
+  startDate?: string;
+  endDate?: string;
+}
+
-export default function getHistory() {
+export default function getHistory(params?: HistoryParams) {
+  const queryParams = new URLSearchParams();
+  if (params?.page) queryParams.append('page', params.page.toString());
+  if (params?.limit) queryParams.append('limit', params.limit.toString());
+  if (params?.startDate) queryParams.append('startDate', params.startDate);
+  if (params?.endDate) queryParams.append('endDate', params.endDate);
+
   return fetch(
-    '/v1/search',
+    `/v1/search${queryParams.toString() ? `?${queryParams.toString()}` : ''}`,
   )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
return fetch(
'/v1/search',
)
interface HistoryParams {
page?: number;
limit?: number;
startDate?: string;
endDate?: string;
}
export default function getHistory(params?: HistoryParams) {
const queryParams = new URLSearchParams();
if (params?.page) queryParams.append('page', params.page.toString());
if (params?.limit) queryParams.append('limit', params.limit.toString());
if (params?.startDate) queryParams.append('startDate', params.startDate);
if (params?.endDate) queryParams.append('endDate', params.endDate);
return fetch(
`/v1/search${queryParams.toString() ? `?${queryParams.toString()}` : ''}`,
)

.then((response) => response.json());
}
15 changes: 13 additions & 2 deletions cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
'use client';

import { v4 } from 'uuid';
import classNames from 'classnames';
import { useCallback, useState } from 'react';
import { useCallback, useEffect, useState } from 'react';
import { CTAButton, Stack, Text, DropdownSelect, TextArea, useBoolean } from 'ohmy-ui';
import { fetch } from '@/utils';
import styles from './SearchView.module.css';
import getHistory from '@/modules/chat/getHistory';

interface Message {
id: string;
Expand Down Expand Up @@ -52,6 +55,14 @@ export default function SearchView() {
}, 300);
}, []);

useEffect(() => {
getHistory()
.then((history) => {
setMessages(history);
scrollToBottom();
});
}, [scrollToBottom]);
Comment on lines +58 to +64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling and loading state management

The current implementation has several potential issues:

  1. No error handling for failed history fetches
  2. Missing loading state indicator
  3. Potential state update on an unmounted component if it unmounts during fetch
  4. Unnecessary re-renders due to scrollToBottom dependency

Consider this improved implementation:

+ const [isLoading, setIsLoading] = useState(false);
  useEffect(() => {
+   let mounted = true;
+   setIsLoading(true);
    getHistory()
      .then((history) => {
+       if (mounted) {
          setMessages(history);
          scrollToBottom();
+       }
      })
+     .catch((error) => {
+       console.error('Failed to fetch chat history:', error);
+       // Consider showing a user-friendly error message
+     })
+     .finally(() => {
+       if (mounted) {
+         setIsLoading(false);
+       }
+     });
+   return () => {
+     mounted = false;
+   };
- }, [scrollToBottom]);
+ }, []); // Remove scrollToBottom from dependencies
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
useEffect(() => {
getHistory()
.then((history) => {
setMessages(history);
scrollToBottom();
});
}, [scrollToBottom]);
const [isLoading, setIsLoading] = useState(false);
useEffect(() => {
let mounted = true;
setIsLoading(true);
getHistory()
.then((history) => {
if (mounted) {
setMessages(history);
scrollToBottom();
}
})
.catch((error) => {
console.error('Failed to fetch chat history:', error);
// Consider showing a user-friendly error message
})
.finally(() => {
if (mounted) {
setIsLoading(false);
}
});
return () => {
mounted = false;
};
}, []); // Remove scrollToBottom from dependencies


const handleSearchSubmit = useCallback((event: React.FormEvent<HTMLFormElement>) => {
event.preventDefault();

Expand All @@ -78,7 +89,7 @@ export default function SearchView() {
'Content-Type': 'application/json',
},
body: JSON.stringify({
query: inputValue,
query: inputValue.trim(),
searchType: searchTypeValue,
}),
})
Expand Down
2 changes: 1 addition & 1 deletion cognee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .api.v1.add import add
from .api.v1.cognify import cognify
from .api.v1.datasets.datasets import datasets
from .api.v1.search import search, SearchType
from .api.v1.search import search, SearchType, get_search_history
from .api.v1.prune import prune

# Pipelines
Expand Down
19 changes: 19 additions & 0 deletions cognee/api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from cognee.api.DTO import InDTO, OutDTO
from cognee.api.v1.search import SearchType
from cognee.modules.search.operations import get_history
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus
Expand Down Expand Up @@ -350,6 +351,24 @@ async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticat
content = {"error": str(error)}
)

class SearchHistoryItem(OutDTO):
    # One entry in a user's search history. Entries come from the union
    # built in get_history: logged queries and logged results interleaved
    # by creation time.
    id: UUID
    text: str
    # Originator of the entry: "user" for queries, "system" for results
    # (the literal labels assigned in get_history).
    user: str
    created_at: datetime

@app.get("/api/v1/search", response_model = list[SearchHistoryItem])
async def get_search_history(user: User = Depends(get_authenticated_user)):
    """Return the authenticated user's search history.

    Mirrors the error shape of the sibling search endpoint: failures are
    reported as a 500 JSONResponse with an "error" field.
    """
    try:
        return await get_history(user.id)
    except Exception as error:
        return JSONResponse(
            status_code = 500,
            content = {"error": str(error)},
        )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Enhance error handling and add logging.

Several improvements could be made to align with the codebase patterns and PR objectives:

  1. Error response structure should match other endpoints (e.g., see /api/v1/datasets).
  2. Add logging to track API usage and errors.
  3. Consider using more specific error handling.

Here's the suggested implementation:

 @app.get("/api/v1/search", response_model = list[SearchHistoryItem])
 async def get_search_history(user: User = Depends(get_authenticated_user)):
+    logger.info(f"Retrieving search history for user: {user.id}")
     try:
         history = await get_history(user.id)
+        logger.info(f"Retrieved {len(history)} search history items")
         return history
     except Exception as error:
+        logger.error(f"Error retrieving search history: {str(error)}")
-        return JSONResponse(
-            status_code = 500,
-            content = {"error": str(error)}
-        )
+        raise HTTPException(
+            status_code = 500,
+            detail = f"Error retrieving search history: {str(error)}"
+        ) from error

Committable suggestion was skipped due to low confidence.


from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig

class LLMConfigDTO(OutDTO, LLMConfig):
Expand Down
1 change: 1 addition & 0 deletions cognee/api/v1/search/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .search_v2 import search, SearchType
from .get_search_history import get_search_history
9 changes: 9 additions & 0 deletions cognee/api/v1/search/get_search_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User

async def get_search_history(user: User = None) -> list:
    """Return the search history for the given user.

    Falls back to the system's default user when no user is supplied.
    """
    active_user = user if user else await get_default_user()
    return await get_history(active_user.id)
16 changes: 11 additions & 5 deletions cognee/api/v1/search/search_v2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
from uuid import UUID
from enum import Enum
from typing import Callable, Dict
from cognee.modules.search.operations import log_query, log_result
from cognee.shared.utils import send_telemetry
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
Expand All @@ -14,15 +16,17 @@ class SearchType(Enum):
INSIGHTS = "INSIGHTS"
CHUNKS = "CHUNKS"

async def search(search_type: SearchType, query: str, user: User = None) -> list:
async def search(query_type: SearchType, query_text: str, user: User = None) -> list:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we maybe rename SearchType to QueryType?

if user is None:
user = await get_default_user()

if user is None:
raise PermissionError("No user found in the system. Please create a user.")

query = await log_query(query_text, str(query_type), user.id)

own_document_ids = await get_document_ids_for_user(user.id)
search_results = await specific_search(search_type, query, user)
search_results = await specific_search(query_type, query_text, user)

filtered_search_results = []

Expand All @@ -33,19 +37,21 @@ async def search(search_type: SearchType, query: str, user: User = None) -> list
if document_id is None or document_id in own_document_ids:
filtered_search_results.append(search_result)

await log_result(query.id, json.dumps(filtered_search_results), user.id)

return filtered_search_results

async def specific_search(search_type: SearchType, query: str, user) -> list:
async def specific_search(query_type: SearchType, query: str, user) -> list:
search_tasks: Dict[SearchType, Callable] = {
SearchType.SUMMARIES: query_summaries,
SearchType.INSIGHTS: query_graph_connections,
SearchType.CHUNKS: query_chunks,
}

search_task = search_tasks.get(search_type)
search_task = search_tasks.get(query_type)

if search_task is None:
raise ValueError(f"Unsupported search type: {search_type}")
raise ValueError(f"Unsupported search type: {query_type}")

send_telemetry("cognee.search EXECUTION STARTED", user.id)

Expand Down
16 changes: 16 additions & 0 deletions cognee/modules/search/models/Query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String
from cognee.infrastructure.databases.relational import Base, UUID

class Query(Base):
__tablename__ = "queries"

id = Column(UUID, primary_key = True, default = uuid4)

text = Column(String)
query_type = Column(String)
user_id = Column(UUID)
Comment on lines +11 to +13
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add constraints for data integrity and security.

Several improvements could be made to enhance data integrity and security:

  1. Add a maximum length constraint to the text column to prevent potential DoS attacks
  2. Add an enum or check constraint for query_type to ensure valid values
  3. Add a foreign key constraint and index for user_id
-    text = Column(String)
-    query_type = Column(String)
-    user_id = Column(UUID)
+    text = Column(String(1024), nullable=False)
+    query_type = Column(String(50), nullable=False)
+    user_id = Column(UUID, ForeignKey('users.id'), nullable=False, index=True)

Also consider adding an enum for query_type:

from enum import Enum
class QueryType(str, Enum):
    SEARCH = "search"
    FILTER = "filter"
    # add other types as needed

query_type = Column(Enum(QueryType), nullable=False)


created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
16 changes: 16 additions & 0 deletions cognee/modules/search/models/Result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import Column, DateTime, Text
from cognee.infrastructure.databases.relational import Base, UUID

class Result(Base):
__tablename__ = "results"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we rename results to query_results?


id = Column(UUID, primary_key = True, default = uuid4)

value = Column(Text)
query_id = Column(UUID)
user_id = Column(UUID)

created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
Comment on lines +15 to +16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Fix timezone handling inconsistencies and standardize datetime usage

There are several inconsistencies in datetime handling across the codebase:

  • Most SQLAlchemy models correctly use timezone-aware DateTime columns with UTC timestamps
  • However, there are problematic cases that need attention:
    • cognee/infrastructure/pipeline/models/Operation.py uses datetime.utcnow without timezone parameter
    • tools/daily_twitter_stats.py, cognee/shared/utils.py, and cognee/infrastructure/databases/graph/falkordb/adapter.py use naive datetime.now() calls
    • The commented code in falkordb adapter shows string-formatted timestamps

Suggested fixes:

  1. Update Operation.py to use timezone-aware UTC timestamps like other models
  2. Replace naive datetime.now() calls with datetime.now(timezone.utc)
  3. Use ISO format with timezone info when string formatting is needed
🔗 Analysis chain

Fix PEP 8 spacing and verify timestamp handling.

  1. Remove spaces around the = operator in keyword arguments.
-    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
-    updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
  1. Ensure consistent timestamp handling across the codebase.
🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Verify consistent timezone handling across the codebase
# Look for potential timezone-related issues or inconsistencies

# Check for datetime usage without explicit timezone
echo "Checking for datetime usage without explicit timezone..."
rg "datetime\.now\(\)" --type py

# Check for timezone-aware datetime column definitions
echo "Checking DateTime column definitions..."
rg "Column\(DateTime" --type py -A 1

Length of output: 5063

3 changes: 3 additions & 0 deletions cognee/modules/search/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .log_query import log_query
from .log_result import log_result
from .get_history import get_history
31 changes: 31 additions & 0 deletions cognee/modules/search/operations/get_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from uuid import UUID
from sqlalchemy import literal, select
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Query import Query
from ..models.Result import Result

async def get_history(user_id: UUID, limit: int = 10) -> list[Result]:
db_engine = get_relational_engine()

Comment on lines +7 to +9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add error handling and input validation.

Consider these improvements for robustness:

  1. Add error handling for database connection failures
  2. Validate that the limit parameter is positive
 async def get_history(user_id: UUID, limit: int = 10) -> list[Result]:
+    if limit <= 0:
+        raise ValueError("Limit must be positive")
+
     db_engine = get_relational_engine()
+    if not db_engine:
+        raise RuntimeError("Failed to connect to database")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async def get_history(user_id: UUID, limit: int = 10) -> list[Result]:
db_engine = get_relational_engine()
async def get_history(user_id: UUID, limit: int = 10) -> list[Result]:
if limit <= 0:
raise ValueError("Limit must be positive")
db_engine = get_relational_engine()
if not db_engine:
raise RuntimeError("Failed to connect to database")

queries_query = select(
Query.id,
Query.text.label("text"),
Query.created_at,
literal("user").label("user")
) \
.filter(Query.user_id == user_id)

results_query = select(
Result.id,
Result.value.label("text"),
Result.created_at,
literal("system").label("user")
) \
.filter(Result.user_id == user_id)

history_query = queries_query.union(results_query).order_by("created_at").limit(limit)

async with db_engine.get_async_session() as session:
history = (await session.execute(history_query)).all()

return history
Comment on lines +26 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling and result processing.

The query execution should handle potential database errors and ensure proper result type conversion.

     history_query = queries_query.union(results_query).order_by("created_at").limit(limit)

     async with db_engine.get_async_session() as session:
-        history = (await session.execute(history_query)).all()
-
-        return history
+        try:
+            result = await session.execute(history_query)
+            history = result.all()
+            return [
+                Result(
+                    id=item.id,
+                    text=item.text,
+                    created_at=item.created_at,
+                    user=item.user
+                ) for item in history
+            ]
+        except Exception as e:
+            raise RuntimeError(f"Failed to fetch history: {str(e)}") from e

Committable suggestion was skipped due to low confidence.

Comment on lines +1 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Codebase verification

Database indices needed for query performance

The codebase analysis reveals that neither the models nor migrations define indices for frequently queried columns. For optimal performance of the history query, you should add the following indices:

  • user_id and created_at columns in both queries and results tables
  • Consider a composite index on (user_id, created_at) as they are used together in filtering and sorting
🔗 Analysis chain

Verify database index coverage for performance.

The queries filter and order by frequently accessed columns. Ensure appropriate indices exist for optimal performance.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Description: Check for index definitions in model files and migrations

# Look for index definitions in the Query model
echo "Checking Query model indices..."
rg -A 5 "Index|index" "cognee/modules/search/models/Query.py"

# Look for index definitions in the Result model
echo "Checking Result model indices..."
rg -A 5 "Index|index" "cognee/modules/search/models/Result.py"

# Look for index creation in migrations
echo "Checking migrations for indices..."
fd -e py . migrations/ --exec rg -A 5 "create_index|CreateIndex"

Length of output: 517


Script:

#!/bin/bash
# Let's try a different approach to find model definitions and migrations

# First, let's locate the model files to understand their structure
echo "Locating model files..."
fd -e py Query.py Result.py

# Look for any SQLAlchemy model definitions and index configurations
echo "Checking model definitions..."
rg -A 10 "class Query|class Result" 

# Look for migration files in common locations
echo "Finding migration files..."
fd -e py . alembic migrations database/migrations db/migrations

# Check for any SQL files that might contain index definitions
echo "Checking SQL files..."
fd -e sql . --exec rg -l "CREATE INDEX|create index"

Length of output: 2454


Script:

#!/bin/bash
# Let's check the initial migration file for table creation and indices
echo "Checking initial migration content..."
cat "alembic/versions/8057ae7329c2_initial_migration.py"

# Also check the model imports to ensure we haven't missed any index definitions
echo "Checking model imports..."
rg -A 5 "from sqlalchemy" "cognee/modules/search/models/Query.py" "cognee/modules/search/models/Result.py"

Length of output: 1792

17 changes: 17 additions & 0 deletions cognee/modules/search/operations/get_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from uuid import UUID
from sqlalchemy import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Query import Query

async def get_queries(user_id: UUID, limit: int) -> list[Query]:
    """Fetch the most recent search queries issued by a user.

    Args:
        user_id: ID of the user whose queries are returned.
        limit: Maximum number of rows to return.

    Returns:
        Query rows ordered by creation time, newest first.
    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # Renamed from `datasets`: this holds Query rows, not datasets
        # (the old name was a copy-paste artifact from the datasets module).
        queries = (await session.scalars(
            select(Query)
                .filter(Query.user_id == user_id)
                .order_by(Query.created_at.desc())
                .limit(limit)
        )).all()

        return queries
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Several improvements needed for robustness and clarity.

  1. The variable name datasets is misleading as it stores query records.
  2. Missing error handling for database operations.
  3. Missing validation for the limit parameter.
  4. Missing function documentation.

Here's the suggested implementation:

 async def get_queries(user_id: UUID, limit: int) -> list[Query]:
+    """Retrieve user's search query history.
+
+    Args:
+        user_id (UUID): The ID of the user whose queries to retrieve
+        limit (int): Maximum number of queries to return
+
+    Returns:
+        list[Query]: List of Query objects ordered by creation date descending
+
+    Raises:
+        ValueError: If limit is not a positive integer
+        SQLAlchemyError: If database operation fails
+    """
+    if not isinstance(limit, int) or limit <= 0:
+        raise ValueError("Limit must be a positive integer")
+
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
-        datasets = (await session.scalars(
-            select(Query)
-                .filter(Query.user_id == user_id)
-                .order_by(Query.created_at.desc())
-                .limit(limit)
-        )).all()
+        try:
+            queries = (await session.scalars(
+                select(Query)
+                    .filter(Query.user_id == user_id)
+                    .order_by(Query.created_at.desc())
+                    .limit(limit)
+            )).all()
+            return queries
+        except SQLAlchemyError as e:
+            # Log the error details
+            raise SQLAlchemyError(f"Failed to retrieve queries: {str(e)}")
-
-        return datasets

Don't forget to add the import:

from sqlalchemy.exc import SQLAlchemyError

17 changes: 17 additions & 0 deletions cognee/modules/search/operations/get_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from uuid import UUID
from sqlalchemy import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Result import Result

async def get_results(user_id: UUID, limit: int = 10) -> list[Result]:
    """Fetch the most recent search results stored for a user.

    Args:
        user_id: ID of the user whose results are returned.
        limit: Maximum number of rows to return. Defaults to 10.

    Returns:
        Result rows ordered by creation time, newest first.
    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        # Renamed from `datasets`: this holds Result rows, not datasets
        # (the old name was a copy-paste artifact from the datasets module).
        results = (await session.scalars(
            select(Result)
                .filter(Result.user_id == user_id)
                .order_by(Result.created_at.desc())
                .limit(limit)
        )).all()

        return results
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling for database operations.

The function should handle potential database errors gracefully to prevent crashes and provide meaningful error messages.

Consider wrapping the database operations in a try-except block:

 async def get_results(user_id: UUID, limit: int = 10) -> list[Result]:
+    """Retrieve search results for a specific user.
+
+    Args:
+        user_id (UUID): The ID of the user whose results to retrieve
+        limit (int, optional): Maximum number of results to return. Defaults to 10.
+
+    Returns:
+        list[Result]: List of search results ordered by creation date
+
+    Raises:
+        DatabaseError: If there's an error accessing the database
+    """
+    if limit < 1:
+        raise ValueError("Limit must be a positive integer")
+
     db_engine = get_relational_engine()
 
     async with db_engine.get_async_session() as session:
-        datasets = (await session.scalars(
-            select(Result)
-                .filter(Result.user_id == user_id)
-                .order_by(Result.created_at.desc())
-                .limit(limit)
-        )).all()
+        try:
+            datasets = (await session.scalars(
+                select(Result)
+                    .filter(Result.user_id == user_id)
+                    .order_by(Result.created_at.desc())
+                    .limit(limit)
+            )).all()
+            return datasets
+        except Exception as e:
+            # Log the error details
+            raise DatabaseError(f"Failed to retrieve search results: {str(e)}") from e
-        return datasets

Committable suggestion was skipped due to low confidence.

19 changes: 19 additions & 0 deletions cognee/modules/search/operations/log_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Query import Query

async def log_query(query_text: str, query_type: str, user_id: UUID) -> Query:
    """Persist a search query and return the stored row.

    Args:
        query_text: The raw query text entered by the user.
        query_type: The search type name (e.g. "SearchType.CHUNKS").
        user_id: ID of the user who issued the query.

    Returns:
        The committed Query row with its generated fields populated.
    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        query = Query(
            text = query_text,
            query_type = query_type,
            user_id = user_id,
        )

        session.add(query)

        await session.commit()

        # Re-load the row before leaving the session: with the default
        # expire_on_commit=True, attribute access after commit (the caller
        # reads query.id) would otherwise trigger a lazy refresh outside
        # the async session and fail.
        await session.refresh(query)

        return query
Comment on lines +5 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling and input validation.

The function needs proper error handling for database operations and input validation.

Here's a suggested implementation with error handling and validation:

 from uuid import UUID
 from cognee.infrastructure.databases.relational import get_relational_engine
 from ..models.Query import Query
+from sqlalchemy.exc import SQLAlchemyError
+from typing import Optional
 
 async def log_query(query_text: str, query_type: str, user_id: UUID) -> Query:
+    """Log a search query to the database.
+
+    Args:
+        query_text: The search query text
+        query_type: The type of search query
+        user_id: The ID of the user performing the search
+
+    Returns:
+        Query: The created Query object
+
+    Raises:
+        ValueError: If query_text is empty or query_type is invalid
+        SQLAlchemyError: If database operation fails
+    """
+    if not query_text.strip():
+        raise ValueError("Query text cannot be empty")
+    
+    if not query_type.strip():
+        raise ValueError("Query type cannot be empty")
+
     db_engine = get_relational_engine()
 
-    async with db_engine.get_async_session() as session:
-        query = Query(
-            text = query_text,
-            query_type = query_type,
-            user_id = user_id,
-        )
+    try:
+        async with db_engine.get_async_session() as session:
+            query = Query(
+                text=query_text.strip(),
+                query_type=query_type.strip(),
+                user_id=user_id,
+            )
 
-        session.add(query)
+            session.add(query)
+            await session.commit()
+            await session.refresh(query)
+            return query
 
-        await session.commit()
-
-        return query
+    except SQLAlchemyError as e:
+        # Log the error here if you have a logging system
+        raise SQLAlchemyError(f"Failed to log query: {str(e)}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async def log_query(query_text: str, query_type: str, user_id: UUID) -> Query:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
query = Query(
text = query_text,
query_type = query_type,
user_id = user_id,
)
session.add(query)
await session.commit()
return query
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Query import Query
from sqlalchemy.exc import SQLAlchemyError
from typing import Optional
async def log_query(query_text: str, query_type: str, user_id: UUID) -> Query:
"""Log a search query to the database.
Args:
query_text: The search query text
query_type: The type of search query
user_id: The ID of the user performing the search
Returns:
Query: The created Query object
Raises:
ValueError: If query_text is empty or query_type is invalid
SQLAlchemyError: If database operation fails
"""
if not query_text.strip():
raise ValueError("Query text cannot be empty")
if not query_type.strip():
raise ValueError("Query type cannot be empty")
db_engine = get_relational_engine()
try:
async with db_engine.get_async_session() as session:
query = Query(
text=query_text.strip(),
query_type=query_type.strip(),
user_id=user_id,
)
session.add(query)
await session.commit()
await session.refresh(query)
return query
except SQLAlchemyError as e:
# Log the error here if you have a logging system
raise SQLAlchemyError(f"Failed to log query: {str(e)}")

15 changes: 15 additions & 0 deletions cognee/modules/search/operations/log_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
```python
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Result import Result

async def log_result(query_id: UUID, result: str, user_id: UUID):
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        session.add(Result(
            value = result,
            query_id = query_id,
            user_id = user_id,
        ))

        await session.commit()
```
Comment on lines +5 to +15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling and input validation.

While the basic implementation works, several important aspects need to be addressed:

  1. Add error handling for database operations
  2. Validate the result string (e.g., check for empty strings, max length)
  3. Add proper transaction handling with rollback support
  4. Include logging for operation status and errors

Here's a suggested implementation:

 from uuid import UUID
+from typing import Optional
+import logging
 from cognee.infrastructure.databases.relational import get_relational_engine
 from ..models.Result import Result
+from sqlalchemy.exc import SQLAlchemyError

+logger = logging.getLogger(__name__)
+
+async def log_result(query_id: UUID, result: str, user_id: UUID) -> Optional[UUID]:
+    if not result or not result.strip():
+        raise ValueError("Result cannot be empty")
+    
+    if len(result) > 10000:  # adjust max length as needed
+        raise ValueError("Result exceeds maximum length")
 
-async def log_result(query_id: UUID, result: str, user_id: UUID):
     db_engine = get_relational_engine()
 
     async with db_engine.get_async_session() as session:
-        session.add(Result(
-            value = result,
-            query_id = query_id,
-            user_id = user_id,
-        ))
+        async with session.begin():
+            try:
+                new_result = Result(
+                    value=result.strip(),
+                    query_id=query_id,
+                    user_id=user_id,
+                )
+                session.add(new_result)
+                await session.commit()
+                logger.info(f"Successfully logged result for query {query_id}")
+                return new_result.id
+            except SQLAlchemyError as e:
+                await session.rollback()
+                logger.error(f"Failed to log result for query {query_id}: {str(e)}")
+                raise

The changes include:

  • Input validation for the result string
  • Proper error handling with rollback support
  • Logging for operation tracking
  • Return value for operation confirmation
  • Transaction management using session.begin()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change

```python
# Before
async def log_result(query_id: UUID, result: str, user_id: UUID):
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        session.add(Result(
            value = result,
            query_id = query_id,
            user_id = user_id,
        ))

        await session.commit()
```

```python
# After
from uuid import UUID
from typing import Optional
import logging
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models.Result import Result
from sqlalchemy.exc import SQLAlchemyError

logger = logging.getLogger(__name__)

async def log_result(query_id: UUID, result: str, user_id: UUID) -> Optional[UUID]:
    if not result or not result.strip():
        raise ValueError("Result cannot be empty")

    if len(result) > 10000:  # adjust max length as needed
        raise ValueError("Result exceeds maximum length")

    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        async with session.begin():
            try:
                new_result = Result(
                    value=result.strip(),
                    query_id=query_id,
                    user_id=user_id,
                )
                session.add(new_result)
                await session.commit()
                logger.info(f"Successfully logged result for query {query_id}")
                return new_result.id
            except SQLAlchemyError as e:
                await session.rollback()
                logger.error(f"Failed to log result for query {query_id}: {str(e)}")
                raise
```

9 changes: 6 additions & 3 deletions cognee/tests/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,27 @@ async def main():
random_node = (await vector_engine.search("entities", "AI"))[0]
random_node_name = random_node.payload["name"]

search_results = await cognee.search(SearchType.INSIGHTS, query = random_node_name)
search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted sentences are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.CHUNKS, query = random_node_name)
search_results = await cognee.search(SearchType.CHUNKS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted chunks are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
search_results = await cognee.search(SearchType.SUMMARIES, query_text = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix typo and enhance CHUNKS and SUMMARIES search tests.

  1. There's a typo in the SUMMARIES print statement ("\n\Extracted")
  2. Both tests would benefit from more specific assertions, similar to the INSIGHTS test

Apply similar improvements to both tests:

     search_results = await cognee.search(SearchType.CHUNKS, query_text = random_node_name)
-    assert len(search_results) != 0, "The search results list is empty."
-    print("\n\nExtracted chunks are:\n")
-    for result in search_results:
-        print(f"{result}\n")
+    assert len(search_results) > 0, "The search results list is empty."
+    for result in search_results:
+        assert isinstance(result, str), "Chunk should be a string"
+        assert len(result.strip()) > 0, "Chunk should not be empty"

     search_results = await cognee.search(SearchType.SUMMARIES, query_text = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\n\nExtracted summaries are:\n")

Committable suggestion was skipped due to low confidence.

history = await cognee.get_search_history()

assert len(history) == 6, "Search history is not correct."
Comment on lines +56 to +58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Improve search history validation.

The current test has several limitations:

  1. Uses a magic number (6) without explaining why this is expected
  2. Doesn't validate the content of history entries
  3. Could have a more descriptive error message

Consider this improved implementation:

     history = await cognee.get_search_history()
-
-    assert len(history) == 6, "Search history is not correct."
+    # We expect 3 searches (INSIGHTS, CHUNKS, SUMMARIES) × 2 calls each
+    expected_history_length = 6
+    assert len(history) == expected_history_length, f"Expected {expected_history_length} history entries (3 types × 2 calls), got {len(history)}"
+    
+    # Validate history entries
+    search_types = {SearchType.INSIGHTS, SearchType.CHUNKS, SearchType.SUMMARIES}
+    for entry in history:
+        assert hasattr(entry, 'query_text'), "History entry missing query_text"
+        assert entry.query_text == random_node_name, "Unexpected query text in history"
+        assert entry.search_type in search_types, f"Invalid search type: {entry.search_type}"

Committable suggestion was skipped due to low confidence.


if __name__ == "__main__":
import asyncio
Expand Down
9 changes: 6 additions & 3 deletions cognee/tests/test_neo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,27 @@ async def main():
random_node = (await vector_engine.search("entities", "AI"))[0]
random_node_name = random_node.payload["name"]

search_results = await cognee.search(SearchType.INSIGHTS, query = random_node_name)
search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted sentences are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.CHUNKS, query = random_node_name)
search_results = await cognee.search(SearchType.CHUNKS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted chunks are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
search_results = await cognee.search(SearchType.SUMMARIES, query_text = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

history = await cognee.get_search_history()

assert len(history) == 6, "Search history is not correct."

if __name__ == "__main__":
import asyncio
Expand Down
9 changes: 6 additions & 3 deletions cognee/tests/test_pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,27 @@ async def main():
random_node = (await vector_engine.search("entities", "AI"))[0]
random_node_name = random_node.payload["name"]

search_results = await cognee.search(SearchType.INSIGHTS, query=random_node_name)
search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted sentences are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.CHUNKS, query=random_node_name)
search_results = await cognee.search(SearchType.CHUNKS, query_text = random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted chunks are:\n")
for result in search_results:
print(f"{result}\n")

search_results = await cognee.search(SearchType.SUMMARIES, query=random_node_name)
search_results = await cognee.search(SearchType.SUMMARIES, query_text = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

history = await cognee.get_search_history()

assert len(history) == 6, "Search history is not correct."
Comment on lines +89 to +91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve the search history verification test.

The current test has several potential issues:

  1. The magic number 6 is not explained (3 searches × 2?)
  2. The assertion message could be more descriptive
  3. The exact count check might be brittle

Consider making the test more robust:

     history = await cognee.get_search_history()
 
-    assert len(history) == 6, "Search history is not correct."
+    # We expect history entries for INSIGHTS, CHUNKS, and SUMMARIES searches
+    expected_searches = 3
+    assert len(history) >= expected_searches, f"Search history should contain at least {expected_searches} entries (one per search type), but found {len(history)}"
+    
+    # Verify that all search types are present in the history
+    search_types = {entry['type'] for entry in history}
+    assert search_types.issuperset({SearchType.INSIGHTS, SearchType.CHUNKS, SearchType.SUMMARIES}), "Not all search types were recorded in history"

Committable suggestion was skipped due to low confidence.


if __name__ == "__main__":
import asyncio
Expand Down
Loading