Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: Resolve security concerns regarding os calls
Resolved security concerns in endpoints regarding os

Fix #COG-334-structure-routing
  • Loading branch information
dexters1 committed Nov 5, 2024
commit 801efeb1cb481b34c3bc83f05050f18c2fef84a5
12 changes: 8 additions & 4 deletions cognee/api/v1/add/routers/get_add_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
from fastapi import APIRouter
from typing import List
import aiohttp
import subprocess
import logging
import os

from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user

def get_add_router():
logger = logging.getLogger(__name__)

def get_add_router() -> APIRouter:
router = APIRouter()

@router.post("/", response_model=None)
Expand All @@ -24,7 +27,7 @@ async def add(
if "github" in data:
Comment on lines +26 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Type Checking Error: Inconsistent Handling of 'data'

The condition if isinstance(data, str) and data.startswith("http"): checks if data is a string, but data might be a list of UploadFile. This could cause unexpected behavior.

Ensure that data is properly handled for both string URLs and lists of UploadFile. After updating the type annotation, adjust the logic to handle each case appropriately.

# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
os.system(f"git clone {data} .data/{repo_name}")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
await cognee_add(
"data://.data/",
f"{repo_name}",
Expand All @@ -35,7 +38,8 @@ async def add(
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
with open(f".data/{data.split('/')[-1]}", "wb") as f:
filename = os.path.basename(data)
with open(f".data/{filename}", "wb") as f:
f.write(file_data)
await cognee_add(
"data://.data/",
Expand Down
8 changes: 4 additions & 4 deletions cognee/api/v1/cognify/routers/get_cognify_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends

def get_cognify_router():
router = APIRouter()
class CognifyPayloadDTO(BaseModel):
datasets: List[str]

class CognifyPayloadDTO(BaseModel):
datasets: List[str]
def get_cognify_router() -> APIRouter:
router = APIRouter()

@router.post("/", response_model=None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
Expand Down
52 changes: 28 additions & 24 deletions cognee/api/v1/datasets/routers/get_datasets_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,30 @@
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus

def get_datasets_router():
logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

class ErrorResponseDTO(BaseModel):
message: str

class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
owner_id: UUID

class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
extension: str
mime_type: str
raw_data_location: str

def get_datasets_router() -> APIRouter:
router = APIRouter()

class ErrorResponseDTO(BaseModel):
message: str

class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
owner_id: UUID

@router.get("/", response_model=list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
try:
Expand Down Expand Up @@ -96,15 +106,6 @@ async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authentica
content="Graphistry credentials are not set. Please set them in your .env file.",
)
Comment on lines +103 to +107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Replace bare except with specific exception handling.

Using bare except clauses can mask important errors and make debugging difficult.

             return JSONResponse(
                 status_code=200,
                 content=str(graph_url),
             )
-        except:
+        except (ConnectionError, ValueError) as error:
+            logger.error(f"Error generating graph: {str(error)}")
             return JSONResponse(
                 status_code=409,
                 content="Graphistry credentials are not set. Please set them in your .env file.",
             )

Committable suggestion was skipped due to low confidence.

🧰 Tools
🪛 Ruff

93-93: Do not use bare except

(E722)

Comment on lines +95 to +107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Improve error handling in graph generation.

The current implementation has two issues:

  1. Uses a bare except clause which could mask important errors
  2. Inconsistent error handling pattern (using JSONResponse instead of HTTPException)

Apply this diff to improve error handling:

         try:
             graph_client = await get_graph_engine()
             graph_url = await render_graph(graph_client.graph)
 
-            return JSONResponse(
-                status_code=200,
-                content=str(graph_url),
-            )
+            return str(graph_url)
-        except:
+        except (ConnectionError, ValueError) as error:
+            logger.error(f"Error generating graph: {str(error)}")
+            raise HTTPException(
+                status_code=409,
+                detail="Graphistry credentials are not set. Please set them in your .env file."
+            ) from error
-            return JSONResponse(
-                status_code=409,
-                content="Graphistry credentials are not set. Please set them in your .env file.",
-            )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
graph_client = await get_graph_engine()
graph_url = await render_graph(graph_client.graph)
return JSONResponse(
status_code=200,
content=str(graph_url),
)
except:
return JSONResponse(
status_code=409,
content="Graphistry credentials are not set. Please set them in your .env file.",
)
try:
graph_client = await get_graph_engine()
graph_url = await render_graph(graph_client.graph)
return str(graph_url)
except (ConnectionError, ValueError) as error:
logger.error(f"Error generating graph: {str(error)}")
raise HTTPException(
status_code=409,
detail="Graphistry credentials are not set. Please set them in your .env file."
) from error
🧰 Tools
🪛 Ruff

103-103: Do not use bare except

(E722)


class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
extension: str
mime_type: str
raw_data_location: str

@router.get("/{dataset_id}/data", response_model=list[DataDTO],
responses={404: {"model": ErrorResponseDTO}})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
Expand Down Expand Up @@ -157,18 +158,21 @@ async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_a
dataset_data = await get_dataset_data(dataset.id)

if dataset_data is None:
raise HTTPException(status_code=404, detail=f"Dataset ({dataset_id}) not found.")
raise HTTPException(status_code=404, detail=f"No data found in dataset ({dataset_id}).")

data = [data for data in dataset_data if str(data.id) == data_id][0]
matching_data = [data for data in dataset_data if str(data.id) == data_id]

if data is None:
# Check if matching_data contains an element
if len(matching_data) == 0:
return JSONResponse(
status_code=404,
content={
"detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
}
)

data = matching_data[0]

return data.raw_data_location

return router
11 changes: 6 additions & 5 deletions cognee/api/v1/search/routers/get_search_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from cognee.api.DTO import InDTO
from cognee.modules.users.methods import get_authenticated_user

def get_search_router():
router = APIRouter()

class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str
class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str

def get_search_router() -> APIRouter:
router = APIRouter()

@router.post("/", response_model = list)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Specify the response model type.

The response_model is set to list without type information. Consider creating a response DTO to properly document the structure of the returned data.

from pydantic import BaseModel

class SearchResultDTO(BaseModel):
    # Add appropriate fields based on the actual search results
    id: str
    title: str
    # ... other fields

@router.post("/", response_model=list[SearchResultDTO])

async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
Expand Down
47 changes: 23 additions & 24 deletions cognee/api/v1/settings/routers/get_settings_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,40 @@
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends
from cognee.modules.users.models import User
from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig

def get_settings_router():
router = APIRouter()
class LLMConfigOutputDTO(OutDTO, LLMConfig):
pass

class VectorDBConfigOutputDTO(OutDTO, VectorDBConfig):
pass

from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig
class SettingsDTO(OutDTO):
llm: LLMConfigOutputDTO
vector_db: VectorDBConfigOutputDTO

class LLMConfigDTO(OutDTO, LLMConfig):
pass
class LLMConfigInputDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str
api_key: str

class VectorDBConfigDTO(OutDTO, VectorDBConfig):
pass
class VectorDBConfigInputDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str
api_key: str

class SettingsDTO(OutDTO):
llm: LLMConfigDTO
vector_db: VectorDBConfigDTO
class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfigInputDTO] = None
vector_db: Optional[VectorDBConfigInputDTO] = None
Comment on lines +19 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add field validation for api_key and url.

Consider adding Pydantic field validators to ensure:

  • api_key meets minimum length/format requirements
  • url is properly formatted

Example implementation:

from pydantic import validator, HttpUrl

class LLMConfigInputDTO(InDTO):
    provider: Literal["openai", "ollama", "anthropic"]
    model: str
    api_key: str

    @validator('api_key')
    def validate_api_key(cls, v):
        if len(v) < 8:
            raise ValueError('API key must be at least 8 characters')
        return v

class VectorDBConfigInputDTO(InDTO):
    provider: Literal["lancedb", "qdrant", "weaviate", "pgvector"]
    url: HttpUrl  # This will automatically validate URL format
    api_key: str

    @validator('api_key')
    def validate_api_key(cls, v):
        if len(v) < 8:
            raise ValueError('API key must be at least 8 characters')
        return v


def get_settings_router() -> APIRouter:
router = APIRouter()

@router.get("/", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()

Comment on lines +36 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling to GET endpoint.

The endpoint should handle potential errors from get_cognee_settings().

Apply this diff:

 @router.get("/", response_model=SettingsDTO)
 async def get_settings(user: User = Depends(get_authenticated_user)):
+    from fastapi import HTTPException
     from cognee.modules.settings import get_settings as get_cognee_settings
-    return get_cognee_settings()
+    try:
+        return get_cognee_settings()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to retrieve settings: {str(e)}")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
@router.get("/", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()
@router.get("/", response_model=SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from fastapi import HTTPException
from cognee.modules.settings import get_settings as get_cognee_settings
try:
return get_cognee_settings()
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to retrieve settings: {str(e)}")

class LLMConfigDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str
api_key: str

class VectorDBConfigDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str
api_key: str

class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfigDTO] = None
vector_db: Optional[VectorDBConfigDTO] = None

@router.post("/", response_model=None)
async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import save_llm_config, save_vector_db_config
Expand Down