Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 33 additions & 269 deletions cognee/api/client.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
""" FastAPI server for the Cognee API. """
from datetime import datetime
import os
from uuid import UUID
import aiohttp
import uvicorn
import logging
import sentry_sdk
from typing import List, Union, Optional, Literal
from typing_extensions import Annotated
from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends
from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi import FastAPI
from fastapi.responses import JSONResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from cognee.api.DTO import InDTO, OutDTO
from cognee.api.v1.search import SearchType
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus


# Set up logging
logging.basicConfig(
Expand Down Expand Up @@ -65,9 +52,12 @@ async def lifespan(app: FastAPI):

from cognee.api.v1.users.routers import get_auth_router, get_register_router,\
get_reset_password_router, get_verify_router, get_users_router

from cognee.api.v1.permissions.get_permissions_router import get_permissions_router

from cognee.api.v1.permissions.routers import get_permissions_router
from cognee.api.v1.settings.routers import get_settings_router
from cognee.api.v1.datasets.routers import get_datasets_router
from cognee.api.v1.cognify.routers import get_cognify_router
from cognee.api.v1.search.routers import get_search_router
from cognee.api.v1.add.routers import get_add_router
Comment on lines +55 to +60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Move module-level imports to the top of the file

According to PEP 8 guidelines, all module-level imports should be placed at the top of the file, after any module comments and docstrings, and before module-level variables and constants. This enhances code readability and maintainability.

Apply this diff to fix the issue:

+from fastapi import FastAPI
+from fastapi.responses import JSONResponse, Response
+from fastapi.middleware.cors import CORSMiddleware
+from cognee.api.v1.users.routers import get_auth_router, get_register_router,\
+    get_reset_password_router, get_verify_router, get_users_router
+from cognee.api.v1.permissions.routers import get_permissions_router
+from cognee.api.v1.settings.routers import get_settings_router
+from cognee.api.v1.datasets.routers import get_datasets_router
+from cognee.api.v1.cognify.routers import get_cognify_router
+from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.add.routers import get_add_router

 import os
 import uvicorn
 import logging
 import sentry_sdk

 # Set up logging
 logging.basicConfig(
     level=logging.INFO,  # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
     format="%(asctime)s [%(levelname)s] %(message)s",  # Set the log message format
 )

Committable suggestion was skipped due to low confidence.

🧰 Tools
🪛 Ruff

55-55: Module level import not at top of file

(E402)


56-56: Module level import not at top of file

(E402)


57-57: Module level import not at top of file

(E402)


58-58: Module level import not at top of file

(E402)


59-59: Module level import not at top of file

(E402)


60-60: Module level import not at top of file

(E402)


from fastapi import Request
from fastapi.encoders import jsonable_encoder
Expand Down Expand Up @@ -137,261 +127,35 @@ def health_check():
"""
return Response(status_code = 200)

app.include_router(
get_datasets_router(),
prefix="/api/v1/datasets",
tags=["datasets"]
)

class ErrorResponseDTO(BaseModel):
message: str


class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
owner_id: UUID

@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
try:
from cognee.modules.data.methods import get_datasets
datasets = await get_datasets(user.id)

return datasets
except Exception as error:
logger.error(f"Error retrieving datasets: {str(error)}")
raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error


@app.delete("/api/v1/datasets/{dataset_id}", response_model = None, responses = { 404: { "model": ErrorResponseDTO }})
async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, delete_dataset

dataset = await get_dataset(user.id, dataset_id)

if dataset is None:
raise HTTPException(
status_code = 404,
detail = f"Dataset ({dataset_id}) not found."
)

await delete_dataset(dataset)


@app.get("/api/v1/datasets/{dataset_id}/graph", response_model = str)
async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.shared.utils import render_graph
from cognee.infrastructure.databases.graph import get_graph_engine

try:
graph_client = await get_graph_engine()
graph_url = await render_graph(graph_client.graph)

return JSONResponse(
status_code = 200,
content = str(graph_url),
)
except:
return JSONResponse(
status_code = 409,
content = "Graphistry credentials are not set. Please set them in your .env file.",
)


class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
extension: str
mime_type: str
raw_data_location: str

@app.get("/api/v1/datasets/{dataset_id}/data", response_model = list[DataDTO], responses = { 404: { "model": ErrorResponseDTO }})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset_data, get_dataset

dataset = await get_dataset(user.id, dataset_id)

if dataset is None:
return JSONResponse(
status_code = 404,
content = ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
)

dataset_data = await get_dataset_data(dataset_id = dataset.id)

if dataset_data is None:
return []

return dataset_data


@app.get("/api/v1/datasets/status", response_model = dict[str, PipelineRunStatus])
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets

try:
datasets_statuses = await cognee_datasets.get_status(datasets)

return datasets_statuses
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)


@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class = FileResponse)
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, get_dataset_data

dataset = await get_dataset(user.id, dataset_id)

if dataset is None:
return JSONResponse(
status_code = 404,
content = {
"detail": f"Dataset ({dataset_id}) not found."
}
)

dataset_data = await get_dataset_data(dataset.id)

if dataset_data is None:
raise HTTPException(status_code = 404, detail = f"Dataset ({dataset_id}) not found.")

data = [data for data in dataset_data if str(data.id) == data_id][0]

if data is None:
return JSONResponse(
status_code = 404,
content = {
"detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
}
)

return data.raw_data_location


@app.post("/api/v1/add", response_model = None)
async def add(
data: List[UploadFile],
datasetId: str = Form(...),
user: User = Depends(get_authenticated_user),
):
""" This endpoint is responsible for adding data to the graph."""
from cognee.api.v1.add import add as cognee_add
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
os.system(f"git clone {data} .data/{repo_name}")
await cognee_add(
"data://.data/",
f"{repo_name}",
)
else:
# Fetch and store the data from other types of URL using curl
async with aiohttp.ClientSession() as session:
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
with open(f".data/{data.split('/')[-1]}", "wb") as f:
f.write(file_data)
await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
else:
await cognee_add(
data,
datasetId,
user = user,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)


class CognifyPayloadDTO(BaseModel):
datasets: List[str]

@app.post("/api/v1/cognify", response_model = None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for the cognitive processing of the content."""
from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
try:
await cognee_cognify(payload.datasets, user)
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)


class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str

@app.post("/api/v1/search", response_model = list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for searching for nodes in the graph."""
from cognee.api.v1.search import search as cognee_search

try:
results = await cognee_search(payload.search_type, payload.query, user)

return results
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)

from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig

class LLMConfigDTO(OutDTO, LLMConfig):
pass

class VectorDBConfigDTO(OutDTO, VectorDBConfig):
pass

class SettingsDTO(OutDTO):
llm: LLMConfigDTO
vector_db: VectorDBConfigDTO

@app.get("/api/v1/settings", response_model = SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings()


class LLMConfigDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str
api_key: str

class VectorDBConfigDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str
api_key: str

class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfigDTO] = None
vector_db: Optional[VectorDBConfigDTO] = None

@app.post("/api/v1/settings", response_model = None)
async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import save_llm_config, save_vector_db_config
app.include_router(
get_add_router(),
prefix="/api/v1/add",
tags=["add"]
)

if new_settings.llm is not None:
await save_llm_config(new_settings.llm)
app.include_router(
get_cognify_router(),
prefix="/api/v1/cognify",
tags=["cognify"]
)

if new_settings.vector_db is not None:
await save_vector_db_config(new_settings.vector_db)
app.include_router(
get_search_router(),
prefix="/api/v1/search",
tags=["search"]
)

app.include_router(
get_settings_router(),
prefix="/api/v1/settings",
tags=["settings"]
)

def start_api_server(host: str = "0.0.0.0", port: int = 8000):
"""
Expand Down
1 change: 1 addition & 0 deletions cognee/api/v1/add/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .get_add_router import get_add_router
56 changes: 56 additions & 0 deletions cognee/api/v1/add/routers/get_add_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List
import aiohttp
import os

from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user

def get_add_router():
router = APIRouter()

@router.post("/", response_model=None)
async def add(
data: List[UploadFile],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Incorrect Type Annotation for 'data' Parameter

The parameter data is annotated as List[UploadFile], but later in the code, data is treated as a string (e.g., line 23). If data can also be a string (e.g., a URL), the type annotation should reflect that.

Update the type annotation to accept both List[UploadFile] and str:

- from typing import List
+ from typing import List, Union

...

-     data: List[UploadFile],
+     data: Union[List[UploadFile], str],

Committable suggestion was skipped due to low confidence.

datasetId: str = Form(...),
user: User = Depends(get_authenticated_user),
):
""" This endpoint is responsible for adding data to the graph."""
from cognee.api.v1.add import add as cognee_add
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Import Statement Inside Function Scope

Importing modules inside functions is generally discouraged unless necessary, as it can lead to repeated imports and affect performance.

Move the import statement to the module level:

-     from cognee.api.v1.add import add as cognee_add
+ from cognee.api.v1.add import add as cognee_add

...

      def get_add_router():

Committable suggestion was skipped due to low confidence.

try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
Comment on lines +26 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Type Checking Error: Inconsistent Handling of 'data'

The condition if isinstance(data, str) and data.startswith("http"): checks if data is a string, but data might be a list of UploadFile. This could cause unexpected behavior.

Ensure that data is properly handled for both string URLs and lists of UploadFile. After updating the type annotation, adjust the logic to handle each case appropriately.

# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
os.system(f"git clone {data} .data/{repo_name}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Security Concern: Potential Command Injection via os.system

Using os.system with user-supplied input can lead to command injection vulnerabilities. The variable data is used directly in the command without sanitization.

Use the subprocess module with a list of arguments to prevent shell injection:

- import os
+ import subprocess

...

-                     os.system(f"git clone {data} .data/{repo_name}")
+                     subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
os.system(f"git clone {data} .data/{repo_name}")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)

await cognee_add(
"data://.data/",
f"{repo_name}",
)
else:
# Fetch and store the data from other types of URL using curl
async with aiohttp.ClientSession() as session:
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
with open(f".data/{data.split('/')[-1]}", "wb") as f:
f.write(file_data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Security Issue: Potential Path Traversal when Saving Files

Using data.split('/')[-1] directly as a filename can expose the application to path traversal attacks if data contains malicious input.

Sanitize the filename to prevent path traversal. Use os.path.basename() to extract the filename safely:

-                                 with open(f".data/{data.split('/')[-1]}", "wb") as f:
+                                 filename = os.path.basename(data)
+                                 with open(f".data/{filename}", "wb") as f:

Committable suggestion was skipped due to low confidence.

await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Concurrency Issue: Asynchronous File Operations Without Proper Locking

The code performs file write operations within an asynchronous function but lacks synchronization mechanisms. This could lead to race conditions in a concurrent environment.

Implement proper synchronization when performing file I/O operations in an asynchronous context. Consider using asynchronous file operations provided by aiofiles:

+ import aiofiles

...

-                                     with open(f".data/{filename}", "wb") as f:
-                                         f.write(file_data)
+                                     async with aiofiles.open(f".data/{filename}", "wb") as f:
+                                         await f.write(file_data)

Committable suggestion was skipped due to low confidence.

else:
await cognee_add(
data,
datasetId,
user=user,
)
except Exception as error:
return JSONResponse(
status_code=409,
content={"error": str(error)}
)

return router
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Function Complexity: Consider Refactoring for Better Maintainability

The add endpoint function handles multiple responsibilities, including URL validation, GitHub cloning, HTTP fetching, and data addition. This makes the function complex and harder to maintain.

Refactor the code by extracting logically distinct operations into separate functions or methods. This improves readability and makes the codebase easier to test and maintain.

1 change: 1 addition & 0 deletions cognee/api/v1/cognify/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .get_cognify_router import get_cognify_router
Loading