2 changes: 1 addition & 1 deletion .github/workflows/test_chromadb.yml
@@ -44,7 +44,7 @@ jobs:
           installer-parallel: true
 
       - name: Install dependencies
-        run: poetry install --no-interaction
+        run: poetry install --extras chromadb --no-interaction
 
       - name: Run chromadb test
         env:
2 changes: 1 addition & 1 deletion .github/workflows/test_cognee_server_start.yml
@@ -41,7 +41,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          poetry install --no-interaction
+          poetry install --extras api --no-interaction
 
       - name: Run cognee server
         env:
2 changes: 2 additions & 0 deletions Dockerfile
@@ -2,6 +2,8 @@ FROM python:3.11-slim
 
 # Define Poetry extras to install
 ARG POETRY_EXTRAS="\
+    # API \
+    api \
     # Storage & Databases \
     filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu \
     # Notebooks & Interactive Environments \
6 changes: 3 additions & 3 deletions cognee/api/client.py
@@ -169,9 +169,9 @@ def health_check():
 
 app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
 
-app.include_router(
-    get_code_pipeline_router(), prefix="/api/v1/code-pipeline", tags=["code-pipeline"]
-)
+codegraph_routes = get_code_pipeline_router()
+if codegraph_routes:
+    app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
 
 
 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
21 changes: 8 additions & 13 deletions cognee/api/v1/add/routers/get_add_router.py
@@ -2,10 +2,10 @@
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter
 from typing import List
-import aiohttp
-import subprocess
 import logging
 import os
 
+import requests
+
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
@@ -36,17 +36,12 @@ async def add(
             )
         else:
             # Fetch and store the data from other types of URL using curl
-            async with aiohttp.ClientSession() as session:
-                async with session.get(data) as resp:
-                    if resp.status == 200:
-                        file_data = await resp.read()
-                        filename = os.path.basename(data)
-                        with open(f".data/{filename}", "wb") as f:
-                            f.write(file_data)
-                        await cognee_add(
-                            "data://.data/",
-                            f"{data.split('/')[-1]}",
-                        )
+            response = requests.get(data)
+            response.raise_for_status()
+
+            file_data = response.content
+
+            return await cognee_add(file_data)
     else:
         await cognee_add(
             data,
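
A note on the new download path: requests.get is synchronous, so calling it inside the async add handler blocks the event loop for the duration of the fetch (and response.content is a bytes property, not a coroutine, which is why the awaited call above had to go). Below is a minimal sketch of one way to keep the handler non-blocking; fetch_bytes is a hypothetical helper, not part of this PR.

import asyncio

import requests


async def fetch_bytes(url: str) -> bytes:
    # Run the blocking requests call in a worker thread so the event loop stays free.
    response = await asyncio.to_thread(requests.get, url, timeout=60)
    response.raise_for_status()
    return response.content  # bytes property, never awaited
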
1 change: 1 addition & 0 deletions cognee/api/v1/cognify/code_graph_pipeline.py
@@ -25,6 +25,7 @@
 from cognee.infrastructure.llm import get_max_chunk_tokens
 
 monitoring = get_base_config().monitoring_tool
+
 if monitoring == MonitoringTool.LANGFUSE:
     from langfuse.decorators import observe
 
9 changes: 8 additions & 1 deletion cognee/api/v1/cognify/routers/get_code_pipeline_router.py
@@ -3,7 +3,6 @@
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse
 from cognee.api.DTO import InDTO
-from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
 from cognee.modules.retrieval.code_retriever import CodeRetriever
 from cognee.modules.storage.utils import JSONEncoder
@@ -22,11 +21,19 @@ class CodePipelineRetrievePayloadDTO(InDTO):
 
 
 def get_code_pipeline_router() -> APIRouter:
+    try:
+        from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline  # noqa: F401 (dependency probe)
+    except ModuleNotFoundError:
+        logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
+        return None
+
     router = APIRouter()
 
     @router.post("/index", response_model=None)
     async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
         """This endpoint is responsible for running the indexation on code repo."""
+        from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
+
         try:
             async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
                 logger.info(result)
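
The pattern introduced here, a router factory that returns None when its optional dependencies are missing plus a caller (see client.py above) that only mounts a router it actually receives, can be illustrated in isolation. A self-contained sketch, with heavy_extra standing in as a hypothetical optional dependency:

import logging

from fastapi import APIRouter, FastAPI

logger = logging.getLogger(__name__)


def get_optional_router() -> APIRouter | None:
    try:
        import heavy_extra  # noqa: F401, hypothetical optional dependency
    except ModuleNotFoundError:
        logger.error("Optional dependencies not found. Skipping routes.")
        return None

    router = APIRouter()

    @router.get("/ping")
    async def ping():
        return {"ok": True}

    return router


app = FastAPI()
optional_routes = get_optional_router()
if optional_routes:  # mount only when the extra is installed
    app.include_router(optional_routes, prefix="/optional")
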
8 changes: 2 additions & 6 deletions cognee/api/v1/users/routers/get_visualize_router.py
@@ -1,11 +1,7 @@
-from fastapi import Form, UploadFile, Depends
+import logging
+from fastapi import Depends
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter
-from typing import List
-import aiohttp
-import subprocess
-import logging
-import os
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 
@@ -1,8 +1,4 @@
-import requests
-import os
-import json
-import random
-from typing import Optional, Any, List, Tuple
+from typing import Any
 from cognee.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAdapter
 
 
@@ -1,9 +1,11 @@
 import asyncio
-import httpx
+import aiohttp
 import logging
 from typing import List, Optional
 import os
 
+import aiohttp.http_exceptions
+
 from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
 from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
 from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
@@ -48,14 +50,10 @@ async def embed_text(self, text: List[str]) -> List[List[float]]:
         if self.mock:
             return [[0.0] * self.dimensions for _ in text]
 
-        embeddings = []
-        async with httpx.AsyncClient() as client:
-            for prompt in text:
-                embedding = await self._get_embedding(client, prompt)
-                embeddings.append(embedding)
+        embeddings = await asyncio.gather(*[self._get_embedding(prompt) for prompt in text])
         return embeddings
 
-    async def _get_embedding(self, client: httpx.AsyncClient, prompt: str) -> List[float]:
+    async def _get_embedding(self, prompt: str) -> List[float]:
         """
         Internal method to call the Ollama embeddings endpoint for a single prompt.
         """
@@ -71,13 +69,14 @@ async def _get_embedding(self, prompt: str) -> List[float]:
         retries = 0
         while retries < self.MAX_RETRIES:
             try:
-                response = await client.post(
-                    self.endpoint, json=payload, headers=headers, timeout=60.0
-                )
-                response.raise_for_status()
-                data = response.json()
-                return data["embedding"]
-            except httpx.HTTPStatusError as e:
+                async with aiohttp.ClientSession() as session:
+                    async with session.post(
+                        self.endpoint, json=payload, headers=headers, timeout=60.0
+                    ) as response:
+                        response.raise_for_status()
+                        data = await response.json()
+                        return data["embedding"]
+            except aiohttp.ClientError as e:
                 logger.error(f"HTTP error on attempt {retries + 1}: {e}")
                 retries += 1
                 await asyncio.sleep(min(2**retries, 60))
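
Switching from the sequential httpx loop to asyncio.gather fires every embedding request at once, and each call now also opens its own ClientSession. If batch sizes grow, a shared session plus a semaphore caps the number of simultaneous connections. A sketch under those assumptions (the payload shape is simplified relative to the adapter's real one):

import asyncio

import aiohttp


async def embed_all(endpoint: str, prompts: list[str], limit: int = 8) -> list[list[float]]:
    semaphore = asyncio.Semaphore(limit)  # at most `limit` requests in flight
    async with aiohttp.ClientSession() as session:

        async def embed_one(prompt: str) -> list[float]:
            async with semaphore:
                async with session.post(
                    endpoint, json={"prompt": prompt}, timeout=aiohttp.ClientTimeout(total=60)
                ) as response:
                    response.raise_for_status()
                    data = await response.json()
                    return data["embedding"]

        return await asyncio.gather(*(embed_one(p) for p in prompts))
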
4 changes: 3 additions & 1 deletion cognee/infrastructure/llm/anthropic/adapter.py
@@ -1,7 +1,6 @@
 from typing import Type
 from pydantic import BaseModel
 import instructor
-import anthropic
 
 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm.llm_interface import LLMInterface
@@ -15,9 +14,12 @@ class AnthropicAdapter(LLMInterface):
     model: str
 
     def __init__(self, max_tokens: int, model: str = None):
+        import anthropic
+
         self.aclient = instructor.patch(
             create=anthropic.Anthropic().messages.create, mode=instructor.Mode.ANTHROPIC_TOOLS
         )
+
         self.model = model
         self.max_tokens = max_tokens
 
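
Deferring import anthropic into __init__ means a missing SDK only surfaces when the adapter is constructed, not when the module is imported. The same pattern reduced to its essentials; LazyClientAdapter is an illustration, not code from this PR:

class LazyClientAdapter:
    """Import the optional SDK at construction time, not at module import time."""

    def __init__(self):
        try:
            import anthropic  # fails here only if the 'anthropic' extra is absent
        except ModuleNotFoundError as error:
            raise RuntimeError("Install the 'anthropic' extra to use this adapter.") from error
        self._client = anthropic.Anthropic()
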
2 changes: 1 addition & 1 deletion cognee/infrastructure/llm/gemini/adapter.py
@@ -2,7 +2,6 @@
 from pydantic import BaseModel
 import logging
 import litellm
-import asyncio
 from litellm import acompletion, JSONSchemaValidationError
 from cognee.shared.data_models import MonitoringTool
 from cognee.exceptions import InvalidValueError
@@ -13,6 +12,7 @@
 logger = logging.getLogger(__name__)
 
 monitoring = get_base_config().monitoring_tool
+
 if monitoring == MonitoringTool.LANGFUSE:
     from langfuse.decorators import observe
 
1 change: 1 addition & 0 deletions cognee/infrastructure/llm/openai/adapter.py
@@ -13,6 +13,7 @@
 from cognee.base_config import get_base_config
 
 monitoring = get_base_config().monitoring_tool
+
 if monitoring == MonitoringTool.LANGFUSE:
     from langfuse.decorators import observe
 
3 changes: 1 addition & 2 deletions cognee/modules/data/extraction/extract_summary.py
@@ -4,7 +4,6 @@
 
 from instructor.exceptions import InstructorRetryException
 from pydantic import BaseModel
-from tenacity import RetryError
 
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt
@@ -36,7 +35,7 @@ async def extract_code_summary(content: str):
     else:
         try:
             result = await extract_summary(content, response_model=SummarizedCode)
-        except (RetryError, InstructorRetryException) as e:
+        except InstructorRetryException as e:
             logger.error("Failed to extract code summary, falling back to mock summary", exc_info=e)
             result = get_mock_summarized_code()
 
63 changes: 0 additions & 63 deletions cognee/modules/data/extraction/extract_topics_naive.py

This file was deleted.

36 changes: 4 additions & 32 deletions cognee/shared/utils.py
@@ -21,22 +21,25 @@
 
 from uuid import uuid4
 import pathlib
-import nltk
 from cognee.shared.exceptions import IngestionError
 
 # Analytics Proxy Url, currently hosted by Vercel
 proxy_url = "https://test.prometh.ai"
 
 
 def get_entities(tagged_tokens):
+    import nltk
+
     nltk.download("maxent_ne_chunker", quiet=True)
 
     from nltk.chunk import ne_chunk
 
     return ne_chunk(tagged_tokens)
 
 
 def extract_pos_tags(sentence):
     """Extract Part-of-Speech (POS) tags for words in a sentence."""
+    import nltk
+
     # Ensure that the necessary NLTK resources are downloaded
     nltk.download("words", quiet=True)
@@ -308,37 +311,6 @@ def embed_logo(p, layout_scale, logo_alpha, position):
     )
 
 
-def style_and_render_graph(p, G, layout_positions, node_attribute, node_colors, centrality):
-    """
-    Apply styling and render the graph into the plot.
-    """
-    from bokeh.plotting import figure, from_networkx
-    from bokeh.models import Circle, MultiLine, HoverTool, ColumnDataSource, Range1d
-    from bokeh.plotting import output_file, show
-
-    from bokeh.embed import file_html
-    from bokeh.resources import CDN
-
-    graph_renderer = from_networkx(G, layout_positions)
-    node_radii = [0.02 + 0.1 * centrality[node] for node in G.nodes()]
-    graph_renderer.node_renderer.data_source.data["radius"] = node_radii
-    graph_renderer.node_renderer.data_source.data["fill_color"] = node_colors
-    graph_renderer.node_renderer.glyph = Circle(
-        radius="radius",
-        fill_color="fill_color",
-        fill_alpha=0.9,
-        line_color="#000000",
-        line_width=1.5,
-    )
-    graph_renderer.edge_renderer.glyph = MultiLine(
-        line_color="#000000",
-        line_alpha=0.3,
-        line_width=1.5,
-    )
-    p.renderers.append(graph_renderer)
-    return graph_renderer
-
-
 def graph_to_tuple(graph):
     """
     Converts a networkx graph to a tuple of (nodes, edges).
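
With nltk imported inside the helpers, the dependency and its corpora are only pulled in when the functions actually run. A usage sketch, assuming nltk is installed and that extract_pos_tags returns the tagged tokens get_entities expects:

from cognee.shared.utils import extract_pos_tags, get_entities

# POS-tag a sentence, then chunk named entities; the NLTK corpora are
# downloaded on first use by the deferred imports shown above.
tags = extract_pos_tags("Cognee maps code repositories into knowledge graphs.")
print(get_entities(tags))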