Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: integrate get_source_code_chunks task, comment out summarize_code
  • Loading branch information
lxobr committed Dec 18, 2024
commit 1c5ca8460a9cd382eb7d3d5cd3bdd2c88c6f1bbe
50 changes: 26 additions & 24 deletions cognee/api/v1/cognify/code_graph_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,32 @@
from pathlib import Path
from typing import Union

from cognee.modules.data.methods import get_datasets, get_datasets_by_name
from cognee.shared.SourceCodeGraph import SourceCodeGraph
from cognee.shared.data_models import SummarizedContent
from cognee.shared.utils import send_telemetry
from cognee.modules.data.models import Dataset, Data
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
from cognee.modules.pipelines.operations.get_pipeline_status import \
get_pipeline_status
from cognee.modules.pipelines.operations.log_pipeline_status import \
log_pipeline_status
from cognee.modules.data.methods import get_datasets, get_datasets_by_name
from cognee.modules.pipelines.tasks.Task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines import run_tasks
from cognee.modules.users.models import User
from cognee.shared.SourceCodeGraph import SourceCodeGraph
from cognee.shared.utils import send_telemetry
from cognee.tasks.documents import (check_permissions_on_documents,
classify_documents,
extract_chunks_from_documents)
from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines.models import PipelineRunStatus
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status
from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents
from cognee.tasks.repo_processor.get_source_code_chunks import get_source_code_chunks
from cognee.tasks.graph import extract_graph_from_code
from cognee.tasks.repo_processor import (enrich_dependency_graph,
expand_dependency_graph,
get_repo_file_dependencies)
from cognee.tasks.repo_processor import get_repo_file_dependencies, enrich_dependency_graph, expand_dependency_graph
from cognee.tasks.storage import add_data_points
from cognee.tasks.summarization import summarize_code
from cognee.infrastructure.databases.vector.embeddings import (get_embedding_engine)

logger = logging.getLogger("code_graph_pipeline")

update_status_lock = asyncio.Lock()


async def code_graph_pipeline(datasets: Union[str, list[str]] = None, user: User = None):
if user is None:
user = await get_default_user()
Expand Down Expand Up @@ -65,7 +63,7 @@ async def code_graph_pipeline(datasets: Union[str, list[str]] = None, user: User

async def run_pipeline(dataset: Dataset, user: User):
'''DEPRECATED: Use `run_code_graph_pipeline` instead. This function will be removed.'''
data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)
data_documents: list[Data] = await get_dataset_data(dataset_id=dataset.id)

document_ids_str = [str(document.id) for document in data_documents]

Expand All @@ -88,10 +86,11 @@ async def run_pipeline(dataset: Dataset, user: User):
try:
tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user = user, permissions = ["write"]),
Task(extract_chunks_from_documents), # Extract text chunks based on the document type.
Task(add_data_points, task_config = { "batch_size": 10 }),
Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { "batch_size": 10 }), # Generate knowledge graphs from the document chunks.
Task(check_permissions_on_documents, user=user, permissions=["write"]),
Task(extract_chunks_from_documents), # Extract text chunks based on the document type.
Task(add_data_points, task_config={"batch_size": 10}),
Task(extract_graph_from_code, graph_model=SourceCodeGraph, task_config={"batch_size": 10}),
# Generate knowledge graphs from the document chunks.
]

pipeline = run_tasks(tasks, data_documents, "code_graph_pipeline")
Expand Down Expand Up @@ -135,11 +134,14 @@ async def run_code_graph_pipeline(repo_path):
await cognee.prune.prune_system(metadata=True)
await create_db_and_tables()

embedding_engine = get_embedding_engine()

tasks = [
Task(get_repo_file_dependencies),
Task(enrich_dependency_graph, task_config={"batch_size": 50}),
Task(enrich_dependency_graph),
Task(expand_dependency_graph, task_config={"batch_size": 50}),
Task(summarize_code, task_config={"batch_size": 50}),
Task(get_source_code_chunks, embedding_model=embedding_engine.model, task_config={"batch_size": 50}),
# Task(summarize_code, task_config={"batch_size": 50}),
Task(add_data_points, task_config={"batch_size": 50}),
]

Expand Down
Loading