Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
dce894b
Add first three unit tests
0xideas Nov 11, 2024
86e726d
Complete migrating unit tests
0xideas Nov 11, 2024
d7ffef1
Remove old __tests__ folders
0xideas Nov 11, 2024
fbd0115
Rebase onto main
0xideas Nov 12, 2024
f326a4d
Transform into pytest tests
0xideas Nov 12, 2024
3cc1138
Apply autoformatting
0xideas Nov 12, 2024
1889071
Run unit tests in github actions
0xideas Nov 12, 2024
7d3f222
Add get_mock_user
0xideas Nov 12, 2024
7d3657a
Add fixture for mock user
0xideas Nov 12, 2024
ade1fd2
Add pytest.ini back
0xideas Nov 12, 2024
a57530e
Use patch
0xideas Nov 12, 2024
501d210
Try more patching
0xideas Nov 12, 2024
636bfae
Try recursive patching
0xideas Nov 12, 2024
c42bb51
Add cognee_db
0xideas Nov 12, 2024
adbf3df
Move cognee_db for integration test
0xideas Nov 12, 2024
e6bdb67
cp cognee_db for integration test
0xideas Nov 12, 2024
6f4ba20
Run integration tests in pipeline
0xideas Nov 12, 2024
c385783
Remove pytest.ini
0xideas Nov 12, 2024
2be1127
Makedirs for integration test
0xideas Nov 12, 2024
d7d8460
Clean up unit test pull request
0xideas Nov 12, 2024
949dd50
Copy over gitignore
0xideas Nov 12, 2024
d01061d
Remove unused imports
0xideas Nov 12, 2024
a541125
Add better text for task integration test assertions
0xideas Nov 12, 2024
7a6cf53
Remove tests/unit/integration folder
0xideas Nov 12, 2024
8107709
Remove duplicate pdf key
0xideas Nov 12, 2024
2d74590
Apply _test.py suffix to test files
0xideas Nov 12, 2024
83995fa
Try old version of classify_documents
0xideas Nov 12, 2024
826de0e
Remove orphan dictionary
0xideas Nov 12, 2024
8a59cad
Remove unneeded ground truth dicts and autoformat
0xideas Nov 12, 2024
58a733c
Increase time delta
0xideas Nov 12, 2024
af88870
Update cognee/tests/unit/interfaces/graph/util.py
0xideas Nov 13, 2024
aa1480c
Remove unused imports and make PdfDocument_test a pytest function
0xideas Nov 13, 2024
cd80525
Revert to EXTENSION_TO_DOCUMENT_CLASS implementation of classify_docu…
0xideas Nov 13, 2024
49bc07d
Rename ground_truth to expected_chunks
0xideas Nov 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_10.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_11.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down

This file was deleted.

This file was deleted.

Binary file not shown.
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
14 changes: 0 additions & 14 deletions cognee/modules/pipelines/operations/__tests__/get_graph_url.py

This file was deleted.

53 changes: 0 additions & 53 deletions cognee/tasks/chunks/__tests__/chunk_by_paragraph.test.py

This file was deleted.

4 changes: 1 addition & 3 deletions cognee/tasks/documents/classify_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
"pdf": PdfDocument,
"audio": AudioDocument,
"image": ImageDocument,
"pdf": TextDocument,
"txt": TextDocument
}

def classify_documents(data_documents: list[Data]) -> list[Document]:
    """Instantiate the matching Document subclass for each data item.

    The subclass is selected by file extension via EXTENSION_TO_DOCUMENT_CLASS.

    Args:
        data_documents: data items carrying id, name, extension and
            raw_data_location attributes.

    Returns:
        One Document instance per input item, in the same order.

    Raises:
        KeyError: if an item's extension has no registered Document class.
    """
    documents = [
        EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
            id=data_item.id,
            title=f"{data_item.name}.{data_item.extension}",
            raw_data_location=data_item.raw_data_location,
            name=data_item.name,
        )
        for data_item in data_documents
    ]

    return documents
11 changes: 11 additions & 0 deletions cognee/tests/integration/run_toy_tasks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os
import shutil

import pytest


@pytest.fixture(autouse=True, scope="session")
def copy_cognee_db_to_target_location():
    """Copy the pre-built test database to where cognee expects it.

    Session-scoped and autouse, so it runs once before any integration
    test in this directory executes.
    """
    os.makedirs("cognee/.cognee_system/databases/", exist_ok=True)
    # shutil.copy2 instead of `os.system("cp ...")`: portable across
    # platforms and raises on failure instead of silently returning a
    # non-zero exit status that nothing checks.
    shutil.copy2(
        "cognee/tests/integration/run_toy_tasks/data/cognee_db",
        "cognee/.cognee_system/databases/cognee_db",
    )
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import asyncio
from queue import Queue

from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def pipeline(data_queue):
async def queue_consumer():
while not data_queue.is_closed:
Expand All @@ -17,20 +19,25 @@ async def add_one(num):
async def multiply_by_two(num):
yield num * 2

tasks_run = run_tasks([
Task(queue_consumer),
Task(add_one),
Task(multiply_by_two),
])
tasks_run = run_tasks(
[
Task(queue_consumer),
Task(add_one),
Task(multiply_by_two),
],
pipeline_name="test_run_tasks_from_queue",
)

results = [2, 4, 6, 8, 10, 12, 14, 16, 18]
results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
index = 0
async for result in tasks_run:
print(result)
assert result == results[index]
assert (
result == results[index]
), f"at {index = }: {result = } != {results[index] = }"
index += 1

async def main():

async def run_queue():
data_queue = Queue()
data_queue.is_closed = False

Expand All @@ -42,5 +49,6 @@ async def queue_producer():

await asyncio.gather(pipeline(data_queue), queue_producer())

if __name__ == "__main__":
asyncio.run(main())

def test_run_tasks_from_queue():
asyncio.run(run_queue())
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import asyncio

from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def main():
async def run_and_check_tasks():
def number_generator(num):
for i in range(num):
yield i + 1
Expand All @@ -18,19 +19,25 @@ async def multiply_by_two(num):
async def add_one_single(num):
yield num + 1

pipeline = run_tasks([
Task(number_generator),
Task(add_one, task_config = {"batch_size": 5}),
Task(multiply_by_two, task_config = {"batch_size": 1}),
Task(add_one_single),
], 10)
pipeline = run_tasks(
[
Task(number_generator),
Task(add_one, task_config={"batch_size": 5}),
Task(multiply_by_two, task_config={"batch_size": 1}),
Task(add_one_single),
],
10,
pipeline_name="test_run_tasks",
)

results = [5, 7, 9, 11, 13, 15, 17, 19, 21, 23]
index = 0
async for result in pipeline:
print(result)
assert result == results[index]
assert (
result == results[index]
), f"at {index = }: {result = } != {results[index] = }"
index += 1

if __name__ == "__main__":
asyncio.run(main())

def test_run_tasks():
asyncio.run(run_and_check_tasks())
34 changes: 34 additions & 0 deletions cognee/tests/unit/documents/PdfDocument_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import uuid

from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument

# Expected per-chunk statistics for the test PDF when read with
# chunk_size=1024: one dict per chunk yielded by PdfDocument.read(),
# compared field-by-field in test_PdfDocument below.
GROUND_TRUTH = [
    {"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"},
    {"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"},
]


def test_PdfDocument():
    """Read the bundled test PDF and check each chunk against GROUND_TRUTH.

    NOTE: the original span was corrupted by pasted review commentary;
    this restores the function from its visible head and tail.
    """
    # Resolve ../../test_data/artificial-intelligence.pdf relative to this
    # file (two directory levels up from .../unit/documents/).
    test_file_path = os.path.join(
        os.sep,
        *(os.path.dirname(__file__).split(os.sep)[:-2]),
        "test_data",
        "artificial-intelligence.pdf",
    )
    pdf_doc = PdfDocument(
        id=uuid.uuid4(), name="Test document.pdf", raw_data_location=test_file_path
    )

    # zip truncates to len(GROUND_TRUTH): only the first chunks are checked.
    for ground_truth, paragraph_data in zip(
        GROUND_TRUTH, pdf_doc.read(chunk_size=1024)
    ):
        assert (
            ground_truth["word_count"] == paragraph_data.word_count
        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
        assert ground_truth["len_text"] == len(
            paragraph_data.text
        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
        assert (
            ground_truth["cut_type"] == paragraph_data.cut_type
        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
Loading
Loading