diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 57646a3711..9f35d5f751 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -201,6 +201,10 @@ jobs: with: python-version: '3.11.x' + - name: Install specific S3 dependency + run: | + poetry install -E aws + - name: Run S3 Bucket Test env: ENV: 'dev' @@ -243,6 +247,4 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: poetry run python ./cognee/tests/test_parallel_databases.py diff --git a/cognee/fetch_secret.py b/cognee/fetch_secret.py deleted file mode 100644 index c36f9e4ce6..0000000000 --- a/cognee/fetch_secret.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import sys -import boto3 -from dotenv import load_dotenv - -# Get the directory that contains your script -current_dir = os.path.dirname(os.path.abspath(__file__)) - -# Get the parent directory -parent_dir = os.path.dirname(current_dir) - -# Add the parent directory to sys.path -sys.path.insert(0, parent_dir) - -environment = os.getenv("AWS_ENV", "dev") - - -def fetch_secret(secret_name: str, region_name: str, env_file_path: str): - """Fetch the secret from AWS Secrets Manager and write it to the .env file.""" - print("Initializing session") - session = boto3.session.Session() - print("Session initialized") - client = session.client(service_name="secretsmanager", region_name=region_name) - print("Client initialized") - - try: - response = client.get_secret_value(SecretId=secret_name) - except Exception as e: - print(f"Error retrieving secret: {e}") - return f"Error retrieving secret: {e}" - - if "SecretString" in response: - secret = response["SecretString"] - else: - secret = response["SecretBinary"] - - with open(env_file_path, "w") as env_file: - 
env_file.write(secret) - print("Secrets are added to the .env file.") - - if os.path.exists(env_file_path): - print(f"The .env file is located at: {env_file_path}") - load_dotenv() - print("The .env file is loaded.") - else: - print(f"The .env file was not found at: {env_file_path}.") - - -ENV_FILE_PATH = os.path.abspath("../.env") - -if os.path.exists(ENV_FILE_PATH): - # Load default environment variables (.env) - load_dotenv() - print("Environment variables are already loaded.") -else: - fetch_secret( - f"promethai-{environment}-backend-secretso-promethaijs-dotenv", - "eu-west-1", - ENV_FILE_PATH, - ) diff --git a/cognee/modules/data/processing/document_types/open_data_file.py b/cognee/modules/data/processing/document_types/open_data_file.py index 207b67fdb9..64a5041235 100644 --- a/cognee/modules/data/processing/document_types/open_data_file.py +++ b/cognee/modules/data/processing/document_types/open_data_file.py @@ -1,4 +1,3 @@ -import s3fs from typing import IO, Optional from cognee.api.v1.add.config import get_s3_config @@ -9,6 +8,8 @@ def open_data_file( if file_path.startswith("s3://"): s3_config = get_s3_config() if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + import s3fs + fs = s3fs.S3FileSystem( key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False ) diff --git a/cognee/modules/ingestion/classify.py b/cognee/modules/ingestion/classify.py index 5f3bbbd38c..d1650e5dc1 100644 --- a/cognee/modules/ingestion/classify.py +++ b/cognee/modules/ingestion/classify.py @@ -1,22 +1,40 @@ from io import BufferedReader -from typing import Union, BinaryIO, Optional -from .data_types import TextData, BinaryData, S3BinaryData +from typing import Union, BinaryIO, Optional, Any +from .data_types import TextData, BinaryData from tempfile import SpooledTemporaryFile -from s3fs.core import S3File, S3FileSystem + from cognee.modules.ingestion.exceptions import IngestionError +try: + from s3fs.core 
import S3File + from cognee.modules.ingestion.data_types.S3BinaryData import S3BinaryData +except ImportError: + S3File = None + S3BinaryData = None + -def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[S3FileSystem] = None): +def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[Any] = None): if isinstance(data, str): return TextData(data) if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile): return BinaryData(data, str(data.name).split("/")[-1] if data.name else filename) - if isinstance(data, S3File): - derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename - return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs) + try: + from importlib import import_module + + s3core = import_module("s3fs.core") + S3File = s3core.S3File + except ImportError: + S3File = None + + if S3File is not None: + from cognee.modules.ingestion.data_types.S3BinaryData import S3BinaryData + + if isinstance(data, S3File): + derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename + return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs) raise IngestionError( - message=f"Type of data sent to classify(data: Union[str, BinaryIO) not supported: {type(data)}" + message=f"Type of data sent to classify(data: Union[str, BinaryIO) not supported or s3fs is not installed: {type(data)}" ) diff --git a/cognee/modules/ingestion/data_types/__init__.py b/cognee/modules/ingestion/data_types/__init__.py index f68306f809..2cc5796cdb 100644 --- a/cognee/modules/ingestion/data_types/__init__.py +++ b/cognee/modules/ingestion/data_types/__init__.py @@ -1,4 +1,3 @@ from .TextData import TextData, create_text_data from .BinaryData import BinaryData, create_binary_data -from .S3BinaryData import S3BinaryData, create_s3_binary_data from .IngestionData import IngestionData diff --git a/cognee/tasks/documents/__init__.py 
b/cognee/tasks/documents/__init__.py index 6988a68994..f4582fbe0c 100644 --- a/cognee/tasks/documents/__init__.py +++ b/cognee/tasks/documents/__init__.py @@ -1,5 +1,3 @@ -from .translate_text import translate_text -from .detect_language import detect_language from .classify_documents import classify_documents from .extract_chunks_from_documents import extract_chunks_from_documents from .check_permissions_on_dataset import check_permissions_on_dataset diff --git a/cognee/tasks/documents/detect_language.py b/cognee/tasks/documents/detect_language.py deleted file mode 100644 index 043efce40d..0000000000 --- a/cognee/tasks/documents/detect_language.py +++ /dev/null @@ -1,39 +0,0 @@ -from cognee.shared.logging_utils import get_logger, ERROR - -logger = get_logger(level=ERROR) - - -async def detect_language(text: str): - """ - Detect the language of the given text and return its ISO 639-1 language code. - If the detected language is Croatian ("hr"), it maps to Serbian ("sr"). - The text is trimmed to the first 100 characters for efficient processing. - Parameters: - text (str): The text for language detection. - Returns: - str: The ISO 639-1 language code of the detected language, or "None" in case of an error. 
- """ - - from langdetect import detect, LangDetectException - - # Trim the text to the first 100 characters - trimmed_text = text[:100] - - try: - # Detect the language using langdetect - detected_lang_iso639_1 = detect(trimmed_text) - - # Special case: map "hr" (Croatian) to "sr" (Serbian ISO 639-2) - if detected_lang_iso639_1 == "hr": - return "sr" - - return detected_lang_iso639_1 - - except LangDetectException as e: - logger.error(f"Language detection error: {e}") - - except Exception as e: - logger.error(f"Unexpected error: {e}") - raise e - - return None diff --git a/cognee/tasks/documents/translate_text.py b/cognee/tasks/documents/translate_text.py deleted file mode 100644 index a6fdb614c6..0000000000 --- a/cognee/tasks/documents/translate_text.py +++ /dev/null @@ -1,46 +0,0 @@ -from cognee.shared.logging_utils import get_logger, ERROR - -from cognee.exceptions import InvalidValueError - -logger = get_logger(level=ERROR) - - -async def translate_text( - text, source_language: str = "sr", target_language: str = "en", region_name="eu-west-1" -): - """ - Translate text from source language to target language using AWS Translate. - Parameters: - text (str): The text to be translated. - source_language (str): The source language code (e.g., "sr" for Serbian). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php - target_language (str): The target language code (e.g., "en" for English). ISO 639-2 Code https://www.loc.gov/standards/iso639-2/php/code_list.php - region_name (str): AWS region name. - Returns: - str: Translated text or an error message. 
- """ - - import boto3 - from botocore.exceptions import BotoCoreError, ClientError - - if not text: - raise InvalidValueError(message="No text to translate.") - - if not source_language or not target_language: - raise InvalidValueError(message="Source and target language codes are required.") - - try: - translate = boto3.client(service_name="translate", region_name=region_name, use_ssl=True) - result = translate.translate_text( - Text=text, - SourceLanguageCode=source_language, - TargetLanguageCode=target_language, - ) - yield result.get("TranslatedText", "No translation found.") - - except BotoCoreError as e: - logger.error(f"BotoCoreError occurred: {e}") - yield None - - except ClientError as e: - logger.error(f"ClientError occurred: {e}") - yield None diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 70bc027a14..6b5ac6488c 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -1,5 +1,4 @@ import dlt -import s3fs import json import inspect from uuid import UUID @@ -40,6 +39,8 @@ async def ingest_data( fs = None if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + import s3fs + fs = s3fs.S3FileSystem( key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False ) diff --git a/cognee/tasks/ingestion/resolve_data_directories.py b/cognee/tasks/ingestion/resolve_data_directories.py index 0b3edc382a..e8bdbca7db 100644 --- a/cognee/tasks/ingestion/resolve_data_directories.py +++ b/cognee/tasks/ingestion/resolve_data_directories.py @@ -1,5 +1,4 @@ import os -import s3fs from typing import List, Union, BinaryIO from urllib.parse import urlparse from cognee.api.v1.add.config import get_s3_config @@ -27,6 +26,8 @@ async def resolve_data_directories( fs = None if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None: + import s3fs + fs = s3fs.S3FileSystem( key=s3_config.aws_access_key_id, 
secret=s3_config.aws_secret_access_key, anon=False ) diff --git a/poetry.lock b/poetry.lock index 5a4257ac95..8d0d3ea187 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4,9 +4,10 @@ name = "aiobotocore" version = "2.22.0" description = "Async client for aws services using botocore and aiohttp" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "aiobotocore-2.22.0-py3-none-any.whl", hash = "sha256:b4e6306f79df9d81daff1f9d63189a2dbee4b77ce3ab937304834e35eaaeeccf"}, {file = "aiobotocore-2.22.0.tar.gz", hash = "sha256:11091477266b75c2b5d28421c1f2bc9a87d175d0b8619cb830805e7a113a170b"}, @@ -163,9 +164,10 @@ speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (> name = "aioitertools" version = "0.12.0" description = "itertools and builtins for AsyncIO and mixed iterables" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "aioitertools-0.12.0-py3-none-any.whl", hash = "sha256:fc1f5fac3d737354de8831cbba3eb04f79dd649d8f3afb4c5b114925e662a796"}, {file = "aioitertools-0.12.0.tar.gz", hash = "sha256:c2a9055b4fbb7705f561b9d86053e8af5d10cc845d22c32008c43490b2d8dd6b"}, @@ -813,9 +815,10 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] name = "boto3" version = "1.37.3" description = "The AWS SDK for Python" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "boto3-1.37.3-py3-none-any.whl", hash = "sha256:2063b40af99fd02f6228ff52397b552ff3353831edaf8d25cc04801827ab9794"}, {file = "boto3-1.37.3.tar.gz", hash = "sha256:21f3ce0ef111297e63a6eb998a25197b8c10982970c320d4c6e8db08be2157be"}, @@ -833,9 +836,10 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.37.3" description = "Low-level, data-driven core of boto 3." 
-optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "botocore-1.37.3-py3-none-any.whl", hash = "sha256:d01bd3bf4c80e61fa88d636ad9f5c9f60a551d71549b481386c6b4efe0bb2b2e"}, {file = "botocore-1.37.3.tar.gz", hash = "sha256:fe8403eb55a88faf9b0f9da6615e5bee7be056d75e17af66c3c8f0a3b0648da4"}, @@ -3889,9 +3893,10 @@ files = [ name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -3962,6 +3967,8 @@ python-versions = "*" groups = ["main"] files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -5106,8 +5113,11 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = 
"lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -7665,6 +7675,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = 
"psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -9581,9 +9592,10 @@ files = [ name = "s3fs" version = "2025.3.2" description = "Convenient Filesystem interface over S3" -optional = false +optional = true python-versions = ">=3.9" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "s3fs-2025.3.2-py3-none-any.whl", hash = "sha256:81eae3f37b4b04bcc08845d7bcc607c6ca45878813ef7e6a28d77b2688417130"}, {file = "s3fs-2025.3.2.tar.gz", hash = "sha256:6798f896ec76dd3bfd8beb89f0bb7c5263cb2760e038bae0978505cd172a307c"}, @@ -9605,9 +9617,10 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] name = "s3transfer" version = "0.11.3" description = "An Amazon S3 Transfer Manager" -optional = false +optional = true python-versions = ">=3.8" groups = ["main"] +markers = "extra == \"aws\"" files = [ {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"}, {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"}, @@ -11993,6 +12006,7 @@ cffi = ["cffi (>=1.11)"] [extras] anthropic = ["anthropic"] api = ["gunicorn", "kuzu", "uvicorn", "websockets"] +aws = ["s3fs"] chromadb = ["chromadb", "pypika"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] debug = ["debugpy"] @@ -12023,4 +12037,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "ea71b85520cb437c259639de02daaeb9b4fdb78eb5ce216b28c31d2133f8e0e8" +content-hash = "4d5f5cfe7072a53e4d9d38e5503a9839b555add9087b8947e5eecf0f80b9cbbb" diff --git a/pyproject.toml b/pyproject.toml index b65171a8ba..da6834da10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,9 +27,6 @@ dependencies = [ "nltk==3.9.1", "numpy>=1.26.4, <=2.1", "pandas>=2.2.2", - # Note: New s3fs and boto3 versions don't 
work well together - # Always use comaptible fixed versions of these two dependencies - "s3fs[boto3]==2025.3.2", "sqlalchemy==2.0.39", "aiosqlite>=0.20.0,<0.21", "tiktoken<=0.9.0", @@ -118,6 +115,9 @@ gui = [ "qasync>=0.27.1,<0.28", ] graphiti = ["graphiti-core>=0.7.0,<0.8"] +# Note: New s3fs and boto3 versions don't work well together +# Always use compatible fixed versions of these two dependencies +aws = ["s3fs[boto3]==2025.3.2"] dev = [ "pytest>=7.4.0,<8", "pytest-cov>=6.1.1", diff --git a/uv.lock b/uv.lock index 08a8e60930..63cabb6ac4 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10, <=3.13" resolution-markers = [ "python_full_version >= '3.13'", @@ -57,7 +58,6 @@ dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "async-timeout", version = "5.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_version < '0'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -345,7 +345,6 @@ version = "0.30.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "async-timeout", version = "5.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_version < '0'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746 } wheels = [ @@ -902,7 +901,6 @@ dependencies = [ { name = "pypdf" }, { name = "python-dotenv" }, { name = "python-multipart" }, - { name = "s3fs", extra = ["boto3"] }, { name = "scikit-learn" }, { name = "sentry-sdk", extra = 
["fastapi"] }, { name = "sqlalchemy" }, @@ -921,6 +919,9 @@ api = [ { name = "uvicorn" }, { name = "websockets" }, ] +aws = [ + { name = "s3fs", extra = ["boto3"] }, +] chromadb = [ { name = "chromadb" }, { name = "pypika" }, @@ -1103,7 +1104,7 @@ requires-dist = [ { name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" }, { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.14.2,<2" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" }, - { name = "s3fs", extras = ["boto3"], specifier = "==2025.3.2" }, + { name = "s3fs", extras = ["boto3"], marker = "extra == 'aws'", specifier = "==2025.3.2" }, { name = "scikit-learn", specifier = ">=1.6.1,<2" }, { name = "sentry-sdk", extras = ["fastapi"], specifier = ">=2.9.0,<3" }, { name = "sqlalchemy", specifier = "==2.0.39" }, @@ -1121,6 +1122,7 @@ requires-dist = [ { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = "==4.9.6" }, { name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1" }, ] +provides-extras = ["api", "weaviate", "qdrant", "neo4j", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "kuzu", "groq", "milvus", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"] [[package]] name = "colorama" @@ -1784,17 +1786,17 @@ name = "fastembed" version = "0.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub" }, - { name = "loguru" }, - { name = "mmh3" }, + { name = "huggingface-hub", marker = "python_full_version < '3.13'" }, + { name = "loguru", marker = "python_full_version < '3.13'" }, + { name = "mmh3", marker = "python_full_version < '3.13'" }, { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.1.0", source = { 
registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "onnxruntime" }, - { name = "pillow" }, - { name = "py-rust-stemmers" }, - { name = "requests" }, - { name = "tokenizers" }, - { name = "tqdm" }, + { name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*'" }, + { name = "onnxruntime", marker = "python_full_version < '3.13'" }, + { name = "pillow", marker = "python_full_version < '3.13'" }, + { name = "py-rust-stemmers", marker = "python_full_version < '3.13'" }, + { name = "requests", marker = "python_full_version < '3.13'" }, + { name = "tokenizers", marker = "python_full_version < '3.13'" }, + { name = "tqdm", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c6/f4/036a656c605f63dc25f11284f60f69900a54a19c513e1ae60d21d6977e75/fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733", size = 50731 } wheels = [ @@ -3765,8 +3767,8 @@ name = "loguru" version = "0.7.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "win32-setctime", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559 } wheels = [