From a5a07d711b5adf0895c6d9d8b8df70750ecbbb02 Mon Sep 17 00:00:00 2001 From: Prateek Chhikara <46902268+prateekchhikara@users.noreply.github.com> Date: Thu, 12 Jun 2025 23:34:38 -0700 Subject: [PATCH 1/7] Updates in client to support summary (#2951) --- mem0/client/main.py | 16 ++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/mem0/client/main.py b/mem0/client/main.py index 5bdf9210b1..e7bc245c3c 100644 --- a/mem0/client/main.py +++ b/mem0/client/main.py @@ -472,6 +472,22 @@ def get_memory_export(self, **kwargs) -> Dict[str, Any]: capture_client_event("client.get_memory_export", self, {"keys": list(kwargs.keys()), "sync_type": "sync"}) return response.json() + @api_error_handler + def get_summary(self, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """Get the summary of a memory export. + + Args: + filters: Optional filters to apply to the summary request + + Returns: + Dict containing the export status and summary data + """ + + response = self.client.post("/v1/summary/", json=self._prepare_params({"filters": filters})) + response.raise_for_status() + capture_client_event("client.get_summary", self, {"sync_type": "sync"}) + return response.json() + @api_error_handler def get_project(self, fields: Optional[List[str]] = None) -> Dict[str, Any]: """Get instructions or categories for the current project. 
diff --git a/pyproject.toml b/pyproject.toml index 1bed757914..b1232451ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "mem0ai" -version = "0.1.107" +version = "0.1.107r1" description = "Long-term memory for AI Agents" authors = [ { name = "Mem0", email = "founders@mem0.ai" } From df43f904d10be4765b04a7c707f4882a3f6d500c Mon Sep 17 00:00:00 2001 From: Dev Khant Date: Fri, 13 Jun 2025 12:09:54 +0530 Subject: [PATCH 2/7] deploy minor version -> 0.1.107rc2 (#2953) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b1232451ea..1ade265f1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "mem0ai" -version = "0.1.107r1" +version = "0.1.107rc2" description = "Long-term memory for AI Agents" authors = [ { name = "Mem0", email = "founders@mem0.ai" } From a8ace1860739ca824c6409b1ac3507862e339ba5 Mon Sep 17 00:00:00 2001 From: John Lockwood Date: Sat, 14 Jun 2025 05:08:32 -0700 Subject: [PATCH 3/7] Fix/pin pinecone issue #2772 (#2773) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91f123b89b..3c93d58c9f 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ install: install_all: pip install ruff==0.6.9 groq together boto3 litellm ollama chromadb weaviate weaviate-client sentence_transformers vertexai \ - google-generativeai elasticsearch opensearch-py vecs pinecone pinecone-text faiss-cpu langchain-community \ + google-generativeai elasticsearch opensearch-py vecs "pinecone<7.0.0" pinecone-text faiss-cpu langchain-community \ upstash-vector azure-search-documents langchain-memgraph langchain-neo4j rank-bm25 # Format code with ruff From 7c0c4a03c4f442252033adbfedc974fb33de5e29 Mon Sep 17 00:00:00 2001 From: John Lockwood Date: Sat, 14 Jun 2025 05:13:16 -0700 Subject: [PATCH 4/7] Feat/add python version test envs 
(#2774) --- CONTRIBUTING.md | 34 +++++++++------ Makefile | 9 ++++ mem0/memory/main.py | 90 ++++++++++++++++++--------------------- pyproject.toml | 56 ++++++++++++++++++++++++ tests/memory/test_main.py | 19 ++++++--- tests/test_main.py | 12 ++++-- 6 files changed, 150 insertions(+), 70 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b4420bc9f1..28dad2a7a9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,18 +16,19 @@ To make a contribution, follow these steps: For more details about pull requests, please read [GitHub's guides](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). -### ๐Ÿ“ฆ Package manager +### ๐Ÿ“ฆ Development Environment -We use `poetry` as our package manager. You can install poetry by following the instructions [here](https://python-poetry.org/docs/#installation). - -Please DO NOT use pip or conda to install the dependencies. Instead, use poetry: +We use `hatch` for managing development environments. To set up: ```bash -make install_all - -#activate +# Activate environment for specific Python version: +hatch shell dev_py_3_9 # Python 3.9 +hatch shell dev_py_3_10 # Python 3.10 +hatch shell dev_py_3_11 # Python 3.11 -poetry shell +# The environment will automatically install all dev dependencies +# Run tests within the activated shell: +make test ``` ### ๐Ÿ“Œ Pre-commit @@ -40,16 +41,21 @@ pre-commit install ### ๐Ÿงช Testing -We use `pytest` to test our code. You can run the tests by running the following command: +We use `pytest` to test our code across multiple Python versions. 
You can run tests using: ```bash -poetry run pytest tests +# Run tests with default Python version +make test -# or +# Test specific Python versions: +make test-py-3.9 # Python 3.9 environment +make test-py-3.10 # Python 3.10 environment +make test-py-3.11 # Python 3.11 environment -make test +# When using hatch shells, run tests with: +make test # After activating a shell with hatch shell test_XX ``` -Several packages have been removed from Poetry to make the package lighter. Therefore, it is recommended to run `make install_all` to install the remaining packages and ensure all tests pass. Make sure that all tests pass before submitting a pull request. +Make sure that all tests pass across all supported Python versions before submitting a pull request. -We look forward to your pull requests and can't wait to see your contributions! \ No newline at end of file +We look forward to your pull requests and can't wait to see your contributions! diff --git a/Makefile b/Makefile index 3c93d58c9f..59c745ca31 100644 --- a/Makefile +++ b/Makefile @@ -41,3 +41,12 @@ clean: test: hatch run test + +test-py-3.9: + hatch run dev_py_3_9:test + +test-py-3.10: + hatch run dev_py_3_10:test + +test-py-3.11: + hatch run dev_py_3_11:test diff --git a/mem0/memory/main.py b/mem0/memory/main.py index f86fdfbf49..5681489523 100644 --- a/mem0/memory/main.py +++ b/mem0/memory/main.py @@ -28,8 +28,8 @@ get_fact_retrieval_messages, parse_messages, parse_vision_messages, - remove_code_blocks, process_telemetry_filters, + remove_code_blocks, ) from mem0.utils.factory import EmbedderFactory, LlmFactory, VectorStoreFactory @@ -338,10 +338,9 @@ def _add_to_vector_store(self, messages, metadata, filters, infer): except Exception as e: logging.error(f"Error in new_retrieved_facts: {e}") new_retrieved_facts = [] - + if not new_retrieved_facts: logger.debug("No new facts retrieved from input. 
Skipping memory update LLM call.") - return [] retrieved_old_memory = [] new_message_embeddings = {} @@ -369,24 +368,27 @@ def _add_to_vector_store(self, messages, metadata, filters, infer): temp_uuid_mapping[str(idx)] = item["id"] retrieved_old_memory[idx]["id"] = str(idx) - function_calling_prompt = get_update_memory_messages( - retrieved_old_memory, new_retrieved_facts, self.config.custom_update_memory_prompt - ) - - try: - response: str = self.llm.generate_response( - messages=[{"role": "user", "content": function_calling_prompt}], - response_format={"type": "json_object"}, + if new_retrieved_facts: + function_calling_prompt = get_update_memory_messages( + retrieved_old_memory, new_retrieved_facts, self.config.custom_update_memory_prompt ) - except Exception as e: - logging.error(f"Error in new memory actions response: {e}") - response = "" - try: - response = remove_code_blocks(response) - new_memories_with_actions = json.loads(response) - except Exception as e: - logging.error(f"Invalid JSON response: {e}") + try: + response: str = self.llm.generate_response( + messages=[{"role": "user", "content": function_calling_prompt}], + response_format={"type": "json_object"}, + ) + except Exception as e: + logging.error(f"Error in new memory actions response: {e}") + response = "" + + try: + response = remove_code_blocks(response) + new_memories_with_actions = json.loads(response) + except Exception as e: + logging.error(f"Invalid JSON response: {e}") + new_memories_with_actions = {} + else: new_memories_with_actions = {} returned_memories = [] @@ -1162,13 +1164,11 @@ async def _add_to_vector_store( response = remove_code_blocks(response) new_retrieved_facts = json.loads(response)["facts"] except Exception as e: - new_retrieved_facts = [] - - if not new_retrieved_facts: - logger.info("No new facts retrieved from input. 
Skipping memory update LLM call.") - return [] logging.error(f"Error in new_retrieved_facts: {e}") new_retrieved_facts = [] + + if not new_retrieved_facts: + logger.debug("No new facts retrieved from input. Skipping memory update LLM call.") retrieved_old_memory = [] new_message_embeddings = {} @@ -1200,31 +1200,25 @@ async def process_fact_for_search(new_mem_content): temp_uuid_mapping[str(idx)] = item["id"] retrieved_old_memory[idx]["id"] = str(idx) - function_calling_prompt = get_update_memory_messages( - retrieved_old_memory, new_retrieved_facts, self.config.custom_update_memory_prompt - ) - try: - response = await asyncio.to_thread( - self.llm.generate_response, - messages=[{"role": "user", "content": function_calling_prompt}], - response_format={"type": "json_object"}, + if new_retrieved_facts: + function_calling_prompt = get_update_memory_messages( + retrieved_old_memory, new_retrieved_facts, self.config.custom_update_memory_prompt ) - except Exception as e: - response = "" - logging.error(f"Error in new memory actions response: {e}") - response = "" - try: - response = remove_code_blocks(response) - new_memories_with_actions = json.loads(response) - except Exception as e: - new_memories_with_actions = {} - - if not new_memories_with_actions: - logger.info("No new facts retrieved from input (async). 
Skipping memory update LLM call.") - return [] - - logging.error(f"Invalid JSON response: {e}") - new_memories_with_actions = {} + try: + response = await asyncio.to_thread( + self.llm.generate_response, + messages=[{"role": "user", "content": function_calling_prompt}], + response_format={"type": "json_object"}, + ) + except Exception as e: + logging.error(f"Error in new memory actions response: {e}") + response = "" + try: + response = remove_code_blocks(response) + new_memories_with_actions = json.loads(response) + except Exception as e: + logging.error(f"Invalid JSON response: {e}") + new_memories_with_actions = {} returned_memories = [] try: diff --git a/pyproject.toml b/pyproject.toml index 1ade265f1a..dd882d05be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,32 @@ graph = [ "neo4j>=5.23.1", "rank-bm25>=0.2.2", ] +vector_stores = [ + "vecs>=0.4.0", + "chromadb>=0.4.24", + "weaviate-client>=4.4.0", + "pinecone<7.0.0", + "pinecone-text>=0.1.1", + "faiss-cpu>=1.7.4", + "upstash-vector>=0.1.0", + "azure-search-documents>=11.4.0b8", +] +llms = [ + "groq>=0.3.0", + "together>=0.2.10", + "litellm>=0.1.0", + "ollama>=0.1.0", + "vertexai>=0.1.0", + "google-generativeai>=0.3.0", +] +extras = [ + "boto3>=1.34.0", + "langchain-community>=0.0.0", + "sentence-transformers>=2.2.2", + "elasticsearch>=8.0.0", + "opensearch-py>=2.0.0", + "langchain-memgraph>=0.1.0", +] test = [ "pytest>=8.2.2", "pytest-mock>=3.14.0", @@ -53,6 +79,36 @@ only-include = ["mem0"] [tool.hatch.build.targets.wheel.shared-data] "README.md" = "README.md" +[tool.hatch.envs.dev_py_3_9] +python = "3.9" +features = [ + "test", + "graph", + "vector_stores", + "llms", + "extras", +] + +[tool.hatch.envs.dev_py_3_10] +python = "3.10" +features = [ + "test", + "graph", + "vector_stores", + "llms", + "extras", +] + +[tool.hatch.envs.dev_py_3_11] +python = "3.11" +features = [ + "test", + "graph", + "vector_stores", + "llms", + "extras", +] + [tool.hatch.envs.default.scripts] format = [ "ruff 
format", diff --git a/tests/memory/test_main.py b/tests/memory/test_main.py index 64a8f83729..90ceff17df 100644 --- a/tests/memory/test_main.py +++ b/tests/memory/test_main.py @@ -40,10 +40,12 @@ def mock_memory(self, mocker): return memory - def test_empty_llm_response_fact_extraction(self, mock_memory, caplog): + def test_empty_llm_response_fact_extraction(self, mocker, mock_memory, caplog): """Test empty response from LLM during fact extraction""" # Setup mock_memory.llm.generate_response.return_value = "" + mock_capture_event = mocker.MagicMock() + mocker.patch("mem0.memory.main.capture_event", mock_capture_event) # Execute with caplog.at_level(logging.ERROR): @@ -52,9 +54,10 @@ def test_empty_llm_response_fact_extraction(self, mock_memory, caplog): ) # Verify - assert mock_memory.llm.generate_response.call_count == 2 + assert mock_memory.llm.generate_response.call_count == 1 assert result == [] # Should return empty list when no memories processed assert "Error in new_retrieved_facts" in caplog.text + assert mock_capture_event.call_count == 1 def test_empty_llm_response_memory_actions(self, mock_memory, caplog): """Test empty response from LLM during memory actions""" @@ -94,25 +97,31 @@ async def test_async_empty_llm_response_fact_extraction(self, mock_async_memory, """Test empty response in AsyncMemory._add_to_vector_store""" mocker.patch("mem0.utils.factory.EmbedderFactory.create", return_value=MagicMock()) mock_async_memory.llm.generate_response.return_value = "" + mock_capture_event = mocker.MagicMock() + mocker.patch("mem0.memory.main.capture_event", mock_capture_event) with caplog.at_level(logging.ERROR): result = await mock_async_memory._add_to_vector_store( - messages=[{"role": "user", "content": "test"}], metadata={}, filters={}, infer=True + messages=[{"role": "user", "content": "test"}], metadata={}, effective_filters={}, infer=True ) - + assert mock_async_memory.llm.generate_response.call_count == 1 assert result == [] assert "Error in 
new_retrieved_facts" in caplog.text + assert mock_capture_event.call_count == 1 @pytest.mark.asyncio async def test_async_empty_llm_response_memory_actions(self, mock_async_memory, caplog, mocker): """Test empty response in AsyncMemory._add_to_vector_store""" mocker.patch("mem0.utils.factory.EmbedderFactory.create", return_value=MagicMock()) mock_async_memory.llm.generate_response.side_effect = ['{"facts": ["test fact"]}', ""] + mock_capture_event = mocker.MagicMock() + mocker.patch("mem0.memory.main.capture_event", mock_capture_event) with caplog.at_level(logging.ERROR): result = await mock_async_memory._add_to_vector_store( - messages=[{"role": "user", "content": "test"}], metadata={}, filters={}, infer=True + messages=[{"role": "user", "content": "test"}], metadata={}, effective_filters={}, infer=True ) assert result == [] assert "Invalid JSON response" in caplog.text + assert mock_capture_event.call_count == 1 diff --git a/tests/test_main.py b/tests/test_main.py index 41afa05b89..c5b45b34d4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -19,13 +19,14 @@ def mock_openai(): def memory_instance(): with ( patch("mem0.utils.factory.EmbedderFactory") as mock_embedder, - patch("mem0.utils.factory.VectorStoreFactory") as mock_vector_store, + patch("mem0.memory.main.VectorStoreFactory") as mock_vector_store, patch("mem0.utils.factory.LlmFactory") as mock_llm, patch("mem0.memory.telemetry.capture_event"), patch("mem0.memory.graph_memory.MemoryGraph"), ): mock_embedder.create.return_value = Mock() mock_vector_store.create.return_value = Mock() + mock_vector_store.create.return_value.search.return_value = [] mock_llm.create.return_value = Mock() config = MemoryConfig(version="v1.1") @@ -37,13 +38,14 @@ def memory_instance(): def memory_custom_instance(): with ( patch("mem0.utils.factory.EmbedderFactory") as mock_embedder, - patch("mem0.utils.factory.VectorStoreFactory") as mock_vector_store, + patch("mem0.memory.main.VectorStoreFactory") as mock_vector_store, 
patch("mem0.utils.factory.LlmFactory") as mock_llm, patch("mem0.memory.telemetry.capture_event"), patch("mem0.memory.graph_memory.MemoryGraph"), ): mock_embedder.create.return_value = Mock() mock_vector_store.create.return_value = Mock() + mock_vector_store.create.return_value.search.return_value = [] mock_llm.create.return_value = Mock() config = MemoryConfig( @@ -250,7 +252,11 @@ def test_get_all(memory_instance, version, enable_graph, expected_result): def test_custom_prompts(memory_custom_instance): messages = [{"role": "user", "content": "Test message"}] + from mem0.embeddings.mock import MockEmbeddings memory_custom_instance.llm.generate_response = Mock() + memory_custom_instance.llm.generate_response.return_value = '{"facts": ["fact1", "fact2"]}' + memory_custom_instance.embedding_model = MockEmbeddings() + with patch("mem0.memory.main.parse_messages", return_value="Test message") as mock_parse_messages: with patch( @@ -273,7 +279,7 @@ def test_custom_prompts(memory_custom_instance): ## custom update memory prompt ## mock_get_update_memory_messages.assert_called_once_with( - [], [], memory_custom_instance.config.custom_update_memory_prompt + [], ["fact1", "fact2"], memory_custom_instance.config.custom_update_memory_prompt ) memory_custom_instance.llm.generate_response.assert_any_call( From a0cd4065d9240329cb9b20acac851d3e3d5c69d5 Mon Sep 17 00:00:00 2001 From: Fabian Valle Date: Sat, 14 Jun 2025 08:27:06 -0400 Subject: [PATCH 5/7] +MongoDB Vector Support (#2367) Co-authored-by: Divya Gupta --- docs/components/vectordbs/dbs/mongodb.mdx | 49 ++++ docs/components/vectordbs/overview.mdx | 1 + docs/docs.json | 1 + mem0/configs/vector_stores/mongodb.py | 42 +++ mem0/utils/factory.py | 1 + mem0/vector_stores/configs.py | 1 + mem0/vector_stores/mongodb.py | 299 ++++++++++++++++++++++ poetry.lock | 2 +- tests/vector_stores/test_mongodb.py | 176 +++++++++++++ 9 files changed, 571 insertions(+), 1 deletion(-) create mode 100644 docs/components/vectordbs/dbs/mongodb.mdx 
create mode 100644 mem0/configs/vector_stores/mongodb.py create mode 100644 mem0/vector_stores/mongodb.py create mode 100644 tests/vector_stores/test_mongodb.py diff --git a/docs/components/vectordbs/dbs/mongodb.mdx b/docs/components/vectordbs/dbs/mongodb.mdx new file mode 100644 index 0000000000..216c043632 --- /dev/null +++ b/docs/components/vectordbs/dbs/mongodb.mdx @@ -0,0 +1,49 @@ +# MongoDB + +[MongoDB](https://www.mongodb.com/) is a versatile document database that supports vector search capabilities, allowing for efficient high-dimensional similarity searches over large datasets with robust scalability and performance. + +## Usage + +```python +import os +from mem0 import Memory + +os.environ["OPENAI_API_KEY"] = "sk-xx" + +config = { + "vector_store": { + "provider": "mongodb", + "config": { + "db_name": "mem0-db", + "collection_name": "mem0-collection", + "user": "my-user", + "password": "my-password", + } + } +} + +m = Memory.from_config(config) +messages = [ + {"role": "user", "content": "I'm planning to watch a movie tonight. Any recommendations?"}, + {"role": "assistant", "content": "How about a thriller movies? They can be quite engaging."}, + {"role": "user", "content": "Iโ€™m not a big fan of thriller movies but I love sci-fi movies."}, + {"role": "assistant", "content": "Got it! 
I'll avoid thriller recommendations and suggest sci-fi movies in the future."} +] +m.add(messages, user_id="alice", metadata={"category": "movies"}) +``` + +## Config + +Here are the parameters available for configuring MongoDB: + +| Parameter | Description | Default Value | +| --- | --- | --- | +| db_name | Name of the MongoDB database | `"mem0_db"` | +| collection_name | Name of the MongoDB collection | `"mem0_collection"` | +| embedding_model_dims | Dimensions of the embedding vectors | `1536` | +| user | MongoDB user for authentication | `None` | +| password | Password for the MongoDB user | `None` | +| host | MongoDB host | `"localhost"` | +| port | MongoDB port | `27017` | + +> **Note**: `user` and `password` must either be provided together or omitted together. diff --git a/docs/components/vectordbs/overview.mdx b/docs/components/vectordbs/overview.mdx index f0d87be16e..3309b7a4ee 100644 --- a/docs/components/vectordbs/overview.mdx +++ b/docs/components/vectordbs/overview.mdx @@ -23,6 +23,7 @@ See the list of supported vector databases below. 
+ diff --git a/docs/docs.json b/docs/docs.json index 65ae9590ee..60e8d2ab87 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -137,6 +137,7 @@ "components/vectordbs/dbs/pgvector", "components/vectordbs/dbs/milvus", "components/vectordbs/dbs/pinecone", + "components/vectordbs/dbs/mongodb", "components/vectordbs/dbs/azure", "components/vectordbs/dbs/redis", "components/vectordbs/dbs/elasticsearch", diff --git a/mem0/configs/vector_stores/mongodb.py b/mem0/configs/vector_stores/mongodb.py new file mode 100644 index 0000000000..3b6ce88142 --- /dev/null +++ b/mem0/configs/vector_stores/mongodb.py @@ -0,0 +1,42 @@ +from typing import Any, Dict, Optional, Callable, List + +from pydantic import BaseModel, Field, root_validator + + +class MongoVectorConfig(BaseModel): + """Configuration for MongoDB vector database.""" + + db_name: str = Field("mem0_db", description="Name of the MongoDB database") + collection_name: str = Field("mem0", description="Name of the MongoDB collection") + embedding_model_dims: Optional[int] = Field(1536, description="Dimensions of the embedding vectors") + user: Optional[str] = Field(None, description="MongoDB user for authentication") + password: Optional[str] = Field(None, description="Password for the MongoDB user") + host: Optional[str] = Field("localhost", description="MongoDB host. Default is 'localhost'") + port: Optional[int] = Field(27017, description="MongoDB port. 
Default is 27017") + + @root_validator(pre=True) + def check_auth_and_connection(cls, values): + user = values.get("user") + password = values.get("password") + if (user is None) != (password is None): + raise ValueError("Both 'user' and 'password' must be provided together or omitted together.") + + host = values.get("host") + port = values.get("port") + if host is None: + raise ValueError("The 'host' must be provided.") + if port is None: + raise ValueError("The 'port' must be provided.") + return values + + @root_validator(pre=True) + def validate_extra_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]: + allowed_fields = set(cls.__fields__) + input_fields = set(values.keys()) + extra_fields = input_fields - allowed_fields + if extra_fields: + raise ValueError( + f"Extra fields not allowed: {', '.join(extra_fields)}. " + f"Please provide only the following fields: {', '.join(allowed_fields)}." + ) + return values diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py index d137e273c2..4988b30038 100644 --- a/mem0/utils/factory.py +++ b/mem0/utils/factory.py @@ -79,6 +79,7 @@ class VectorStoreFactory: "upstash_vector": "mem0.vector_stores.upstash_vector.UpstashVector", "azure_ai_search": "mem0.vector_stores.azure_ai_search.AzureAISearch", "pinecone": "mem0.vector_stores.pinecone.PineconeDB", + "mongodb": "mem0.vector_stores.mongodb.MongoDB", "redis": "mem0.vector_stores.redis.RedisDB", "elasticsearch": "mem0.vector_stores.elasticsearch.ElasticsearchDB", "vertex_ai_vector_search": "mem0.vector_stores.vertex_ai_vector_search.GoogleMatchingEngine", diff --git a/mem0/vector_stores/configs.py b/mem0/vector_stores/configs.py index 43a2289f71..e360d238fa 100644 --- a/mem0/vector_stores/configs.py +++ b/mem0/vector_stores/configs.py @@ -15,6 +15,7 @@ class VectorStoreConfig(BaseModel): "chroma": "ChromaDbConfig", "pgvector": "PGVectorConfig", "pinecone": "PineconeConfig", + "mongodb": "MongoDBConfig", "milvus": "MilvusDBConfig", "upstash_vector": 
"UpstashVectorConfig", "azure_ai_search": "AzureAISearchConfig", diff --git a/mem0/vector_stores/mongodb.py b/mem0/vector_stores/mongodb.py new file mode 100644 index 0000000000..0a225fc54c --- /dev/null +++ b/mem0/vector_stores/mongodb.py @@ -0,0 +1,299 @@ +import logging +from typing import List, Optional, Dict, Any, Callable + +from pydantic import BaseModel + +try: + from pymongo import MongoClient + from pymongo.operations import SearchIndexModel + from pymongo.errors import PyMongoError +except ImportError: + raise ImportError("The 'pymongo' library is required. Please install it using 'pip install pymongo'.") + +from mem0.vector_stores.base import VectorStoreBase + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +class OutputData(BaseModel): + id: Optional[str] + score: Optional[float] + payload: Optional[dict] + + +class MongoVector(VectorStoreBase): + VECTOR_TYPE = "knnVector" + SIMILARITY_METRIC = "cosine" + + def __init__( + self, + db_name: str, + collection_name: str, + embedding_model_dims: int, + mongo_uri: str + ): + """ + Initialize the MongoDB vector store with vector search capabilities. + + Args: + db_name (str): Database name + collection_name (str): Collection name + embedding_model_dims (int): Dimension of the embedding vector + mongo_uri (str): MongoDB connection URI + """ + self.collection_name = collection_name + self.embedding_model_dims = embedding_model_dims + self.db_name = db_name + + self.client = MongoClient( + mongo_uri + ) + self.db = self.client[db_name] + self.collection = self.create_col() + + def create_col(self): + """Create new collection with vector search index.""" + try: + database = self.client[self.db_name] + collection_names = database.list_collection_names() + if self.collection_name not in collection_names: + logger.info(f"Collection '{self.collection_name}' does not exist. 
Creating it now.") + collection = database[self.collection_name] + # Insert and remove a placeholder document to create the collection + collection.insert_one({"_id": 0, "placeholder": True}) + collection.delete_one({"_id": 0}) + logger.info(f"Collection '{self.collection_name}' created successfully.") + else: + collection = database[self.collection_name] + + self.index_name = f"{self.collection_name}_vector_index" + found_indexes = list(collection.list_search_indexes(name=self.index_name)) + if found_indexes: + logger.info(f"Search index '{self.index_name}' already exists in collection '{self.collection_name}'.") + else: + search_index_model = SearchIndexModel( + name=self.index_name, + definition={ + "mappings": { + "dynamic": False, + "fields": { + "embedding": { + "type": self.VECTOR_TYPE, + "dimensions": self.embedding_model_dims, + "similarity": self.SIMILARITY_METRIC, + } + }, + } + }, + ) + collection.create_search_index(search_index_model) + logger.info( + f"Search index '{self.index_name}' created successfully for collection '{self.collection_name}'." + ) + return collection + except PyMongoError as e: + logger.error(f"Error creating collection and search index: {e}") + return None + + def insert( + self, vectors: List[List[float]], payloads: Optional[List[Dict]] = None, ids: Optional[List[str]] = None + ) -> None: + """ + Insert vectors into the collection. + + Args: + vectors (List[List[float]]): List of vectors to insert. + payloads (List[Dict], optional): List of payloads corresponding to vectors. + ids (List[str], optional): List of IDs corresponding to vectors. 
+ """ + logger.info(f"Inserting {len(vectors)} vectors into collection '{self.collection_name}'.") + + data = [] + for vector, payload, _id in zip(vectors, payloads or [{}] * len(vectors), ids or [None] * len(vectors)): + document = {"_id": _id, "embedding": vector, "payload": payload} + data.append(document) + try: + self.collection.insert_many(data) + logger.info(f"Inserted {len(data)} documents into '{self.collection_name}'.") + except PyMongoError as e: + logger.error(f"Error inserting data: {e}") + + def search(self, query: str, query_vector: List[float], limit=5, filters: Optional[Dict] = None) -> List[OutputData]: + """ + Search for similar vectors using the vector search index. + + Args: + query (str): Query string + query_vector (List[float]): Query vector. + limit (int, optional): Number of results to return. Defaults to 5. + filters (Dict, optional): Filters to apply to the search. + + Returns: + List[OutputData]: Search results. + """ + + found_indexes = list(self.collection.list_search_indexes(name=self.index_name)) + if not found_indexes: + logger.error(f"Index '{self.index_name}' does not exist.") + return [] + + results = [] + try: + collection = self.client[self.db_name][self.collection_name] + pipeline = [ + { + "$vectorSearch": { + "index": self.index_name, + "limit": limit, + "numCandidates": limit, + "queryVector": query_vector, + "path": "embedding", + } + }, + {"$set": {"score": {"$meta": "vectorSearchScore"}}}, + {"$project": {"embedding": 0}}, + ] + results = list(collection.aggregate(pipeline)) + logger.info(f"Vector search completed. Found {len(results)} documents.") + except Exception as e: + logger.error(f"Error during vector search for query {query}: {e}") + return [] + + output = [OutputData(id=str(doc["_id"]), score=doc.get("score"), payload=doc.get("payload")) for doc in results] + return output + + def delete(self, vector_id: str) -> None: + """ + Delete a vector by ID. + + Args: + vector_id (str): ID of the vector to delete. 
+ """ + try: + result = self.collection.delete_one({"_id": vector_id}) + if result.deleted_count > 0: + logger.info(f"Deleted document with ID '{vector_id}'.") + else: + logger.warning(f"No document found with ID '{vector_id}' to delete.") + except PyMongoError as e: + logger.error(f"Error deleting document: {e}") + + def update(self, vector_id: str, vector: Optional[List[float]] = None, payload: Optional[Dict] = None) -> None: + """ + Update a vector and its payload. + + Args: + vector_id (str): ID of the vector to update. + vector (List[float], optional): Updated vector. + payload (Dict, optional): Updated payload. + """ + update_fields = {} + if vector is not None: + update_fields["embedding"] = vector + if payload is not None: + update_fields["payload"] = payload + + if update_fields: + try: + result = self.collection.update_one({"_id": vector_id}, {"$set": update_fields}) + if result.matched_count > 0: + logger.info(f"Updated document with ID '{vector_id}'.") + else: + logger.warning(f"No document found with ID '{vector_id}' to update.") + except PyMongoError as e: + logger.error(f"Error updating document: {e}") + + def get(self, vector_id: str) -> Optional[OutputData]: + """ + Retrieve a vector by ID. + + Args: + vector_id (str): ID of the vector to retrieve. + + Returns: + Optional[OutputData]: Retrieved vector or None if not found. + """ + try: + doc = self.collection.find_one({"_id": vector_id}) + if doc: + logger.info(f"Retrieved document with ID '{vector_id}'.") + return OutputData(id=str(doc["_id"]), score=None, payload=doc.get("payload")) + else: + logger.warning(f"Document with ID '{vector_id}' not found.") + return None + except PyMongoError as e: + logger.error(f"Error retrieving document: {e}") + return None + + def list_cols(self) -> List[str]: + """ + List all collections in the database. + + Returns: + List[str]: List of collection names. 
+ """ + try: + collections = self.db.list_collection_names() + logger.info(f"Listing collections in database '{self.db_name}': {collections}") + return collections + except PyMongoError as e: + logger.error(f"Error listing collections: {e}") + return [] + + def delete_col(self) -> None: + """Delete the collection.""" + try: + self.collection.drop() + logger.info(f"Deleted collection '{self.collection_name}'.") + except PyMongoError as e: + logger.error(f"Error deleting collection: {e}") + + def col_info(self) -> Dict[str, Any]: + """ + Get information about the collection. + + Returns: + Dict[str, Any]: Collection information. + """ + try: + stats = self.db.command("collstats", self.collection_name) + info = {"name": self.collection_name, "count": stats.get("count"), "size": stats.get("size")} + logger.info(f"Collection info: {info}") + return info + except PyMongoError as e: + logger.error(f"Error getting collection info: {e}") + return {} + + def list(self, filters: Optional[Dict] = None, limit: int = 100) -> List[OutputData]: + """ + List vectors in the collection. + + Args: + filters (Dict, optional): Filters to apply to the list. + limit (int, optional): Number of vectors to return. + + Returns: + List[OutputData]: List of vectors. 
+ """ + try: + query = filters or {} + cursor = self.collection.find(query).limit(limit) + results = [OutputData(id=str(doc["_id"]), score=None, payload=doc.get("payload")) for doc in cursor] + logger.info(f"Retrieved {len(results)} documents from collection '{self.collection_name}'.") + return results + except PyMongoError as e: + logger.error(f"Error listing documents: {e}") + return [] + + def reset(self): + """Reset the index by deleting and recreating it.""" + logger.warning(f"Resetting index {self.collection_name}...") + self.delete_col() + self.collection = self.create_col(self.collection_name) + + def __del__(self) -> None: + """Close the database connection when the object is deleted.""" + if hasattr(self, "client"): + self.client.close() + logger.info("MongoClient connection closed.") diff --git a/poetry.lock b/poetry.lock index fd05427e42..8784eec3ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2200,4 +2200,4 @@ graph = ["langchain-neo4j", "neo4j", "rank-bm25"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<4.0" -content-hash = "07f2aee9c596c2d2470df085b92551b7b7e3c19cabe61ae5bee7505395601417" +content-hash = "07f2aee9c596c2d2470df085b92551b7b7e3c19cabe61ae5bee7505395601417" \ No newline at end of file diff --git a/tests/vector_stores/test_mongodb.py b/tests/vector_stores/test_mongodb.py new file mode 100644 index 0000000000..370d5322e4 --- /dev/null +++ b/tests/vector_stores/test_mongodb.py @@ -0,0 +1,176 @@ +import time +import pytest +from unittest.mock import MagicMock, patch +from mem0.vector_stores.mongodb import MongoVector +from pymongo.operations import SearchIndexModel + +@pytest.fixture +@patch("mem0.vector_stores.mongodb.MongoClient") +def mongo_vector_fixture(mock_mongo_client): + mock_client = mock_mongo_client.return_value + mock_db = mock_client["test_db"] + mock_collection = mock_db["test_collection"] + mock_collection.list_search_indexes.return_value = [] + mock_collection.aggregate.return_value = [] + 
mock_collection.find_one.return_value = None
+    mock_collection.find.return_value = []
+    mock_db.list_collection_names.return_value = []
+
+    mongo_vector = MongoVector(
+        db_name="test_db",
+        collection_name="test_collection",
+        embedding_model_dims=1536,
+        user="username",
+        password="password",
+    )
+    return mongo_vector, mock_collection, mock_db
+
+def test_initialize_create_col(mongo_vector_fixture):
+    mongo_vector, mock_collection, mock_db = mongo_vector_fixture
+    assert mongo_vector.collection_name == "test_collection"
+    assert mongo_vector.embedding_model_dims == 1536
+    assert mongo_vector.db_name == "test_db"
+
+    # Verify create_col being called
+    mock_db.list_collection_names.assert_called_once()
+    mock_collection.insert_one.assert_called_once_with({"_id": 0, "placeholder": True})
+    mock_collection.delete_one.assert_called_once_with({"_id": 0})
+    assert mongo_vector.index_name == "test_collection_vector_index"
+    mock_collection.list_search_indexes.assert_called_once_with(name="test_collection_vector_index")
+    mock_collection.create_search_index.assert_called_once()
+    args, _ = mock_collection.create_search_index.call_args
+    search_index_model = args[0].document
+    assert search_index_model == {
+        "name": "test_collection_vector_index",
+        "definition": {
+            "mappings": {
+                "dynamic": False,
+                "fields": {
+                    "embedding": {
+                        "type": "knnVector",
+                        "d": 1536,
+                        "similarity": "cosine",
+                    }
+                }
+            }
+        }
+    }
+    assert mongo_vector.collection == mock_collection
+
+def test_insert(mongo_vector_fixture):
+    mongo_vector, mock_collection, _ = mongo_vector_fixture
+    vectors = [[0.1] * 1536, [0.2] * 1536]
+    payloads = [{"name": "vector1"}, {"name": "vector2"}]
+    ids = ["id1", "id2"]
+
+    mongo_vector.insert(vectors, payloads, ids)
+    expected_records=[
+        ({"_id": ids[0], "embedding": vectors[0], "payload": payloads[0]}),
+        ({"_id": ids[1], "embedding": vectors[1], "payload": payloads[1]})
+    ]
+    mock_collection.insert_many.assert_called_once_with(expected_records)
+
+def 
test_search(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + query_vector = [0.1] * 1536 + mock_collection.aggregate.return_value = [ + {"_id": "id1", "score": 0.9, "payload": {"key": "value1"}}, + {"_id": "id2", "score": 0.8, "payload": {"key": "value2"}}, + ] + mock_collection.list_search_indexes.return_value = ["test_collection_vector_index"] + + results = mongo_vector.search("query_str", query_vector, limit=2) + mock_collection.list_search_indexes.assert_called_with(name="test_collection_vector_index") + mock_collection.aggregate.assert_called_once_with([ + { + "$vectorSearch": { + "index": "test_collection_vector_index", + "limit": 2, + "numCandidates": 2, + "queryVector": query_vector, + "path": "embedding", + }, + }, + {"$set": {"score": {"$meta": "vectorSearchScore"}}}, + {"$project": {"embedding": 0}}, + ]) + assert len(results) == 2 + assert results[0].id == "id1" + assert results[0].score == 0.9 + assert results[1].id == "id2" + assert results[1].score == 0.8 + +def test_delete(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + mock_delete_result = MagicMock() + mock_delete_result.deleted_count = 1 + mock_collection.delete_one.return_value = mock_delete_result + + mongo_vector.delete("id1") + mock_collection.delete_one.assert_called_with({"_id": "id1"}) + +def test_update(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + mock_update_result = MagicMock() + mock_update_result.matched_count = 1 + mock_collection.update_one.return_value = mock_update_result + idValue = "id1" + vectorValue = [0.2] * 1536 + payloadValue = {"key": "updated"} + + mongo_vector.update(idValue, vector=vectorValue, payload=payloadValue) + mock_collection.update_one.assert_called_once_with( + {"_id": idValue}, + {"$set": {"embedding": vectorValue, "payload": payloadValue}}, + ) + +def test_get(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + 
mock_collection.find_one.return_value = {"_id": "id1", "payload": {"key": "value1"}} + + result = mongo_vector.get("id1") + assert result is not None + assert result.id == "id1" + assert result.payload == {"key": "value1"} + +def test_list_cols(mongo_vector_fixture): + mongo_vector, _, mock_db = mongo_vector_fixture + mock_db.list_collection_names.return_value = ["col1", "col2"] + + collections = mongo_vector.list_cols() + assert collections == ["col1", "col2"] + +def test_delete_col(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + + mongo_vector.delete_col() + mock_collection.drop.assert_called_once() + +def test_col_info(mongo_vector_fixture): + mongo_vector, _, mock_db = mongo_vector_fixture + mock_db.command.return_value = {"count": 10, "size": 1024} + + info = mongo_vector.col_info() + mock_db.command.assert_called_once_with("collstats", "test_collection") + assert info["name"] == "test_collection" + assert info["count"] == 10 + assert info["size"] == 1024 + +def test_list(mongo_vector_fixture): + mongo_vector, mock_collection, _ = mongo_vector_fixture + mock_cursor = MagicMock() + mock_cursor.limit.return_value = [ + {"_id": "id1", "payload": {"key": "value1"}}, + {"_id": "id2", "payload": {"key": "value2"}}, + ] + mock_collection.find.return_value = mock_cursor + + query_filters = {"_id": {"$in": ["id1", "id2"]}} + results = mongo_vector.list(filters=query_filters, limit=2) + mock_collection.find.assert_called_once_with(query_filters) + mock_cursor.limit.assert_called_once_with(2) + assert len(results) == 2 + assert results[0].id == "id1" + assert results[0].payload == {"key": "value1"} + assert results[1].id == "id2" + assert results[1].payload == {"key": "value2"} From 3e5f68ee90e16556aab117b7167e9d25fc3cbed8 Mon Sep 17 00:00:00 2001 From: Dev Khant Date: Sat, 14 Jun 2025 21:55:22 +0530 Subject: [PATCH 6/7] Add logger in Opensearch (#2957) --- mem0/vector_stores/opensearch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/mem0/vector_stores/opensearch.py b/mem0/vector_stores/opensearch.py index 4f258f3293..b3ddff82a4 100644 --- a/mem0/vector_stores/opensearch.py +++ b/mem0/vector_stores/opensearch.py @@ -83,19 +83,19 @@ def create_col(self, name: str, vector_size: int) -> None: }, } + logger.warning(f"Creating index {name}, it might take 1-2 minutes...") if not self.client.indices.exists(index=name): self.client.indices.create(index=name, body=index_settings) - logger.info(f"Created index {name}") # Wait for index to be ready - max_retries = 60 # 60 seconds timeout + max_retries = 180 # 3 minutes timeout retry_count = 0 while retry_count < max_retries: try: # Check if index is ready by attempting a simple search self.client.search(index=name, body={"query": {"match_all": {}}}) - logger.info(f"Index {name} is ready") time.sleep(1) + logger.info(f"Index {name} is ready") return except Exception: retry_count += 1 From 18c870ec79d40c286f8dd15e56a2c5e5357c75c4 Mon Sep 17 00:00:00 2001 From: Dev Khant Date: Sat, 14 Jun 2025 21:58:12 +0530 Subject: [PATCH 7/7] version bump -> 0.1.108 (#2958) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dd882d05be..c3b867329e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "mem0ai" -version = "0.1.107rc2" +version = "0.1.108" description = "Long-term memory for AI Agents" authors = [ { name = "Mem0", email = "founders@mem0.ai" }