Medium severity · CVSS 6.3 · NVD Advisory · Published May 1, 2026 · Updated May 5, 2026
CVE-2026-7597
CVE-2026-7597
Description
A vulnerability was found in mem0ai mem0 up to version 1.0.11. It affects the pickle.load/pickle.dump functions in the file mem0/vector_stores/faiss.py. Manipulation of the serialized input leads to deserialization of untrusted data, which can result in arbitrary code execution. The attack can be initiated remotely, and a public exploit is available. The fix is commit 62dca096f9236010ca15fea9ba369ba740b86b7a; applying this patch is the recommended mitigation.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| mem0ai (PyPI) | < 2.0.0b2 | 2.0.0b2 |
Patches
1. 62dca096f923 — fix: prevent arbitrary code execution via pickle in FAISS vector store
2 files changed · +523 −20
mem0/vector_stores/faiss.py+155 −17 modified@@ -1,10 +1,11 @@ +import json import logging import os import pickle import uuid import warnings from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import numpy as np from pydantic import BaseModel @@ -13,7 +14,7 @@ # Suppress SWIG deprecation warnings from FAISS warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*SwigPy.*") warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*swigvarlink.*") - + logging.getLogger("faiss").setLevel(logging.WARNING) logging.getLogger("faiss.loader").setLevel(logging.WARNING) @@ -30,6 +31,93 @@ logger = logging.getLogger(__name__) +class SafeUnpickler(pickle.Unpickler): + """ + Restricted unpickler that only allows safe built-in types. + + This prevents arbitrary code execution via pickle deserialization by only + allowing a whitelist of safe types (dict, list, str, int, float, bool, tuple, None). + """ + + # Only allow builtins module + SAFE_MODULES = frozenset({"builtins", "__builtin__"}) + # Only allow safe basic types + SAFE_NAMES = frozenset({"dict", "list", "str", "int", "float", "bool", "tuple", "set", "frozenset", "NoneType"}) + + def find_class(self, module: str, name: str) -> Any: + """Override find_class to only allow safe types.""" + if module in self.SAFE_MODULES and name in self.SAFE_NAMES: + import builtins + + if hasattr(builtins, name): + return getattr(builtins, name) + # NoneType special case + if name == "NoneType": + return type(None) + raise pickle.UnpicklingError( + f"Unsafe pickle: attempted to load '{module}.{name}'. " + f"Only basic Python types are allowed for security reasons." + ) + + +def _safe_pickle_load(file_path: str) -> Any: + """ + Safely load a pickle file using restricted unpickler. + + Args: + file_path: Path to the pickle file. + + Returns: + The deserialized object (only basic Python types allowed). 
+ + Raises: + pickle.UnpicklingError: If the pickle contains unsafe types. + """ + with open(file_path, "rb") as f: + return SafeUnpickler(f).load() + + +def _validate_docstore_structure(data: Any) -> tuple: + """ + Validate that loaded data has the expected structure. + + Args: + data: The loaded data to validate. + + Returns: + Tuple of (docstore, index_to_id) if valid. + + Raises: + ValueError: If the data structure is invalid. + """ + if not isinstance(data, tuple) or len(data) != 2: + raise ValueError("Invalid docstore format: expected tuple of (docstore, index_to_id)") + + docstore, index_to_id = data + + if not isinstance(docstore, dict): + raise ValueError("Invalid docstore format: docstore must be a dict") + + if not isinstance(index_to_id, dict): + raise ValueError("Invalid docstore format: index_to_id must be a dict") + + # Validate docstore entries + for key, value in docstore.items(): + if not isinstance(key, str): + raise ValueError(f"Invalid docstore key type: {type(key)}, expected str") + if not isinstance(value, dict): + raise ValueError(f"Invalid docstore value type: {type(value)}, expected dict") + + # Validate index_to_id entries + for key, value in index_to_id.items(): + if not isinstance(key, int): + raise ValueError(f"Invalid index_to_id key type: {type(key)}, expected int") + if not isinstance(value, str): + raise ValueError(f"Invalid index_to_id value type: {type(value)}, expected str") + + return docstore, index_to_id + + class OutputData(BaseModel): id: Optional[str] # memory id score: Optional[float] # distance @@ -73,44 +161,90 @@ def __init__( # Try to load existing index if available index_path = f"{self.path}/{collection_name}.faiss" - docstore_path = f"{self.path}/{collection_name}.pkl" - if os.path.exists(index_path) and os.path.exists(docstore_path): - self._load(index_path, docstore_path) + json_docstore_path = f"{self.path}/{collection_name}.json" + pkl_docstore_path = f"{self.path}/{collection_name}.pkl" + + # Check for index 
file and either JSON (preferred) or legacy pickle docstore + if os.path.exists(index_path) and (os.path.exists(json_docstore_path) or os.path.exists(pkl_docstore_path)): + # _load will prefer JSON over pickle and auto-migrate + self._load(index_path, pkl_docstore_path) else: self.create_col(collection_name) def _load(self, index_path: str, docstore_path: str): """ Load FAISS index and docstore from disk. + Supports both JSON (preferred) and legacy pickle formats. Pickle files are loaded + using a restricted unpickler that only allows basic Python types to prevent + arbitrary code execution (CVE mitigation). + Args: index_path (str): Path to FAISS index file. - docstore_path (str): Path to docstore pickle file. + docstore_path (str): Path to docstore file (.json or legacy .pkl). """ try: self.index = faiss.read_index(index_path) - with open(docstore_path, "rb") as f: - self.docstore, self.index_to_id = pickle.load(f) - logger.info(f"Loaded FAISS index from {index_path} with {self.index.ntotal} vectors") + + # Determine docstore format - prefer JSON over pickle + json_docstore_path = docstore_path.replace(".pkl", ".json") + + if os.path.exists(json_docstore_path): + # Load from JSON (safe, preferred format) + with open(json_docstore_path, "r", encoding="utf-8") as f: + data = json.load(f) + self.docstore = data.get("docstore", {}) + # JSON keys are always strings, convert back to int + self.index_to_id = {int(k): v for k, v in data.get("index_to_id", {}).items()} + logger.info(f"Loaded FAISS index from {index_path} with {self.index.ntotal} vectors (JSON format)") + + elif os.path.exists(docstore_path): + # Load from legacy pickle using safe unpickler + # This prevents arbitrary code execution from malicious pickle files + logger.warning( + f"Loading legacy pickle docstore from {docstore_path}. " + f"Consider migrating to JSON format for better security." 
+ ) + data = _safe_pickle_load(docstore_path) + self.docstore, self.index_to_id = _validate_docstore_structure(data) + logger.info(f"Loaded FAISS index from {index_path} with {self.index.ntotal} vectors (pickle format)") + + # Auto-migrate to JSON format + self._save() + logger.info(f"Migrated docstore to JSON format: {json_docstore_path}") + + else: + raise FileNotFoundError(f"No docstore found at {docstore_path} or {json_docstore_path}") + + except pickle.UnpicklingError as e: + logger.error(f"Security error loading FAISS docstore: {e}") + raise ValueError(f"Failed to load FAISS docstore: potentially malicious pickle file. {e}") from e except Exception as e: logger.warning(f"Failed to load FAISS index: {e}") - self.docstore = {} self.index_to_id = {} def _save(self): - """Save FAISS index and docstore to disk.""" + """Save FAISS index and docstore to disk using JSON format (secure).""" if not self.path or not self.index: return try: os.makedirs(self.path, exist_ok=True) index_path = f"{self.path}/{self.collection_name}.faiss" - docstore_path = f"{self.path}/{self.collection_name}.pkl" + json_docstore_path = f"{self.path}/{self.collection_name}.json" faiss.write_index(self.index, index_path) - with open(docstore_path, "wb") as f: - pickle.dump((self.docstore, self.index_to_id), f) + + # Save docstore as JSON (safe format, no code execution risk) + # JSON keys must be strings, so convert int keys to str + data = { + "docstore": self.docstore, + "index_to_id": {str(k): v for k, v in self.index_to_id.items()}, + } + with open(json_docstore_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + except Exception as e: logger.warning(f"Failed to save FAISS index: {e}") @@ -417,12 +551,16 @@ def delete_col(self): if self.path: try: index_path = f"{self.path}/{self.collection_name}.faiss" - docstore_path = f"{self.path}/{self.collection_name}.pkl" + json_docstore_path = f"{self.path}/{self.collection_name}.json" + pkl_docstore_path = 
f"{self.path}/{self.collection_name}.pkl" if os.path.exists(index_path): os.remove(index_path) - if os.path.exists(docstore_path): - os.remove(docstore_path) + if os.path.exists(json_docstore_path): + os.remove(json_docstore_path) + # Also clean up legacy pickle files if they exist + if os.path.exists(pkl_docstore_path): + os.remove(pkl_docstore_path) logger.info(f"Deleted collection {self.collection_name}") except Exception as e:
tests/vector_stores/test_faiss.py+368 −3 modified@@ -1,12 +1,20 @@ +import json import os +import pickle import tempfile from unittest.mock import Mock, patch import faiss import numpy as np import pytest -from mem0.vector_stores.faiss import FAISS, OutputData +from mem0.vector_stores.faiss import ( + FAISS, + OutputData, + SafeUnpickler, + _safe_pickle_load, + _validate_docstore_structure, +) @pytest.fixture @@ -273,8 +281,8 @@ def test_delete_col(faiss_instance): # Call delete_col faiss_instance.delete_col() - # Verify os.remove was called twice (for index and docstore files) - assert mock_remove.call_count == 2 + # Verify os.remove was called for index, json docstore, and legacy pkl files + assert mock_remove.call_count == 3 # Verify the internal state was reset assert faiss_instance.index is None @@ -299,3 +307,360 @@ def test_normalize_L2(faiss_instance, mock_faiss_index): # Verify faiss.normalize_L2 was called mock_normalize.assert_called_once() + + +# ============================================================================= +# Security Tests for Pickle Deserialization Vulnerability Fix +# ============================================================================= + + +class TestSafeUnpickler: + """Tests for the SafeUnpickler class that prevents arbitrary code execution.""" + + def test_safe_unpickler_allows_basic_types(self): + """SafeUnpickler should allow basic Python types.""" + # Create a legitimate pickle with basic types + data = ( + {"key1": "value1", "key2": {"nested": "dict"}}, + {0: "id1", 1: "id2"}, + ) + pickled = pickle.dumps(data) + + # Should load successfully + import io + + result = SafeUnpickler(io.BytesIO(pickled)).load() + assert result == data + + def test_safe_unpickler_blocks_os_system(self): + """SafeUnpickler should block os.system execution attempts.""" + # Generate the malicious payload dynamically to ensure correct format + import io + + class Evil: + def __reduce__(self): + return (os.system, ("echo pwned",)) + + 
malicious_payload = pickle.dumps(Evil()) + + with pytest.raises(pickle.UnpicklingError) as exc_info: + SafeUnpickler(io.BytesIO(malicious_payload)).load() + + assert "Unsafe pickle" in str(exc_info.value) + assert "posix.system" in str(exc_info.value) + + def test_safe_unpickler_blocks_subprocess(self): + """SafeUnpickler should block subprocess execution attempts.""" + import subprocess + + # Create a malicious pickle that tries to use subprocess + class MaliciousSubprocess: + def __reduce__(self): + return (subprocess.call, (["echo", "pwned"],)) + + malicious_payload = pickle.dumps(MaliciousSubprocess()) + + import io + + with pytest.raises(pickle.UnpicklingError) as exc_info: + SafeUnpickler(io.BytesIO(malicious_payload)).load() + + assert "Unsafe pickle" in str(exc_info.value) + + def test_safe_unpickler_blocks_eval(self): + """SafeUnpickler should block eval/exec attempts.""" + + # Create a malicious pickle that tries to use eval + class MaliciousEval: + def __reduce__(self): + return (eval, ("__import__('os').system('touch pwned')",)) + + malicious_payload = pickle.dumps(MaliciousEval()) + + import io + + with pytest.raises(pickle.UnpicklingError) as exc_info: + SafeUnpickler(io.BytesIO(malicious_payload)).load() + + assert "Unsafe pickle" in str(exc_info.value) + + def test_safe_unpickler_blocks_arbitrary_modules(self): + """SafeUnpickler should block imports from arbitrary modules.""" + + # Create a pickle that tries to load a class from a non-builtins module + class ArbitraryClass: + def __reduce__(self): + return (type, ("Evil", (), {})) + + malicious_payload = pickle.dumps(ArbitraryClass()) + + import io + + # This should either work (type is a builtin) or fail safely + # The key is it shouldn't execute arbitrary code + try: + result = SafeUnpickler(io.BytesIO(malicious_payload)).load() + # If it loads, verify it's just a benign type object + assert isinstance(result, type) + except pickle.UnpicklingError: + # This is also acceptable - blocking unknown 
patterns + pass + + +class TestSafePickleLoad: + """Tests for the _safe_pickle_load function.""" + + def test_safe_pickle_load_with_valid_file(self): + """_safe_pickle_load should load valid pickle files.""" + with tempfile.NamedTemporaryFile(mode="wb", suffix=".pkl", delete=False) as f: + data = ({"id1": {"data": "test"}}, {0: "id1"}) + pickle.dump(data, f) + temp_path = f.name + + try: + result = _safe_pickle_load(temp_path) + assert result == data + finally: + os.unlink(temp_path) + + def test_safe_pickle_load_blocks_malicious_file(self): + """_safe_pickle_load should block malicious pickle files.""" + + # Generate the malicious payload dynamically + class Evil: + def __reduce__(self): + return (os.system, ("echo pwned",)) + + malicious_payload = pickle.dumps(Evil()) + + with tempfile.NamedTemporaryFile(mode="wb", suffix=".pkl", delete=False) as f: + f.write(malicious_payload) + temp_path = f.name + + try: + with pytest.raises(pickle.UnpicklingError) as exc_info: + _safe_pickle_load(temp_path) + assert "Unsafe pickle" in str(exc_info.value) + finally: + os.unlink(temp_path) + + +class TestValidateDocstoreStructure: + """Tests for the _validate_docstore_structure function.""" + + def test_valid_structure(self): + """Should accept valid docstore structure.""" + data = ({"id1": {"data": "test"}}, {0: "id1"}) + docstore, index_to_id = _validate_docstore_structure(data) + assert docstore == {"id1": {"data": "test"}} + assert index_to_id == {0: "id1"} + + def test_invalid_tuple_length(self): + """Should reject tuples with wrong length.""" + with pytest.raises(ValueError, match="expected tuple"): + _validate_docstore_structure(({}, {}, {})) + + def test_invalid_docstore_type(self): + """Should reject non-dict docstore.""" + with pytest.raises(ValueError, match="docstore must be a dict"): + _validate_docstore_structure(("not a dict", {})) + + def test_invalid_index_to_id_type(self): + """Should reject non-dict index_to_id.""" + with pytest.raises(ValueError, 
match="index_to_id must be a dict"): + _validate_docstore_structure(({}, "not a dict")) + + def test_invalid_docstore_key_type(self): + """Should reject non-string docstore keys.""" + with pytest.raises(ValueError, match="Invalid docstore key type"): + _validate_docstore_structure(({123: {"data": "test"}}, {0: "id1"})) + + def test_invalid_index_to_id_key_type(self): + """Should reject non-int index_to_id keys.""" + with pytest.raises(ValueError, match="Invalid index_to_id key type"): + _validate_docstore_structure(({"id1": {"data": "test"}}, {"0": "id1"})) + + +class TestFAISSSecurityIntegration: + """Integration tests for FAISS security fixes.""" + + def test_faiss_saves_as_json(self): + """FAISS should save docstore as JSON, not pickle.""" + with tempfile.TemporaryDirectory() as temp_dir: + mock_index = Mock() + mock_index.d = 128 + mock_index.ntotal = 0 + + with patch("mem0.vector_stores.faiss.faiss.IndexFlatL2", return_value=mock_index): + with patch("mem0.vector_stores.faiss.faiss.write_index"): + faiss_store = FAISS( + collection_name="test_security", + path=os.path.join(temp_dir, "test_faiss"), + distance_strategy="euclidean", + ) + faiss_store.index = mock_index + + # Insert some data + faiss_store.docstore = {"id1": {"data": "test"}} + faiss_store.index_to_id = {0: "id1"} + faiss_store._save() + + # Verify JSON file was created + json_path = os.path.join(temp_dir, "test_faiss", "test_security.json") + pkl_path = os.path.join(temp_dir, "test_faiss", "test_security.pkl") + + assert os.path.exists(json_path), "JSON docstore file should be created" + assert not os.path.exists(pkl_path), "Pickle file should NOT be created" + + # Verify JSON content + with open(json_path, "r") as f: + data = json.load(f) + assert data["docstore"] == {"id1": {"data": "test"}} + assert data["index_to_id"] == {"0": "id1"} + + def test_faiss_loads_json_preferentially(self): + """FAISS should prefer JSON over pickle when both exist.""" + with tempfile.TemporaryDirectory() as 
temp_dir: + faiss_path = os.path.join(temp_dir, "test_faiss") + os.makedirs(faiss_path) + + # Create both JSON and pickle files with different data + json_data = {"docstore": {"id1": {"source": "json"}}, "index_to_id": {"0": "id1"}} + pkl_data = ({"id1": {"source": "pickle"}}, {0: "id1"}) + + with open(os.path.join(faiss_path, "test_pref.json"), "w") as f: + json.dump(json_data, f) + + with open(os.path.join(faiss_path, "test_pref.pkl"), "wb") as f: + pickle.dump(pkl_data, f) + + mock_index = Mock() + mock_index.d = 128 + mock_index.ntotal = 1 + + with patch("mem0.vector_stores.faiss.faiss.read_index", return_value=mock_index): + with patch("mem0.vector_stores.faiss.faiss.write_index"): + faiss_store = FAISS.__new__(FAISS) + faiss_store.collection_name = "test_pref" + faiss_store.path = faiss_path + faiss_store.index = None + faiss_store.docstore = {} + faiss_store.index_to_id = {} + + faiss_store._load( + os.path.join(faiss_path, "test_pref.faiss"), + os.path.join(faiss_path, "test_pref.pkl"), + ) + + # Should have loaded from JSON, not pickle + assert faiss_store.docstore == {"id1": {"source": "json"}} + + def test_faiss_blocks_malicious_pickle_on_load(self): + """FAISS should block loading of malicious pickle files.""" + with tempfile.TemporaryDirectory() as temp_dir: + faiss_path = os.path.join(temp_dir, "test_faiss") + os.makedirs(faiss_path) + + # Create a malicious pickle file (RCE payload) + class Evil: + def __reduce__(self): + return (os.system, (f"touch {temp_dir}/pwned",)) + + malicious_payload = pickle.dumps(Evil()) + + with open(os.path.join(faiss_path, "malicious.pkl"), "wb") as f: + f.write(malicious_payload) + + mock_index = Mock() + mock_index.ntotal = 1 + + with patch("mem0.vector_stores.faiss.faiss.read_index", return_value=mock_index): + faiss_store = FAISS.__new__(FAISS) + faiss_store.collection_name = "malicious" + faiss_store.path = faiss_path + faiss_store.index = None + faiss_store.docstore = {} + faiss_store.index_to_id = {} + + # Should 
raise an error, not execute the malicious payload + with pytest.raises(ValueError) as exc_info: + faiss_store._load( + os.path.join(faiss_path, "malicious.faiss"), + os.path.join(faiss_path, "malicious.pkl"), + ) + + assert "malicious pickle" in str(exc_info.value).lower() or "unsafe" in str(exc_info.value).lower() + + # Verify the malicious command was NOT executed + pwned_file = os.path.join(temp_dir, "pwned") + assert not os.path.exists(pwned_file), "Malicious payload should NOT have been executed!" + + def test_faiss_migrates_legacy_pickle_to_json(self): + """FAISS should auto-migrate valid pickle files to JSON format.""" + with tempfile.TemporaryDirectory() as temp_dir: + faiss_path = os.path.join(temp_dir, "test_faiss") + os.makedirs(faiss_path) + + # Create a legitimate legacy pickle file + pkl_data = ({"id1": {"data": "legacy"}}, {0: "id1"}) + with open(os.path.join(faiss_path, "legacy.pkl"), "wb") as f: + pickle.dump(pkl_data, f) + + mock_index = Mock() + mock_index.d = 128 + mock_index.ntotal = 1 + + with patch("mem0.vector_stores.faiss.faiss.read_index", return_value=mock_index): + with patch("mem0.vector_stores.faiss.faiss.write_index"): + faiss_store = FAISS.__new__(FAISS) + faiss_store.collection_name = "legacy" + faiss_store.path = faiss_path + faiss_store.index = None + faiss_store.docstore = {} + faiss_store.index_to_id = {} + + faiss_store._load( + os.path.join(faiss_path, "legacy.faiss"), + os.path.join(faiss_path, "legacy.pkl"), + ) + + # Data should be loaded correctly + assert faiss_store.docstore == {"id1": {"data": "legacy"}} + assert faiss_store.index_to_id == {0: "id1"} + + # JSON file should now exist (auto-migrated) + json_path = os.path.join(faiss_path, "legacy.json") + assert os.path.exists(json_path), "JSON file should be created during migration" + + def test_delete_col_removes_json_and_pkl(self): + """delete_col should remove both JSON and legacy pickle files.""" + with tempfile.TemporaryDirectory() as temp_dir: + faiss_path = 
os.path.join(temp_dir, "test_faiss") + os.makedirs(faiss_path) + + # Create both file types + json_path = os.path.join(faiss_path, "test_del.json") + pkl_path = os.path.join(faiss_path, "test_del.pkl") + faiss_index_path = os.path.join(faiss_path, "test_del.faiss") + + with open(json_path, "w") as f: + json.dump({"docstore": {}, "index_to_id": {}}, f) + with open(pkl_path, "wb") as f: + pickle.dump(({}, {}), f) + with open(faiss_index_path, "w") as f: + f.write("dummy") + + with patch("faiss.IndexFlatL2"): + faiss_store = FAISS.__new__(FAISS) + faiss_store.collection_name = "test_del" + faiss_store.path = faiss_path + faiss_store.index = Mock() + faiss_store.docstore = {} + faiss_store.index_to_id = {} + + faiss_store.delete_col() + + # Both files should be deleted + assert not os.path.exists(json_path), "JSON file should be deleted" + assert not os.path.exists(pkl_path), "PKL file should be deleted" + assert not os.path.exists(faiss_index_path), "FAISS index should be deleted"
Vulnerability mechanics
AI mechanics synthesis has not run for this CVE yet.
References
8 references:
- github.com/advisories/GHSA-xqxw-r767-67m7 (GHSA · advisory)
- nvd.nist.gov/vuln/detail/CVE-2026-7597 (GHSA · advisory)
- github.com/mem0ai/mem0/commit/62dca096f9236010ca15fea9ba369ba740b86b7a (NVD · web)
- github.com/mem0ai/mem0/issues/3778 (NVD · web)
- github.com/mem0ai/mem0/pull/4833 (NVD · web)
- vuldb.com/submit/805562 (NVD · web)
- vuldb.com/vuln/360550 (NVD · web)
- vuldb.com/vuln/360550/cti (NVD · web)
News mentions
No linked articles in our index yet (0).