High severity · 8.6 · NVD Advisory · Published Aug 25, 2025 · Updated Apr 15, 2026
CVE-2025-5302
CVE-2025-5302
Description
A denial of service vulnerability exists in the JSONReader component of the run-llama/llama_index repository, specifically in version v0.12.37. The vulnerability is caused by uncontrolled recursion when parsing deeply nested JSON files, which can lead to Python hitting its maximum recursion depth limit. This results in high resource consumption and potential crashes of the Python process. The issue is resolved in version 0.12.38.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| llama-index-core (PyPI) | < 0.12.38 | 0.12.38 |
Affected products (1)
Patches (1)
c032843a02ce — fix: prevent DoS attacks in JSONReader (#18877)
2 files changed · +85 −44
llama-index-core/llama_index/core/readers/json.py · +53 −44 · modified
```diff
@@ -2,6 +2,7 @@
 
 import json
 import re
+import warnings
 from typing import Any, Dict, Generator, List, Optional
 
 from llama_index.core.readers.base import BaseReader
@@ -97,49 +98,57 @@ def load_data(
         self, input_file: str, extra_info: Optional[Dict] = {}
     ) -> List[Document]:
         """Load data from the input file."""
-        with open(input_file, encoding="utf-8") as f:
-            load_data = []
-            if self.is_jsonl:
-                for line in f:
-                    load_data.append(json.loads(line.strip()))
-            else:
-                load_data = [json.load(f)]
-
-            documents = []
-            for data in load_data:
-                if self.levels_back is None and self.clean_json is True:
-                    # If levels_back isn't set and clean json is set,
-                    # remove lines containing only formatting, we just format and make each
-                    # line an embedding
-                    json_output = json.dumps(
-                        data, indent=0, ensure_ascii=self.ensure_ascii
-                    )
-                    lines = json_output.split("\n")
-                    useful_lines = [
-                        line for line in lines if not re.match(r"^[{}\[\],]*$", line)
-                    ]
-                    documents.append(
-                        Document(text="\n".join(useful_lines), metadata=extra_info)
-                    )
-
-                elif self.levels_back is None and self.clean_json is False:
-                    # If levels_back isn't set and clean json is False, create documents without cleaning
-                    json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
-                    documents.append(Document(text=json_output, metadata=extra_info))
-
-                elif self.levels_back is not None:
-                    # If levels_back is set, we make the embeddings contain the labels
-                    # from further up the JSON tree
-                    lines = [
-                        *_depth_first_yield(
-                            data,
-                            self.levels_back,
-                            self.collapse_length,
-                            [],
-                            self.ensure_ascii,
+        try:
+            with open(input_file, encoding="utf-8") as f:
+                load_data = []
+                if self.is_jsonl:
+                    for line in f:
+                        load_data.append(json.loads(line.strip()))
+                else:
+                    load_data = [json.load(f)]
+
+                documents = []
+                for data in load_data:
+                    if self.levels_back is None and self.clean_json is True:
+                        # If levels_back isn't set and clean json is set,
+                        # remove lines containing only formatting, we just format and make each
+                        # line an embedding
+                        json_output = json.dumps(
+                            data, indent=0, ensure_ascii=self.ensure_ascii
+                        )
+                        lines = json_output.split("\n")
+                        useful_lines = [
+                            line
+                            for line in lines
+                            if not re.match(r"^[{}\[\],]*$", line)
+                        ]
+                        documents.append(
+                            Document(text="\n".join(useful_lines), metadata=extra_info)
+                        )
+
+                    elif self.levels_back is None and self.clean_json is False:
+                        # If levels_back isn't set and clean json is False, create documents without cleaning
+                        json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
+                        documents.append(
+                            Document(text=json_output, metadata=extra_info)
+                        )
+
+                    elif self.levels_back is not None:
+                        # If levels_back is set, we make the embeddings contain the labels
+                        # from further up the JSON tree
+                        lines = [
+                            *_depth_first_yield(
+                                data,
+                                self.levels_back,
+                                self.collapse_length,
+                                [],
+                                self.ensure_ascii,
+                            )
+                        ]
+                        documents.append(
+                            Document(text="\n".join(lines), metadata=extra_info)
                         )
-                    ]
-                    documents.append(
-                        Document(text="\n".join(lines), metadata=extra_info)
-                    )
             return documents
+        except RecursionError:
+            warnings.warn("Recursion error occurred while processing JSON data.")
+            return []
```
llama-index-core/tests/readers/test_json.py · +32 −0 · modified
```diff
@@ -1,7 +1,10 @@
 """Test file reader."""
 
+import json
+import sys
 from tempfile import TemporaryDirectory
 
+import pytest
 from llama_index.core.readers.json import JSONReader
 
 
@@ -93,3 +96,32 @@ def test_clean_json() -> None:
         reader1 = JSONReader(clean_json=True)
         data1 = reader1.load_data(file_name)
         assert data1[0].get_content() == '"a": {\n"b": "c"'
+
+
+def test_max_recursion_attack(tmp_path):
+    original_limit = sys.getrecursionlimit()
+    try:
+        nested_dict = {}
+        current_level = nested_dict
+        sys.setrecursionlimit(5000)
+
+        for i in range(1, 2001):  # Create 2000 levels of nesting
+            if i == 2000:
+                current_level[f"level{i}"] = "final_value"
+            else:
+                current_level[f"level{i}"] = {}
+                current_level = current_level[f"level{i}"]
+
+        file_name = tmp_path / "test_nested.json"
+        with open(file_name, "w") as f:
+            f.write(json.dumps(nested_dict))
+
+        # Force a recursion error
+        sys.setrecursionlimit(500)
+        reader = JSONReader(levels_back=1)
+        with pytest.warns(UserWarning):
+            data = reader.load_data(file_name)
+            assert data == []
+
+    finally:
+        sys.setrecursionlimit(original_limit)
```
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References (4)
News mentions (0)
No linked articles in our index yet.