VYPR
High severity · CVSS 8.6 · NVD Advisory · Published Aug 25, 2025 · Updated Apr 15, 2026

CVE-2025-5302

CVE-2025-5302

Description

A denial of service vulnerability exists in the JSONReader component of the run-llama/llama_index repository, specifically in version v0.12.37. The vulnerability is caused by uncontrolled recursion when parsing deeply nested JSON files, which can lead to Python hitting its maximum recursion depth limit. This results in high resource consumption and potential crashes of the Python process. The issue is resolved in version 0.12.38.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package: llama-index-core (PyPI) — Affected versions: < 0.12.38 — Patched versions: 0.12.38

Affected products

1

Patches

1
c032843a02ce

fix: prevent DoS attacks in JSONReader (#18877)

https://github.com/run-llama/llama_index · Massimiliano Pippi · May 29, 2025 · via GHSA
2 files changed · +85 −44
  • llama-index-core/llama_index/core/readers/json.py · +53 −44 · modified
    @@ -2,6 +2,7 @@
     
     import json
     import re
    +import warnings
     from typing import Any, Dict, Generator, List, Optional
     
     from llama_index.core.readers.base import BaseReader
    @@ -97,49 +98,57 @@ def load_data(
             self, input_file: str, extra_info: Optional[Dict] = {}
         ) -> List[Document]:
             """Load data from the input file."""
    -        with open(input_file, encoding="utf-8") as f:
    -            load_data = []
    -            if self.is_jsonl:
    -                for line in f:
    -                    load_data.append(json.loads(line.strip()))
    -            else:
    -                load_data = [json.load(f)]
    -
    -            documents = []
    -            for data in load_data:
    -                if self.levels_back is None and self.clean_json is True:
    -                    # If levels_back isn't set and clean json is set,
    -                    # remove lines containing only formatting, we just format and make each
    -                    # line an embedding
    -                    json_output = json.dumps(
    -                        data, indent=0, ensure_ascii=self.ensure_ascii
    -                    )
    -                    lines = json_output.split("\n")
    -                    useful_lines = [
    -                        line for line in lines if not re.match(r"^[{}\[\],]*$", line)
    -                    ]
    -                    documents.append(
    -                        Document(text="\n".join(useful_lines), metadata=extra_info)
    -                    )
    -
    -                elif self.levels_back is None and self.clean_json is False:
    -                    # If levels_back isn't set  and clean json is False, create documents without cleaning
    -                    json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
    -                    documents.append(Document(text=json_output, metadata=extra_info))
    -
    -                elif self.levels_back is not None:
    -                    # If levels_back is set, we make the embeddings contain the labels
    -                    # from further up the JSON tree
    -                    lines = [
    -                        *_depth_first_yield(
    -                            data,
    -                            self.levels_back,
    -                            self.collapse_length,
    -                            [],
    -                            self.ensure_ascii,
    +        try:
    +            with open(input_file, encoding="utf-8") as f:
    +                load_data = []
    +                if self.is_jsonl:
    +                    for line in f:
    +                        load_data.append(json.loads(line.strip()))
    +                else:
    +                    load_data = [json.load(f)]
    +
    +                documents = []
    +                for data in load_data:
    +                    if self.levels_back is None and self.clean_json is True:
    +                        # If levels_back isn't set and clean json is set,
    +                        # remove lines containing only formatting, we just format and make each
    +                        # line an embedding
    +                        json_output = json.dumps(
    +                            data, indent=0, ensure_ascii=self.ensure_ascii
    +                        )
    +                        lines = json_output.split("\n")
    +                        useful_lines = [
    +                            line
    +                            for line in lines
    +                            if not re.match(r"^[{}\[\],]*$", line)
    +                        ]
    +                        documents.append(
    +                            Document(text="\n".join(useful_lines), metadata=extra_info)
    +                        )
    +
    +                    elif self.levels_back is None and self.clean_json is False:
    +                        # If levels_back isn't set  and clean json is False, create documents without cleaning
    +                        json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
    +                        documents.append(
    +                            Document(text=json_output, metadata=extra_info)
    +                        )
    +
    +                    elif self.levels_back is not None:
    +                        # If levels_back is set, we make the embeddings contain the labels
    +                        # from further up the JSON tree
    +                        lines = [
    +                            *_depth_first_yield(
    +                                data,
    +                                self.levels_back,
    +                                self.collapse_length,
    +                                [],
    +                                self.ensure_ascii,
    +                            )
    +                        ]
    +                        documents.append(
    +                            Document(text="\n".join(lines), metadata=extra_info)
                             )
    -                    ]
    -                    documents.append(
    -                        Document(text="\n".join(lines), metadata=extra_info)
    -                    )
                 return documents
    +        except RecursionError:
    +            warnings.warn("Recursion error occurred while processing JSON data.")
    +            return []
    
  • llama-index-core/tests/readers/test_json.py · +32 −0 · modified
    @@ -1,7 +1,10 @@
     """Test file reader."""
     
    +import json
    +import sys
     from tempfile import TemporaryDirectory
     
    +import pytest
     from llama_index.core.readers.json import JSONReader
     
     
    @@ -93,3 +96,32 @@ def test_clean_json() -> None:
             reader1 = JSONReader(clean_json=True)
             data1 = reader1.load_data(file_name)
             assert data1[0].get_content() == '"a": {\n"b": "c"'
    +
    +
    +def test_max_recursion_attack(tmp_path):
    +    original_limit = sys.getrecursionlimit()
    +    try:
    +        nested_dict = {}
    +        current_level = nested_dict
    +        sys.setrecursionlimit(5000)
    +
    +        for i in range(1, 2001):  # Create 2000 levels of nesting
    +            if i == 2000:
    +                current_level[f"level{i}"] = "final_value"
    +            else:
    +                current_level[f"level{i}"] = {}
    +                current_level = current_level[f"level{i}"]
    +
    +        file_name = tmp_path / "test_nested.json"
    +        with open(file_name, "w") as f:
    +            f.write(json.dumps(nested_dict))
    +
    +        # Force a recursion error
    +        sys.setrecursionlimit(500)
    +        reader = JSONReader(levels_back=1)
    +        with pytest.warns(UserWarning):
    +            data = reader.load_data(file_name)
    +            assert data == []
    +
    +    finally:
    +        sys.setrecursionlimit(original_limit)
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

4

News mentions

0

No linked articles in our index yet.