VYPR
High severity · CVSS 8.6 · NVD Advisory · Published Aug 25, 2025 · Updated Apr 15, 2026

CVE-2025-5302

CVE-2025-5302

Description

A denial of service vulnerability exists in the JSONReader component of the run-llama/llama_index repository, specifically in version v0.12.37. The vulnerability is caused by uncontrolled recursion when parsing deeply nested JSON files, which can lead to Python hitting its maximum recursion depth limit. This results in high resource consumption and potential crashes of the Python process. The issue is resolved in version 0.12.38.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package: llama-index-core (PyPI) — Affected versions: < 0.12.38 — Patched versions: 0.12.38

Affected products

1

Patches

1
c032843a02ce

fix: prevent DoS attacks in JSONReader (#18877)

https://github.com/run-llama/llama_index · Massimiliano Pippi · May 29, 2025 · via GHSA
2 files changed · +85 −44
  • llama-index-core/llama_index/core/readers/json.py · +53 −44 · modified
    @@ -2,6 +2,7 @@
     
     import json
     import re
    +import warnings
     from typing import Any, Dict, Generator, List, Optional
     
     from llama_index.core.readers.base import BaseReader
    @@ -97,49 +98,57 @@ def load_data(
             self, input_file: str, extra_info: Optional[Dict] = {}
         ) -> List[Document]:
             """Load data from the input file."""
    -        with open(input_file, encoding="utf-8") as f:
    -            load_data = []
    -            if self.is_jsonl:
    -                for line in f:
    -                    load_data.append(json.loads(line.strip()))
    -            else:
    -                load_data = [json.load(f)]
    -
    -            documents = []
    -            for data in load_data:
    -                if self.levels_back is None and self.clean_json is True:
    -                    # If levels_back isn't set and clean json is set,
    -                    # remove lines containing only formatting, we just format and make each
    -                    # line an embedding
    -                    json_output = json.dumps(
    -                        data, indent=0, ensure_ascii=self.ensure_ascii
    -                    )
    -                    lines = json_output.split("\n")
    -                    useful_lines = [
    -                        line for line in lines if not re.match(r"^[{}\[\],]*$", line)
    -                    ]
    -                    documents.append(
    -                        Document(text="\n".join(useful_lines), metadata=extra_info)
    -                    )
    -
    -                elif self.levels_back is None and self.clean_json is False:
    -                    # If levels_back isn't set  and clean json is False, create documents without cleaning
    -                    json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
    -                    documents.append(Document(text=json_output, metadata=extra_info))
    -
    -                elif self.levels_back is not None:
    -                    # If levels_back is set, we make the embeddings contain the labels
    -                    # from further up the JSON tree
    -                    lines = [
    -                        *_depth_first_yield(
    -                            data,
    -                            self.levels_back,
    -                            self.collapse_length,
    -                            [],
    -                            self.ensure_ascii,
    +        try:
    +            with open(input_file, encoding="utf-8") as f:
    +                load_data = []
    +                if self.is_jsonl:
    +                    for line in f:
    +                        load_data.append(json.loads(line.strip()))
    +                else:
    +                    load_data = [json.load(f)]
    +
    +                documents = []
    +                for data in load_data:
    +                    if self.levels_back is None and self.clean_json is True:
    +                        # If levels_back isn't set and clean json is set,
    +                        # remove lines containing only formatting, we just format and make each
    +                        # line an embedding
    +                        json_output = json.dumps(
    +                            data, indent=0, ensure_ascii=self.ensure_ascii
    +                        )
    +                        lines = json_output.split("\n")
    +                        useful_lines = [
    +                            line
    +                            for line in lines
    +                            if not re.match(r"^[{}\[\],]*$", line)
    +                        ]
    +                        documents.append(
    +                            Document(text="\n".join(useful_lines), metadata=extra_info)
    +                        )
    +
    +                    elif self.levels_back is None and self.clean_json is False:
    +                        # If levels_back isn't set  and clean json is False, create documents without cleaning
    +                        json_output = json.dumps(data, ensure_ascii=self.ensure_ascii)
    +                        documents.append(
    +                            Document(text=json_output, metadata=extra_info)
    +                        )
    +
    +                    elif self.levels_back is not None:
    +                        # If levels_back is set, we make the embeddings contain the labels
    +                        # from further up the JSON tree
    +                        lines = [
    +                            *_depth_first_yield(
    +                                data,
    +                                self.levels_back,
    +                                self.collapse_length,
    +                                [],
    +                                self.ensure_ascii,
    +                            )
    +                        ]
    +                        documents.append(
    +                            Document(text="\n".join(lines), metadata=extra_info)
                             )
    -                    ]
    -                    documents.append(
    -                        Document(text="\n".join(lines), metadata=extra_info)
    -                    )
                 return documents
    +        except RecursionError:
    +            warnings.warn("Recursion error occurred while processing JSON data.")
    +            return []
    
  • llama-index-core/tests/readers/test_json.py · +32 −0 · modified
    @@ -1,7 +1,10 @@
     """Test file reader."""
     
    +import json
    +import sys
     from tempfile import TemporaryDirectory
     
    +import pytest
     from llama_index.core.readers.json import JSONReader
     
     
    @@ -93,3 +96,32 @@ def test_clean_json() -> None:
             reader1 = JSONReader(clean_json=True)
             data1 = reader1.load_data(file_name)
             assert data1[0].get_content() == '"a": {\n"b": "c"'
    +
    +
    +def test_max_recursion_attack(tmp_path):
    +    original_limit = sys.getrecursionlimit()
    +    try:
    +        nested_dict = {}
    +        current_level = nested_dict
    +        sys.setrecursionlimit(5000)
    +
    +        for i in range(1, 2001):  # Create 2000 levels of nesting
    +            if i == 2000:
    +                current_level[f"level{i}"] = "final_value"
    +            else:
    +                current_level[f"level{i}"] = {}
    +                current_level = current_level[f"level{i}"]
    +
    +        file_name = tmp_path / "test_nested.json"
    +        with open(file_name, "w") as f:
    +            f.write(json.dumps(nested_dict))
    +
    +        # Force a recursion error
    +        sys.setrecursionlimit(500)
    +        reader = JSONReader(levels_back=1)
    +        with pytest.warns(UserWarning):
    +            data = reader.load_data(file_name)
    +            assert data == []
    +
    +    finally:
    +        sys.setrecursionlimit(original_limit)
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

4

News mentions

0

No linked articles in our index yet.