VYPR
Moderate severity · NVD Advisory · Published Mar 10, 2026 · Updated Mar 11, 2026

pypdf: manipulated stream length values can exhaust RAM

CVE-2026-31826

Description

pypdf is a free and open-source pure-Python PDF library. Prior to 6.8.0, an attacker can craft a PDF that causes large memory usage when it is processed. Exploitation requires parsing a content stream that declares a very large /Length value, regardless of the actual amount of data inside the stream. This vulnerability is fixed in 6.8.0.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
pypdf (PyPI) | < 6.8.0 | 6.8.0

Affected products

1

Patches

1
3c550b3196ad

SEC: Limit allowed `/Length` value of stream (#3675)

https://github.com/py-pdf/pypdf · Stefan · Mar 9, 2026 · via ghsa
4 files changed · +125 −1
  • docs/user/security.md · +3 −0 · modified
    @@ -18,6 +18,9 @@ aware of the possible side effects, you can modify the following constants which
     For JBIG2 images, there is a similar parameter to limit the memory usage during decoding: `pypdf.filters.JBIG2_MAX_OUTPUT_LENGTH`
     It defaults to 75 MB as well.
     
    +For all streams, the maximum allowed value for the `/Length` field is limited to `pypdf.filters.MAX_DECLARED_STREAM_LENGTH`, which
    +defaults to 75 MB as well.
    +
     For the *FlateDecode* filter, the number of bytes to attempt recovery with can be set by `pypdf.filters.ZLIB_MAX_RECOVERY_INPUT_LENGTH`.
     It defaults to 5 MB due to the much more complex recovery approach.
     
    
  • pypdf/filters.py · +2 −0 · modified
    @@ -72,6 +72,8 @@
         is_null_or_none,
     )
     
    +MAX_DECLARED_STREAM_LENGTH = 75_000_000
    +
     JBIG2_MAX_OUTPUT_LENGTH = 75_000_000
     LZW_MAX_OUTPUT_LENGTH = 75_000_000
     RUN_LENGTH_MAX_OUTPUT_LENGTH = 75_000_000
    
  • pypdf/generic/_data_structures.py · +5 −1 · modified
    @@ -63,7 +63,7 @@
     from ..constants import StreamAttributes as SA
     from ..constants import TypArguments as TA
     from ..constants import TypFitArguments as TF
    -from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
    +from ..errors import STREAM_TRUNCATED_PREMATURELY, LimitReachedError, PdfReadError, PdfStreamError
     from ._base import (
         BooleanObject,
         ByteStringObject,
    @@ -637,6 +637,10 @@ def read_from_stream(
                     length = -1
                 pstart = stream.tell()
                 if length >= 0:
    +                from ..filters import MAX_DECLARED_STREAM_LENGTH  # noqa: PLC0415
    +                if length > MAX_DECLARED_STREAM_LENGTH:
    +                    raise LimitReachedError(f"Declared stream length of {length} exceeds maximum allowed length.")
    +
                     data["__streamdata__"] = stream.read(length)
                 else:
                     data["__streamdata__"] = read_until_regex(
    
  • tests/generic/test_data_structures.py · +115 −0 · modified
    @@ -1,12 +1,23 @@
     """Test the pypdf.generic._data_structures module."""
    +import os
    +import subprocess
    +import sys
     from io import BytesIO
    +from pathlib import Path
    +from typing import Callable
     
     import pytest
     
     from pypdf import PdfReader, PdfWriter
    +from pypdf.errors import LimitReachedError
     from pypdf.generic import DictionaryObject, NameObject, RectangleObject, TreeObject
     from tests import RESOURCE_ROOT, get_data_from_url
     
    +try:
    +    import resource
    +except ImportError:
    +    resource = None
    +
     
     def test_dictionary_object__get_next_object_position():
         reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    @@ -54,3 +65,107 @@ def test_array_object__clone_same_object_multiple_times(caplog):
             page2 = writer.add_page(page)
             assert page2.mediabox == RectangleObject((0, 0, 595, 841))
         assert caplog.messages == []
    +
    +
    +@pytest.mark.enable_socket
    +def test_dictionary_object__read_from_stream__limit():
    +    name = "read_from_stream__length_2gb.pdf"
    +    url = "https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf"
    +
    +    reader = PdfReader(BytesIO(get_data_from_url(url=url, name=name)))
    +    page = reader.pages[0]
    +
    +    with pytest.raises(
    +            expected_exception=LimitReachedError,
    +            match=r"^Declared stream length of 2147483647 exceeds maximum allowed length\.$"
    +    ):
    +        page.extract_text()
    +
    +
    +def _prepare_test_dictionary_object__read_from_stream__no_limit(
    +        path: Path
    +) -> tuple[str, dict[str, str], Callable[[], None]]:
    +    env = os.environ.copy()
    +    env["COVERAGE_PROCESS_START"] = "pyproject.toml"
    +
    +    name = "read_from_stream__length_2gb.pdf"
    +    url = "https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf"
    +    data = get_data_from_url(url=url, name=name)
    +    pdf_path = path / name
    +    pdf_path.write_bytes(data)
    +    pdf_path_str = pdf_path.resolve().as_posix()
    +
    +    try:
    +        env["PYTHONPATH"] = "." + os.pathsep + env["PYTHONPATH"]
    +    except KeyError:
    +        env["PYTHONPATH"] = "."
    +
    +    def limit_virtual_memory() -> None:
    +        limit_kb = 1_000_000
    +        limit_bytes = limit_kb * 1024
    +        resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
    +
    +    return pdf_path_str, env, limit_virtual_memory
    +
    +
    +@pytest.mark.enable_socket
    +@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
    +@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
    +def test_dictionary_object__read_from_stream__no_limit(tmp_path):
    +    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)
    +
    +    source_file = tmp_path / "script.py"
    +    source_file.write_text(
    +        f"""
    +import sys
    +from pypdf import filters, PdfReader
    +
    +filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize
    +
    +with open({pdf_path_str!r}, mode="rb") as fd:
    +    reader = PdfReader(fd)
    +    print(reader.pages[0].extract_text())
    +"""
    +    )
    +
    +    result = subprocess.run(  # noqa: S603  # We have the control here.
    +        [sys.executable, source_file],
    +        capture_output=True,
    +        env=env,
    +        text=True,
    +        preexec_fn=limit_virtual_memory,
    +    )
    +    assert result.returncode == 1
    +    assert result.stdout == ""
    +    assert result.stderr.replace("\r", "").endswith("\nMemoryError\n")
    +
    +
    +@pytest.mark.enable_socket
    +@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
    +@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
    +def test_dictionary_object__read_from_stream__no_limit__path(tmp_path):
    +    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)
    +
    +    source_file = tmp_path / "script.py"
    +    source_file.write_text(
    +        f"""
    +import sys
    +from pypdf import filters, PdfReader
    +
    +filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize
    +
    +reader = PdfReader({pdf_path_str!r})
    +print(reader.pages[0].extract_text())
    +"""
    +    )
    +
    +    result = subprocess.run(  # noqa: S603  # We have the control here.
    +        [sys.executable, source_file],
    +        capture_output=True,
    +        env=env,
    +        text=True,
    +        preexec_fn=limit_virtual_memory,
    +    )
    +    assert result.returncode == 0
    +    assert result.stdout.replace("\r", "") == "Hello from pypdf\n"
    +    assert result.stderr == ""
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.