VYPR
Medium severity · CVSS 5.3 · NVD Advisory · Published Apr 22, 2026 · Updated Apr 24, 2026

CVE-2026-41168

CVE-2026-41168

Description

pypdf is a free and open-source pure-python PDF library. An attacker who uses a vulnerability present in versions prior to 6.10.1 can craft a PDF which leads to long runtimes. This requires cross-reference streams with wrong large /Size values or object streams with wrong large /N values. This has been fixed in pypdf 6.10.1. As a workaround, one may apply the changes from the patch manually.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package: pypdf (PyPI)
Affected versions: < 6.10.1
Patched versions: 6.10.1

Affected products

1

Patches

1
62338e9d3641

SEC: Limit the allowed size of xref and object streams (#3733)

https://github.com/py-pdf/pypdf · Stefan · Apr 14, 2026 · via GHSA
3 files changed · +148 −13
  • pypdf/_doc_common.py · +2 −1 · modified
    @@ -1166,14 +1166,15 @@ def _flatten(
             )
             if inherit is None:
                 inherit = {}
    -        if pages is None:
    +        if is_null_or_none(pages):
                 # Fix issue 327: set flattened_pages attribute only for
                 # decrypted file
                 catalog = self.root_object
                 pages = catalog.get("/Pages").get_object()  # type: ignore
                 if not isinstance(pages, DictionaryObject):
                     raise PdfReadError("Invalid object in /Pages")
                 self.flattened_pages = []
    +        assert pages is not None, "mypy"
     
             if PagesAttributes.TYPE in pages:
                 t = cast(str, pages[PagesAttributes.TYPE])
    
  • pypdf/_reader.py · +66 −12 · modified
    @@ -361,6 +361,20 @@ def _get_object_from_stream(
             n = int(obj_stm["/N"])  # type: ignore[call-overload]
             first_offset = int(obj_stm["/First"])  # type: ignore[call-overload]
     
    +        # ObjStm header format: "objnum offset objnum offset ..."
    +        # smallest possible entry: "0 0" = 3 bytes (1 digit + 1 space + 1 digit)
    +        # using // 4 would reject a valid 3-byte single entry (3 // 4 = 0)
    +        max_n = stream_data.getbuffer().nbytes // 3
    +        stream_data.seek(0)
    +        if n > max_n:
    +            if self.strict:
    +                raise LimitReachedError(f"Value /N {n} for object {stmnum} exceeds maximum allowed value {max_n}.")
    +            logger_warning(
    +                f"Value /N {n} for object {stmnum} exceeds maximum allowed value {max_n}. Limiting to {max_n}.",
    +                src=__name__
    +            )
    +            n = max_n
    +
             # Phase 1: Read the index (objnum, offset) pairs from the header.
             obj_index: list[tuple[int, int]] = []
             for _i in range(n):
    @@ -1035,30 +1049,70 @@ def _read_xref_other_error(
                     raise PdfReadError("Cannot rebuild xref")
             raise PdfReadError("Could not find xref table at specified location")
     
    +    def _sanitize_pdf15_xref_stream_index_pairs(
    +            self, index_pairs: list[int], entry_sizes: list[int], xref_stream: ContentStream
    +    ) -> list[int]:
    +        # `entry_sizes` holds the byte widths for the entries. Summing determines the total number of bytes per entry.
    +        # We expect up to 3 values, clamping to at least 1 avoids ZeroDivisionError in next step.
    +        # `min_entry_bytes` will be the smallest plausible size of one xref entry.
    +        min_entry_bytes = max(sum(int(entry_sizes[i]) for i in range(min(len(entry_sizes), 3))), 1)
    +        # maximum number of entries that could physically fit
    +        max_entries = len(xref_stream.get_data()) // min_entry_bytes + 1
    +
    +        result = []
    +        total = 0
    +
    +        for index, pair_value in enumerate(index_pairs):
    +            pair_value_int = int(pair_value)
    +
    +            # `index_pairs` has the format `[start0, count0, start1, count1, ...]`
    +            # Only modify the counts here, but keep the start values.
    +            if index % 2 == 1:
    +                if total + pair_value_int > max_entries:
    +                    if self.strict:
    +                        raise LimitReachedError(
    +                            f"Total XRef entries {total + pair_value_int} exceed maximum allowed value {max_entries}."
    +                        )
    +                    new_v = max(0, max_entries - total)
    +                    logger_warning(
    +                        f"Clamping XRef count from {pair_value_int} to {new_v} to fit stream size.",
    +                        src=__name__
    +                    )
    +                    pair_value_int = new_v
    +
    +                total += pair_value_int
    +
    +            result.append(pair_value_int)
    +
    +        return result
    +
         def _read_pdf15_xref_stream(
             self, stream: StreamType
         ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
             """Read the cross-reference stream for PDF 1.5+."""
             stream.seek(-1, 1)
    -        idnum, generation = self.read_object_header(stream)
    -        xrefstream = cast(ContentStream, read_object(stream, self))
    -        if cast(str, xrefstream["/Type"]) != "/XRef":
    -            raise PdfReadError(f"Unexpected type {xrefstream['/Type']!r}")
    -        self.cache_indirect_object(generation, idnum, xrefstream)
    +        stream_idnum, stream_generation = self.read_object_header(stream)
    +        xref_stream = cast(ContentStream, read_object(stream, self))
    +        if cast(str, xref_stream["/Type"]) != "/XRef":
    +            raise PdfReadError(f"Unexpected type {xref_stream['/Type']!r}")
    +        self.cache_indirect_object(stream_generation, stream_idnum, xref_stream)
     
             # Index pairs specify the subsections in the dictionary.
             # If none, create one subsection that spans everything.
    -        if "/Size" not in xrefstream:
    +        if "/Size" not in xref_stream:
                 # According to table 17 of the PDF 2.0 specification, this key is required.
    -            raise PdfReadError(f"Size missing from XRef stream {xrefstream!r}!")
    -        idx_pairs = xrefstream.get("/Index", [0, xrefstream["/Size"]])
    +            raise PdfReadError(f"Size missing from XRef stream {xref_stream!r}!")
    +        index_pairs = xref_stream.get("/Index", [0, xref_stream["/Size"]])
     
    -        entry_sizes = cast(dict[Any, Any], xrefstream.get("/W"))
    +        entry_sizes = cast(list[int], xref_stream.get("/W"))
             assert len(entry_sizes) >= 3
             if self.strict and len(entry_sizes) > 3:
                 raise PdfReadError(f"Too many entry sizes: {entry_sizes}")
    +        index_pairs = self._sanitize_pdf15_xref_stream_index_pairs(
    +            index_pairs=index_pairs, entry_sizes=entry_sizes, xref_stream=xref_stream
    +        )
     
    -        stream_data = BytesIO(xrefstream.get_data())
    +        stream_data = BytesIO(xref_stream.get_data())
     
             def get_entry(i: int) -> Union[int, tuple[int, ...]]:
                 # Reads the correct number of bytes for each entry. See the
    @@ -1078,8 +1132,8 @@ def used_before(num: int, generation: Union[int, tuple[int, ...]]) -> bool:
                 return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore
     
             # Iterate through each subsection
    -        self._read_xref_subsections(idx_pairs, get_entry, used_before)
    -        return xrefstream
    +        self._read_xref_subsections(index_pairs, get_entry, used_before)
    +        return xref_stream
     
         @staticmethod
         def _get_xref_issues(stream: StreamType, startxref: int) -> int:
    
  • tests/test_reader.py · +80 −0 · modified
    @@ -1,5 +1,6 @@
     """Test the pypdf._reader module."""
     import io
    +import struct
     import sys
     import time
     from io import BytesIO
    @@ -23,6 +24,7 @@
         PdfStreamError,
         WrongPasswordError,
     )
    +from pypdf.filters import FlateDecode
     from pypdf.generic import (
         ArrayObject,
         Destination,
    @@ -2185,3 +2187,81 @@ def test_xref_table_with_comments_before_trailer():
         )
         reader = PdfReader(BytesIO(pdf_data))
         assert len(reader.pages) == 1
    +
    +
    +@pytest.mark.timeout(10)
    +def test_read_pdf15_xref_stream__size_limit(caplog):
    +    pdf = b"%PDF-1.7\n"
    +    pdf += b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
    +    pdf += b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n"
    +    startxref = len(pdf)
    +    encoded = FlateDecode.encode(b"")
    +    pdf += (
    +        f"3 0 obj\n<< /Type /XRef /Size 50000000 /W [0 0 0] /Root 1 0 R /Filter /FlateDecode /Length {len(encoded)} >>"
    +        f"\nstream\n"
    +    ).encode()
    +    pdf += encoded + b"\nendstream\nendobj\n"
    +    pdf += f"startxref\n{startxref}\n%%EOF\n".encode()
    +
    +    with pytest.raises(
    +            PdfReadError,
    +            match=r"^Trailer cannot be read: Total XRef entries 50000000 exceed maximum allowed value 1\.$"
    +    ):
    +        _ = PdfReader(BytesIO(pdf), strict=True)
    +    assert caplog.messages == []
    +
    +    _ = PdfReader(BytesIO(pdf), strict=False)
    +    assert caplog.messages == [
    +        "Clamping XRef count from 50000000 to 1 to fit stream size.",
    +    ]
    +
    +
    +@pytest.mark.timeout(10)
    +def test_get_object_from_stream__size_limit(caplog):
    +    obj_stm_encoded = FlateDecode.encode(b"4 0\nnull")
    +    pdf = b"%PDF-1.7\n"
    +    header_length = len(pdf)
    +    pdf += b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
    +    catalog_length = len(pdf)
    +    pdf += b"2 0 obj\n<< /Type /Pages /Kids [4 0 R] /Count 1 >>\nendobj\n"
    +    pages_length = len(pdf)
    +    pdf += (
    +        f"3 0 obj\n<< /Type /ObjStm /N 5000000 /First 4 /Filter /FlateDecode /Length {len(obj_stm_encoded)} >>\n"
    +        f"stream\n"
    +    ).encode()
    +    pdf += obj_stm_encoded + b"\nendstream\nendobj\n"
    +    xref = bytearray()
    +    for xref_type, value, generation in [
    +            (0, 0, 0),
    +            (1, header_length, 0),
    +            (1, catalog_length, 0),
    +            (1, pages_length, 0),
    +            (2, 3, 0)
    +    ]:
    +        # xref type: 0 = free object, 1 = in-use object (points to byte offset), 2 = compressed (stored in stream)
    +        # value: type 1 = byte offset in the file, type 2 = object stream number
    +        # >B = 1 byte (unsigned char) for type and generation
    +        # >H = 2 bytes (unsigned short) for value
    +        xref += struct.pack(">B", xref_type) + struct.pack(">H", value) + struct.pack(">B", generation)
    +    xref_encoded = FlateDecode.encode(bytes(xref))
    +    startxref = len(pdf)
    +    pdf += (
    +        f"5 0 obj\n<< /Type /XRef /Size 6 /W [1 2 1] /Root 1 0 R /Filter /FlateDecode "
    +        f"/Length {len(xref_encoded)} >>\nstream\n"
    +    ).encode()
    +    pdf += xref_encoded + b"\nendstream\nendobj\n"
    +    pdf += f"startxref\n{startxref}\n%%EOF\n".encode()
    +
    +    with pytest.raises(LimitReachedError, match=r"^Value /N 5000000 for object 3 exceeds maximum allowed value 2\.$"):
    +        reader = PdfReader(BytesIO(pdf), strict=True)
    +        _ = reader.pages[0]
    +    assert caplog.messages == []
    +
    +    with pytest.raises(PdfReadError, match=r"^Maximum recursion depth reached during page flattening\.$"):
    +        reader = PdfReader(BytesIO(pdf), strict=False)
    +        _ = reader.pages[0]
    +    assert caplog.messages == [
    +        "Value /N 5000000 for object 3 exceeds maximum allowed value 2. Limiting to 2.",
    +        "NumberObject(b'') invalid; use 0 instead",
    +        "NumberObject(b'') invalid; use 0 instead",
    +    ]
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.