Medium severity (score 5.3) · NVD Advisory · Published Apr 22, 2026 · Updated Apr 24, 2026
CVE-2026-41168
CVE-2026-41168
Description
pypdf is a free and open-source pure-python PDF library. An attacker who uses a vulnerability present in versions prior to 6.10.1 can craft a PDF which leads to long runtimes. This requires cross-reference streams with wrong large /Size values or object streams with wrong large /N values. This has been fixed in pypdf 6.10.1. As a workaround, one may apply the changes from the patch manually.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| pypdf (PyPI) | < 6.10.1 | 6.10.1 |
Affected products
Patches (1)
- 62338e9d3641 — SEC: Limit the allowed size of xref and object streams (#3733)
3 files changed · +148 −13
pypdf/_doc_common.py+2 −1 modified@@ -1166,14 +1166,15 @@ def _flatten( ) if inherit is None: inherit = {} - if pages is None: + if is_null_or_none(pages): # Fix issue 327: set flattened_pages attribute only for # decrypted file catalog = self.root_object pages = catalog.get("/Pages").get_object() # type: ignore if not isinstance(pages, DictionaryObject): raise PdfReadError("Invalid object in /Pages") self.flattened_pages = [] + assert pages is not None, "mypy" if PagesAttributes.TYPE in pages: t = cast(str, pages[PagesAttributes.TYPE])
pypdf/_reader.py+66 −12 modified@@ -361,6 +361,20 @@ def _get_object_from_stream( n = int(obj_stm["/N"]) # type: ignore[call-overload] first_offset = int(obj_stm["/First"]) # type: ignore[call-overload] + # ObjStm header format: "objnum offset objnum offset ..." + # smallest possible entry: "0 0" = 3 bytes (1 digit + 1 space + 1 digit) + # using // 4 would reject a valid 3-byte single entry (3 // 4 = 0) + max_n = stream_data.getbuffer().nbytes // 3 + stream_data.seek(0) + if n > max_n: + if self.strict: + raise LimitReachedError(f"Value /N {n} for object {stmnum} exceeds maximum allowed value {max_n}.") + logger_warning( + f"Value /N {n} for object {stmnum} exceeds maximum allowed value {max_n}. Limiting to {max_n}.", + src=__name__ + ) + n = max_n + # Phase 1: Read the index (objnum, offset) pairs from the header. obj_index: list[tuple[int, int]] = [] for _i in range(n): @@ -1035,30 +1049,70 @@ def _read_xref_other_error( raise PdfReadError("Cannot rebuild xref") raise PdfReadError("Could not find xref table at specified location") + def _sanitize_pdf15_xref_stream_index_pairs( + self, index_pairs: list[int], entry_sizes: list[int], xref_stream: ContentStream + ) -> list[int]: + # `entry_sizes` holds the byte widths for the entries. Summing determines the total number of bytes per entry. + # We expect up to 3 values, clamping to at least 1 avoids ZeroDivisionError in next step. + # `min_entry_bytes` will be the smallest plausible size of one xref entry. + min_entry_bytes = max(sum(int(entry_sizes[i]) for i in range(min(len(entry_sizes), 3))), 1) + # maximum number of entries that could physically fit + max_entries = len(xref_stream.get_data()) // min_entry_bytes + 1 + + result = [] + total = 0 + + for index, pair_value in enumerate(index_pairs): + pair_value_int = int(pair_value) + + # `index_pairs` has the format `[start0, count0, start1, count1, ...]` + # Only modify the counts here, but keep the start values. 
+ if index % 2 == 1: + if total + pair_value_int > max_entries: + if self.strict: + raise LimitReachedError( + f"Total XRef entries {total + pair_value_int} exceed maximum allowed value {max_entries}." + ) + new_v = max(0, max_entries - total) + logger_warning( + f"Clamping XRef count from {pair_value_int} to {new_v} to fit stream size.", + src=__name__ + ) + pair_value_int = new_v + + total += pair_value_int + + result.append(pair_value_int) + + return result + def _read_pdf15_xref_stream( self, stream: StreamType ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]: """Read the cross-reference stream for PDF 1.5+.""" stream.seek(-1, 1) - idnum, generation = self.read_object_header(stream) - xrefstream = cast(ContentStream, read_object(stream, self)) - if cast(str, xrefstream["/Type"]) != "/XRef": - raise PdfReadError(f"Unexpected type {xrefstream['/Type']!r}") - self.cache_indirect_object(generation, idnum, xrefstream) + stream_idnum, stream_generation = self.read_object_header(stream) + xref_stream = cast(ContentStream, read_object(stream, self)) + if cast(str, xref_stream["/Type"]) != "/XRef": + raise PdfReadError(f"Unexpected type {xref_stream['/Type']!r}") + self.cache_indirect_object(stream_generation, stream_idnum, xref_stream) # Index pairs specify the subsections in the dictionary. # If none, create one subsection that spans everything. - if "/Size" not in xrefstream: + if "/Size" not in xref_stream: # According to table 17 of the PDF 2.0 specification, this key is required. 
- raise PdfReadError(f"Size missing from XRef stream {xrefstream!r}!") - idx_pairs = xrefstream.get("/Index", [0, xrefstream["/Size"]]) + raise PdfReadError(f"Size missing from XRef stream {xref_stream!r}!") + index_pairs = xref_stream.get("/Index", [0, xref_stream["/Size"]]) - entry_sizes = cast(dict[Any, Any], xrefstream.get("/W")) + entry_sizes = cast(list[int], xref_stream.get("/W")) assert len(entry_sizes) >= 3 if self.strict and len(entry_sizes) > 3: raise PdfReadError(f"Too many entry sizes: {entry_sizes}") + index_pairs = self._sanitize_pdf15_xref_stream_index_pairs( + index_pairs=index_pairs, entry_sizes=entry_sizes, xref_stream=xref_stream + ) - stream_data = BytesIO(xrefstream.get_data()) + stream_data = BytesIO(xref_stream.get_data()) def get_entry(i: int) -> Union[int, tuple[int, ...]]: # Reads the correct number of bytes for each entry. See the @@ -1078,8 +1132,8 @@ def used_before(num: int, generation: Union[int, tuple[int, ...]]) -> bool: return num in self.xref.get(generation, []) or num in self.xref_objStm # type: ignore # Iterate through each subsection - self._read_xref_subsections(idx_pairs, get_entry, used_before) - return xrefstream + self._read_xref_subsections(index_pairs, get_entry, used_before) + return xref_stream @staticmethod def _get_xref_issues(stream: StreamType, startxref: int) -> int:
tests/test_reader.py+80 −0 modified@@ -1,5 +1,6 @@ """Test the pypdf._reader module.""" import io +import struct import sys import time from io import BytesIO @@ -23,6 +24,7 @@ PdfStreamError, WrongPasswordError, ) +from pypdf.filters import FlateDecode from pypdf.generic import ( ArrayObject, Destination, @@ -2185,3 +2187,81 @@ def test_xref_table_with_comments_before_trailer(): ) reader = PdfReader(BytesIO(pdf_data)) assert len(reader.pages) == 1 + + +@pytest.mark.timeout(10) +def test_read_pdf15_xref_stream__size_limit(caplog): + pdf = b"%PDF-1.7\n" + pdf += b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n" + pdf += b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n" + startxref = len(pdf) + encoded = FlateDecode.encode(b"") + pdf += ( + f"3 0 obj\n<< /Type /XRef /Size 50000000 /W [0 0 0] /Root 1 0 R /Filter /FlateDecode /Length {len(encoded)} >>" + f"\nstream\n" + ).encode() + pdf += encoded + b"\nendstream\nendobj\n" + pdf += f"startxref\n{startxref}\n%%EOF\n".encode() + + with pytest.raises( + PdfReadError, + match=r"^Trailer cannot be read: Total XRef entries 50000000 exceed maximum allowed value 1\.$" + ): + _ = PdfReader(BytesIO(pdf), strict=True) + assert caplog.messages == [] + + _ = PdfReader(BytesIO(pdf), strict=False) + assert caplog.messages == [ + "Clamping XRef count from 50000000 to 1 to fit stream size.", + ] + + +@pytest.mark.timeout(10) +def test_get_object_from_stream__size_limit(caplog): + obj_stm_encoded = FlateDecode.encode(b"4 0\nnull") + pdf = b"%PDF-1.7\n" + header_length = len(pdf) + pdf += b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n" + catalog_length = len(pdf) + pdf += b"2 0 obj\n<< /Type /Pages /Kids [4 0 R] /Count 1 >>\nendobj\n" + pages_length = len(pdf) + pdf += ( + f"3 0 obj\n<< /Type /ObjStm /N 5000000 /First 4 /Filter /FlateDecode /Length {len(obj_stm_encoded)} >>\n" + f"stream\n" + ).encode() + pdf += obj_stm_encoded + b"\nendstream\nendobj\n" + xref = bytearray() + for xref_type, value, generation in [ 
+ (0, 0, 0), + (1, header_length, 0), + (1, catalog_length, 0), + (1, pages_length, 0), + (2, 3, 0) + ]: + # xref type: 0 = free object, 1 = in-use object (points to byte offset), 2 = compressed (stored in stream) + # value: type 1 = byte offset in the file, type 2 = object stream number + # >B = 1 byte (unsigned char) for type and generation + # >H = 2 bytes (unsigned short) for value + xref += struct.pack(">B", xref_type) + struct.pack(">H", value) + struct.pack(">B", generation) + xref_encoded = FlateDecode.encode(bytes(xref)) + startxref = len(pdf) + pdf += ( + f"5 0 obj\n<< /Type /XRef /Size 6 /W [1 2 1] /Root 1 0 R /Filter /FlateDecode " + f"/Length {len(xref_encoded)} >>\nstream\n" + ).encode() + pdf += xref_encoded + b"\nendstream\nendobj\n" + pdf += f"startxref\n{startxref}\n%%EOF\n".encode() + + with pytest.raises(LimitReachedError, match=r"^Value /N 5000000 for object 3 exceeds maximum allowed value 2\.$"): + reader = PdfReader(BytesIO(pdf), strict=True) + _ = reader.pages[0] + assert caplog.messages == [] + + with pytest.raises(PdfReadError, match=r"^Maximum recursion depth reached during page flattening\.$"): + reader = PdfReader(BytesIO(pdf), strict=False) + _ = reader.pages[0] + assert caplog.messages == [ + "Value /N 5000000 for object 3 exceeds maximum allowed value 2. Limiting to 2.", + "NumberObject(b'') invalid; use 0 instead", + "NumberObject(b'') invalid; use 0 instead", + ]
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
- github.com/py-pdf/pypdf/commit/62338e9d36419cf193ccec7331784f45df1d70b3 (NVD: Patch, Web)
- github.com/py-pdf/pypdf/pull/3733 (NVD: Issue Tracking, Patch, Web)
- github.com/py-pdf/pypdf/security/advisories/GHSA-jj6c-8h6c-hppx (NVD: Mitigation, Patch, Vendor Advisory, Web)
- github.com/advisories/GHSA-jj6c-8h6c-hppx (GHSA: Advisory)
- nvd.nist.gov/vuln/detail/CVE-2026-41168 (GHSA: Advisory)
- github.com/py-pdf/pypdf/releases/tag/6.10.1 (NVD: Product, Release Notes, Web)
News mentions
No linked articles in our index yet.