Moderate severity · NVD Advisory · Published Feb 20, 2026 · Updated Feb 24, 2026
pypdf has possible long runtimes/large memory usage for large /ToUnicode streams
CVE-2026-27025
Description
pypdf is a free and open-source pure-Python PDF library. Prior to 6.7.1, an attacker exploiting this vulnerability can craft a PDF that leads to long runtimes and large memory consumption. Triggering it requires parsing the /ToUnicode entry of a font with unusually large values, for example during text extraction. This vulnerability is fixed in 6.7.1.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| pypdf (PyPI) | < 6.7.1 | 6.7.1 |
Affected products
1
Patches
1
77d7b8d7cfbe — SEC: Limit size of `/ToUnicode` entries (#3646)
2 files changed · +109 −2
pypdf/_cmap.py+20 −0 modified@@ -6,6 +6,7 @@ from ._codecs import adobe_glyphs, charset_encoding from ._utils import logger_error, logger_warning +from .errors import LimitReachedError from .generic import ( DecodedStreamObject, DictionaryObject, @@ -217,6 +218,15 @@ def process_cm_line( return process_rg, process_char, multiline_rg +# Usual values should be up to 65_536. +MAPPING_DICTIONARY_SIZE_LIMIT = 100_000 + + +def _check_mapping_size(size: int) -> None: + if size > MAPPING_DICTIONARY_SIZE_LIMIT: + raise LimitReachedError(f"Maximum /ToUnicode size limit reached: {size} > {MAPPING_DICTIONARY_SIZE_LIMIT}.") + + def parse_bfrange( line: bytes, map_dict: dict[Any, Any], @@ -225,6 +235,8 @@ def parse_bfrange( ) -> Union[None, tuple[int, int]]: lst = [x for x in line.split(b" ") if x] closure_found = False + entry_count = len(int_entry) + _check_mapping_size(entry_count) if multiline_rg is not None: fmt = b"%%0%dX" % (map_dict[-1] * 2) a = multiline_rg[0] # a, b not in the current line @@ -233,6 +245,8 @@ def parse_bfrange( if sq == b"]": closure_found = True break + entry_count += 1 + _check_mapping_size(entry_count) map_dict[ unhexlify(fmt % a).decode( "charmap" if map_dict[-1] == 1 else "utf-16-be", @@ -252,6 +266,8 @@ def parse_bfrange( if sq == b"]": closure_found = True break + entry_count += 1 + _check_mapping_size(entry_count) map_dict[ unhexlify(fmt % a).decode( "charmap" if map_dict[-1] == 1 else "utf-16-be", @@ -264,6 +280,8 @@ def parse_bfrange( c = int(lst[2], 16) fmt2 = b"%%0%dX" % max(4, len(lst[2])) closure_found = True + range_size = max(0, b - a + 1) + _check_mapping_size(entry_count + range_size) # This can be checked beforehand. 
while a <= b: map_dict[ unhexlify(fmt % a).decode( @@ -279,6 +297,8 @@ def parse_bfrange( def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None: lst = [x for x in line.split(b" ") if x] + new_count = len(lst) // 2 + _check_mapping_size(len(int_entry) + new_count) # This can be checked beforehand. map_dict[-1] = len(lst[0]) // 2 while len(lst) > 1: map_to = ""
tests/test_cmap.py+89 −2 modified@@ -4,10 +4,11 @@ import pytest from pypdf import PdfReader, PdfWriter -from pypdf._cmap import get_encoding, parse_bfchar +from pypdf._cmap import get_encoding, parse_bfchar, parse_bfrange from pypdf._codecs import charset_encoding from pypdf._font import Font -from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject +from pypdf.errors import LimitReachedError +from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject, StreamObject from . import RESOURCE_ROOT, get_data_from_url @@ -335,3 +336,89 @@ def test_parse_bfchar(caplog): assert map_dict == {-1: 2, "ծ": "", "վ": "ጷ"} assert int_entry == [1406, 1390] assert caplog.messages == ["Got invalid hex string: Odd-length string (b'1f310')"] + + +def test_parse_bfrange__iteration_limit(): + writer = PdfWriter() + + to_unicode = StreamObject() + to_unicode.set_data( + b"beginbfrange\n" + b"<00000000> <001FFFFF> <00000000>\n" + b"endbfrange\n" + ) + font = writer._add_object(DictionaryObject({ + NameObject("/Type"): NameObject("/Font"), + NameObject("/Subtype"): NameObject("/Type1"), + NameObject("/BaseFont"): NameObject("/Helvetica"), + NameObject("/ToUnicode"): to_unicode, + })) + + page = writer.add_blank_page(width=100, height=100) + page[NameObject("/Resources")] = DictionaryObject({ + NameObject("/Font"): DictionaryObject({ + NameObject("/F1"): font.indirect_reference, + }) + }) + + # Case without list, exceeding list directly. + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 2097152 > 100000\.$" + ): + _ = page.extract_text() + + # Use a pre-filled dummy list to simulate multiple calls where the upper bound does + # not overflow, but the overall size does. Case without list. 
+ int_entry = [0] * 99_999 + map_dict = {} + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 165535 > 100000\.$" + ): + _ = parse_bfrange(line=b"0000 FFFF 0000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None) + assert map_dict == {-1: 2} + + # Exceeding from previous call. + int_entry.append(1) + map_dict = {} + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$" + ): + _ = parse_bfrange(line=b"00000000 00000000 00000000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None) + assert map_dict == {-1: 4} + + # multiline_rg + int_entry = [0] * 99_995 + map_dict = {-1: 1} + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$" + ): + _ = parse_bfrange( + line=b"0020 0021 0022 0023 0024 0025 0026 2019", + map_dict=map_dict, int_entry=int_entry, multiline_rg=(32, 251) + ) + assert map_dict == {-1: 1, " ": " ", "!": "!", '"': '"', "#": "#", "$": "$"} + + # No multiline_rg, but list. + int_entry = [0] * 99_995 + map_dict = {} + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$" + ): + _ = parse_bfrange( + line=b"01 8A [ FFFD FFFD FFFD FFFF FFAB AAAA BBBB", + map_dict=map_dict, int_entry=int_entry, multiline_rg=None + ) + assert map_dict == {-1: 1, "\x01": "�", "\x02": "�", "\x03": "�", "\x04": "\uffff", "\x05": "ᆱ"} + + +def test_parse_bfchar__iteration_limit(): + int_entry = [0] * 99_995 + map_dict = {} + with pytest.raises( + expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100002 > 100000\.$" + ): + parse_bfchar( + line=b"0003 0020 0008 0025 0009 0026 000A 0027 000B 0028 000C 0029 000D 002A", + map_dict=map_dict, int_entry=int_entry, + ) + assert map_dict == {}
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
6
- github.com/advisories/GHSA-wgvp-vg3v-2xq3 (ghsa · ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2026-27025 (ghsa · ADVISORY)
- github.com/py-pdf/pypdf/commit/77d7b8d7cfbe8dd179858dfa42666f73fc6e57a2 (ghsa · x_refsource_MISC · WEB)
- github.com/py-pdf/pypdf/pull/3646 (ghsa · x_refsource_MISC · WEB)
- github.com/py-pdf/pypdf/releases/tag/6.7.1 (ghsa · x_refsource_MISC · WEB)
- github.com/py-pdf/pypdf/security/advisories/GHSA-wgvp-vg3v-2xq3 (ghsa · x_refsource_CONFIRM · WEB)
News mentions
0
No linked articles in our index yet.