VYPR
Moderate severity · NVD Advisory · Published Feb 20, 2026 · Updated Feb 24, 2026

pypdf has possible long runtimes/large memory usage for large /ToUnicode streams

CVE-2026-27025

Description

pypdf is a free and open-source pure-Python PDF library. Prior to 6.7.1, an attacker exploiting this vulnerability can craft a PDF that leads to long runtimes and large memory consumption. Triggering it requires parsing the /ToUnicode entry of a font with unusually large values, for example during text extraction. This vulnerability is fixed in 6.7.1.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
pypdf (PyPI) | < 6.7.1 | 6.7.1

Affected products

1

Patches

1
77d7b8d7cfbe

SEC: Limit size of `/ToUnicode` entries (#3646)

https://github.com/py-pdf/pypdf · Stefan · Feb 17, 2026 · via ghsa
2 files changed · +109 -2
  • pypdf/_cmap.py +20 -0 modified
    @@ -6,6 +6,7 @@
     
     from ._codecs import adobe_glyphs, charset_encoding
     from ._utils import logger_error, logger_warning
    +from .errors import LimitReachedError
     from .generic import (
         DecodedStreamObject,
         DictionaryObject,
    @@ -217,6 +218,15 @@ def process_cm_line(
         return process_rg, process_char, multiline_rg
     
     
    +# Usual values should be up to 65_536.
    +MAPPING_DICTIONARY_SIZE_LIMIT = 100_000
    +
    +
    +def _check_mapping_size(size: int) -> None:
    +    if size > MAPPING_DICTIONARY_SIZE_LIMIT:
    +        raise LimitReachedError(f"Maximum /ToUnicode size limit reached: {size} > {MAPPING_DICTIONARY_SIZE_LIMIT}.")
    +
    +
     def parse_bfrange(
         line: bytes,
         map_dict: dict[Any, Any],
    @@ -225,6 +235,8 @@ def parse_bfrange(
     ) -> Union[None, tuple[int, int]]:
         lst = [x for x in line.split(b" ") if x]
         closure_found = False
    +    entry_count = len(int_entry)
    +    _check_mapping_size(entry_count)
         if multiline_rg is not None:
             fmt = b"%%0%dX" % (map_dict[-1] * 2)
             a = multiline_rg[0]  # a, b not in the current line
    @@ -233,6 +245,8 @@ def parse_bfrange(
                 if sq == b"]":
                     closure_found = True
                     break
    +            entry_count += 1
    +            _check_mapping_size(entry_count)
                 map_dict[
                     unhexlify(fmt % a).decode(
                         "charmap" if map_dict[-1] == 1 else "utf-16-be",
    @@ -252,6 +266,8 @@ def parse_bfrange(
                     if sq == b"]":
                         closure_found = True
                         break
    +                entry_count += 1
    +                _check_mapping_size(entry_count)
                     map_dict[
                         unhexlify(fmt % a).decode(
                             "charmap" if map_dict[-1] == 1 else "utf-16-be",
    @@ -264,6 +280,8 @@ def parse_bfrange(
                 c = int(lst[2], 16)
                 fmt2 = b"%%0%dX" % max(4, len(lst[2]))
                 closure_found = True
    +            range_size = max(0, b - a + 1)
    +            _check_mapping_size(entry_count + range_size)  # This can be checked beforehand.
                 while a <= b:
                     map_dict[
                         unhexlify(fmt % a).decode(
    @@ -279,6 +297,8 @@ def parse_bfrange(
     
     def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
         lst = [x for x in line.split(b" ") if x]
    +    new_count = len(lst) // 2
    +    _check_mapping_size(len(int_entry) + new_count)  # This can be checked beforehand.
         map_dict[-1] = len(lst[0]) // 2
         while len(lst) > 1:
             map_to = ""
    
  • tests/test_cmap.py +89 -2 modified
    @@ -4,10 +4,11 @@
     import pytest
     
     from pypdf import PdfReader, PdfWriter
    -from pypdf._cmap import get_encoding, parse_bfchar
    +from pypdf._cmap import get_encoding, parse_bfchar, parse_bfrange
     from pypdf._codecs import charset_encoding
     from pypdf._font import Font
    -from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject
    +from pypdf.errors import LimitReachedError
    +from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject, StreamObject
     
     from . import RESOURCE_ROOT, get_data_from_url
     
    @@ -335,3 +336,89 @@ def test_parse_bfchar(caplog):
         assert map_dict == {-1: 2, "ծ": "", "վ": "ጷ"}
         assert int_entry == [1406, 1390]
         assert caplog.messages == ["Got invalid hex string: Odd-length string (b'1f310')"]
    +
    +
    +def test_parse_bfrange__iteration_limit():
    +    writer = PdfWriter()
    +
    +    to_unicode = StreamObject()
    +    to_unicode.set_data(
    +        b"beginbfrange\n"
    +        b"<00000000> <001FFFFF> <00000000>\n"
    +        b"endbfrange\n"
    +    )
    +    font = writer._add_object(DictionaryObject({
    +        NameObject("/Type"): NameObject("/Font"),
    +        NameObject("/Subtype"): NameObject("/Type1"),
    +        NameObject("/BaseFont"): NameObject("/Helvetica"),
    +        NameObject("/ToUnicode"): to_unicode,
    +    }))
    +
    +    page = writer.add_blank_page(width=100, height=100)
    +    page[NameObject("/Resources")] = DictionaryObject({
    +        NameObject("/Font"): DictionaryObject({
    +            NameObject("/F1"): font.indirect_reference,
    +        })
    +    })
    +
    +    # Case without list, exceeding list directly.
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 2097152 > 100000\.$"
    +    ):
    +        _ = page.extract_text()
    +
    +    # Use a pre-filled dummy list to simulate multiple calls where the upper bound does
    +    # not overflow, but the overall size does. Case without list.
    +    int_entry = [0] * 99_999
    +    map_dict = {}
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 165535 > 100000\.$"
    +    ):
    +        _ = parse_bfrange(line=b"0000 FFFF 0000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
    +    assert map_dict == {-1: 2}
    +
    +    # Exceeding from previous call.
    +    int_entry.append(1)
    +    map_dict = {}
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
    +    ):
    +        _ = parse_bfrange(line=b"00000000 00000000 00000000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
    +    assert map_dict == {-1: 4}
    +
    +    # multiline_rg
    +    int_entry = [0] * 99_995
    +    map_dict = {-1: 1}
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
    +    ):
    +        _ = parse_bfrange(
    +            line=b"0020  0021  0022  0023  0024  0025  0026  2019",
    +            map_dict=map_dict, int_entry=int_entry, multiline_rg=(32, 251)
    +        )
    +    assert map_dict == {-1: 1, " ": " ", "!": "!", '"': '"', "#": "#", "$": "$"}
    +
    +    # No multiline_rg, but list.
    +    int_entry = [0] * 99_995
    +    map_dict = {}
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
    +    ):
    +        _ = parse_bfrange(
    +            line=b"01 8A [ FFFD FFFD FFFD FFFF FFAB AAAA BBBB",
    +            map_dict=map_dict, int_entry=int_entry, multiline_rg=None
    +        )
    +    assert map_dict == {-1: 1, "\x01": "�", "\x02": "�", "\x03": "�", "\x04": "\uffff", "\x05": "ᆱ"}
    +
    +
    +def test_parse_bfchar__iteration_limit():
    +    int_entry = [0] * 99_995
    +    map_dict = {}
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100002 > 100000\.$"
    +    ):
    +        parse_bfchar(
    +            line=b"0003   0020   0008   0025   0009   0026   000A   0027   000B   0028   000C   0029   000D   002A",
    +            map_dict=map_dict, int_entry=int_entry,
    +        )
    +    assert map_dict == {}
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.