VYPR
Moderate severity · NVD Advisory · Published Oct 22, 2025 · Updated Oct 23, 2025

pypdf manipulated LZWDecode streams can exhaust RAM

CVE-2025-62708

Description

pypdf is a free and open-source pure-Python PDF library. Prior to version 6.1.3, an attacker who exploits this vulnerability can craft a PDF that leads to large memory usage. Triggering it requires parsing the content stream of a page that uses the LZWDecode filter. This has been fixed in pypdf version 6.1.3.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
pypdf (PyPI) | < 6.1.3 | 6.1.3

Affected products

1

Patches

1
e51d07807ffc

SEC: Allow limiting size of LZWDecode streams (#3502)

https://github.com/py-pdf/pypdf · Stefan · Oct 22, 2025 · via GHSA
3 files changed · +36 -9
  • pypdf/_codecs/_codecs.py · +18 -7 · modified
    @@ -9,6 +9,7 @@
     from abc import ABC, abstractmethod
     
     from pypdf._utils import logger_warning
    +from pypdf.errors import LimitReachedError
     
     
     class Codec(ABC):
    @@ -49,6 +50,9 @@ class LzwCodec(Codec):
         INITIAL_BITS_PER_CODE = 9  # Initial code bit width
         MAX_BITS_PER_CODE = 12  # Maximum code bit width
     
    +    def __init__(self, max_output_length: int = 1_000_000_000) -> None:
    +        self.max_output_length = max_output_length
    +
         def _initialize_encoding_table(self) -> None:
             """Initialize the encoding table and state to initial conditions."""
             self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
    @@ -217,6 +221,7 @@ def decode(self, data: bytes) -> bytes:
             self._next_bits = 0
     
             output_stream = io.BytesIO()
    +        output_length = 0
     
             self._initialize_decoding_table()
             self._byte_pointer = 0
    @@ -234,23 +239,29 @@ def decode(self, data: bytes) -> bytes:
                     code = self._next_code_decode(data)
                     if code == self.EOD_MARKER:
                         break
    -                output_stream.write(self.decoding_table[code])
    +                output_stream.write(decoded := self.decoding_table[code])
                     old_code = code
                 elif code < self._table_index:
    -                string = self.decoding_table[code]
    -                output_stream.write(string)
    +                decoded = self.decoding_table[code]
    +                output_stream.write(decoded)
                     if old_code != self.CLEAR_TABLE_MARKER:
    -                    self._add_entry_decode(self.decoding_table[old_code], string[0])
    +                    self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                     old_code = code
                 else:
                     # The code is not in the table and not one of the special codes
    -                string = (
    +                decoded = (
                         self.decoding_table[old_code] + self.decoding_table[old_code][:1]
                     )
    -                output_stream.write(string)
    -                self._add_entry_decode(self.decoding_table[old_code], string[0])
    +                output_stream.write(decoded)
    +                self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                     old_code = code
     
    +            output_length += len(decoded)
    +            if output_length > self.max_output_length:
    +                raise LimitReachedError(
    +                    f"Limit reached while decompressing: {output_length} > {self.max_output_length}"
    +                )
    +
             return output_stream.getvalue()
     
         def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
    
  • pypdf/filters.py · +2 -1 · modified
    @@ -71,6 +71,7 @@
     )
     
     ZLIB_MAX_OUTPUT_LENGTH = 75_000_000
    +LZW_MAX_OUTPUT_LENGTH = 1_000_000_000
     
     
     def _decompress_with_limit(data: bytes) -> bytes:
    @@ -435,7 +436,7 @@ def __init__(self, data: bytes) -> None:
                 self.data = data
     
             def decode(self) -> bytes:
    -            return _LzwCodec().decode(self.data)
    +            return _LzwCodec(max_output_length=LZW_MAX_OUTPUT_LENGTH).decode(self.data)
     
         @staticmethod
         def decode(
    
  • tests/test_codecs.py · +16 -1 · modified
    @@ -1,10 +1,12 @@
     """Test LZW-related code."""
    -
    +from io import BytesIO
     from pathlib import Path
     
     import pytest
     
    +from pypdf import PdfReader
     from pypdf._codecs._codecs import LzwCodec
    +from pypdf.errors import LimitReachedError
     
     from . import get_data_from_url
     
    @@ -80,3 +82,16 @@ def test_lzw_decoder_table_overflow(caplog):
     @pytest.mark.timeout(timeout=15, method="thread")
     def test_lzw_decoder_large_stream_performance(caplog):
         LzwCodec().decode(get_data_from_url(name="large_lzw_example_encoded.dat"))
    +
    +
    +@pytest.mark.enable_socket
    +def test_lzw_decoder__output_limit():
    +    url = "https://github.com/user-attachments/files/23057035/lzw__output_limit.pdf"
    +    name = "lzw__output_limit.pdf"
    +    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    +    page = reader.pages[0]
    +
    +    with pytest.raises(
    +            expected_exception=LimitReachedError, match=r"^Limit reached while decompressing: 1000000170 > 1000000000$"
    +    ):
    +        page.images[0].image.load()
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.