Moderate severity · NVD Advisory · Published Oct 22, 2025 · Updated Oct 23, 2025
pypdf manipulated LZWDecode streams can exhaust RAM
CVE-2025-62708
Description
pypdf is a free and open-source pure-python PDF library. Prior to version 6.1.3, an attacker who uses this vulnerability can craft a PDF which leads to large memory usage. This requires parsing the content stream of a page using the LZWDecode filter. This has been fixed in pypdf version 6.1.3.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| pypdf (PyPI) | < 6.1.3 | 6.1.3 |
Affected products (1)
Patches (1)
- e51d07807ffc: SEC: Allow limiting size of LZWDecode streams (#3502)
3 files changed · +36 −9
pypdf/_codecs/_codecs.py · +18 −7 · modified

```diff
@@ -9,6 +9,7 @@
 from abc import ABC, abstractmethod
 
 from pypdf._utils import logger_warning
+from pypdf.errors import LimitReachedError
 
 
 class Codec(ABC):
@@ -49,6 +50,9 @@ class LzwCodec(Codec):
     INITIAL_BITS_PER_CODE = 9  # Initial code bit width
     MAX_BITS_PER_CODE = 12  # Maximum code bit width
 
+    def __init__(self, max_output_length: int = 1_000_000_000) -> None:
+        self.max_output_length = max_output_length
+
     def _initialize_encoding_table(self) -> None:
         """Initialize the encoding table and state to initial conditions."""
         self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
@@ -217,6 +221,7 @@ def decode(self, data: bytes) -> bytes:
         self._next_bits = 0
 
         output_stream = io.BytesIO()
+        output_length = 0
 
         self._initialize_decoding_table()
         self._byte_pointer = 0
@@ -234,23 +239,29 @@ def decode(self, data: bytes) -> bytes:
                 code = self._next_code_decode(data)
                 if code == self.EOD_MARKER:
                     break
-                output_stream.write(self.decoding_table[code])
+                output_stream.write(decoded := self.decoding_table[code])
                 old_code = code
             elif code < self._table_index:
-                string = self.decoding_table[code]
-                output_stream.write(string)
+                decoded = self.decoding_table[code]
+                output_stream.write(decoded)
                 if old_code != self.CLEAR_TABLE_MARKER:
-                    self._add_entry_decode(self.decoding_table[old_code], string[0])
+                    self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                 old_code = code
             else:
                 # The code is not in the table and not one of the special codes
-                string = (
+                decoded = (
                     self.decoding_table[old_code] + self.decoding_table[old_code][:1]
                 )
-                output_stream.write(string)
-                self._add_entry_decode(self.decoding_table[old_code], string[0])
+                output_stream.write(decoded)
+                self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                 old_code = code
 
+            output_length += len(decoded)
+            if output_length > self.max_output_length:
+                raise LimitReachedError(
+                    f"Limit reached while decompressing: {output_length} > {self.max_output_length}"
+                )
+
         return output_stream.getvalue()
 
     def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
```
pypdf/filters.py · +2 −1 · modified

```diff
@@ -71,6 +71,7 @@
 )
 
 ZLIB_MAX_OUTPUT_LENGTH = 75_000_000
+LZW_MAX_OUTPUT_LENGTH = 1_000_000_000
 
 
 def _decompress_with_limit(data: bytes) -> bytes:
@@ -435,7 +436,7 @@ def __init__(self, data: bytes) -> None:
             self.data = data
 
         def decode(self) -> bytes:
-            return _LzwCodec().decode(self.data)
+            return _LzwCodec(max_output_length=LZW_MAX_OUTPUT_LENGTH).decode(self.data)
 
     @staticmethod
     def decode(
```
tests/test_codecs.py · +16 −1 · modified

```diff
@@ -1,10 +1,12 @@
 """Test LZW-related code."""
-
+from io import BytesIO
 from pathlib import Path
 
 import pytest
 
+from pypdf import PdfReader
 from pypdf._codecs._codecs import LzwCodec
+from pypdf.errors import LimitReachedError
 
 from . import get_data_from_url
 
@@ -80,3 +82,16 @@ def test_lzw_decoder_table_overflow(caplog):
 @pytest.mark.timeout(timeout=15, method="thread")
 def test_lzw_decoder_large_stream_performance(caplog):
     LzwCodec().decode(get_data_from_url(name="large_lzw_example_encoded.dat"))
+
+
+@pytest.mark.enable_socket
+def test_lzw_decoder__output_limit():
+    url = "https://github.com/user-attachments/files/23057035/lzw__output_limit.pdf"
+    name = "lzw__output_limit.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    page = reader.pages[0]
+
+    with pytest.raises(
+        expected_exception=LimitReachedError, match=r"^Limit reached while decompressing: 1000000170 > 1000000000$"
+    ):
+        page.images[0].image.load()
```
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References (6)
- github.com/advisories/GHSA-jfx9-29x2-rv3j (ghsa; ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2025-62708 (ghsa; ADVISORY)
- github.com/py-pdf/pypdf/commit/e51d07807ffcdaf18077b9486dadb3dc05b368da (ghsa; x_refsource_MISC; WEB)
- github.com/py-pdf/pypdf/pull/3502 (ghsa; x_refsource_MISC; WEB)
- github.com/py-pdf/pypdf/releases/tag/6.1.3 (ghsa; x_refsource_MISC; WEB)
- github.com/py-pdf/pypdf/security/advisories/GHSA-jfx9-29x2-rv3j (ghsa; x_refsource_CONFIRM; WEB)
News mentions (0)
No linked articles in our index yet.