Medium severity · 5.3 · NVD Advisory · Published Apr 17, 2026 · Updated Apr 22, 2026
CVE-2026-40260
CVE-2026-40260
Description
pypdf is a free and open-source pure-Python PDF library. In versions prior to 6.10.0, manipulated entity declarations in XMP metadata can exhaust RAM. An attacker who exploits this vulnerability can craft a PDF that leads to large memory usage. Exploitation requires that the XMP metadata is parsed. This issue has been fixed in version 6.10.0.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| pypdf (PyPI) | < 6.10.0 | 6.10.0 |
Affected products
1
Patches
1
b15a374e5ca6 · SEC: Disallow custom XML entity declarations for XMP metadata (#3724)
2 files changed · +60 −9
pypdf/xmp.py+32 −3 modified@@ -15,9 +15,10 @@ TypeVar, Union, ) -from xml.dom.minidom import Document, parseString +from xml.dom.expatbuilder import ExpatBuilderNS +from xml.dom.minidom import Document from xml.dom.minidom import Element as XmlElement -from xml.parsers.expat import ExpatError +from xml.parsers.expat import ExpatError, XMLParserType from ._protocols import XmpInformationProtocol from ._utils import StreamType, deprecate_with_replacement, deprecation_no_replacement @@ -161,6 +162,34 @@ def _generic_get( return None +class _XmpBuilder(ExpatBuilderNS): + """ + Custom XML parser denying all entity declarations. + + This is a stripped down and typed version inspired by what *defusedxml* does. + + Why do we need this? The default limits of *libexpat* used by Python only block exponential entity expansion, + but not cases like quadratic entity expansion which can still cause quite some memory usage. + """ + + def custom_entity_declaration_handler( + self, + entity_name: str, + is_parameter_entity: bool, + value: Optional[str], + base: Optional[str], + system_id: str, + public_id: Optional[str], + notation_name: Optional[str], + ) -> None: + raise ExpatError(f"Forbidden entities: {entity_name!r}") + + def install(self, parser: XMLParserType) -> None: + super().install(parser) + + parser.EntityDeclHandler = self.custom_entity_declaration_handler + + class XmpInformation(XmpInformationProtocol, PdfObject): """ An object that represents Extensible Metadata Platform (XMP) metadata. @@ -175,7 +204,7 @@ def __init__(self, stream: ContentStream) -> None: self.stream = stream try: data = self.stream.get_data() - doc_root: Document = parseString(data) # noqa: S318 + doc_root: Document = _XmpBuilder().parseString(data) except (AttributeError, ExpatError) as e: raise PdfReadError(f"XML in XmpInformation was invalid: {e}") self.rdf_root: XmlElement = doc_root.getElementsByTagNameNS(
tests/test_xmp.py+28 −6 modified@@ -906,8 +906,11 @@ def test_xmp_information__external_entity_expansion(tmpdir): </rdf:RDF> </x:xmpmeta>""".encode()) - xmp = XmpInformation(stream) - assert xmp.dc_creator == ["abc"] + with pytest.raises( + expected_exception=PdfReadError, + match=r"^XML in XmpInformation was invalid: Forbidden entities: 'xxe'$" + ): + XmpInformation(stream) @pytest.mark.timeout(10) @@ -935,9 +938,28 @@ def test_xmp_information__exponential_entity_expansion(): with pytest.raises( expected_exception=PdfReadError, - match=( - r"^XML in XmpInformation was invalid: limit on input amplification factor " - r"\(from DTD and entities\) breached: line 16, column 60$" - ) + match=r"^XML in XmpInformation was invalid: Forbidden entities: 'lol'$" + ): + XmpInformation(stream) + + +@pytest.mark.timeout(10) +def test_xmp_information__quadratic_entity_expansion(): + stream = ContentStream(pdf=None, stream=None) + stream.set_data(f"""<?xml version="1.0"?> +<!DOCTYPE lolz [ + <!ENTITY a "{'A' * 10_000}"> +]> +<x:xmpmeta xmlns:x="adobe:ns:meta/"> + <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <rdf:Description rdf:about=""> + <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">{'&a;' * 99}</dc:title> + </rdf:Description> + </rdf:RDF> +</x:xmpmeta>""".encode()) + + with pytest.raises( + expected_exception=PdfReadError, + match=r"^XML in XmpInformation was invalid: Forbidden entities: 'a'$" ): XmpInformation(stream)
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
6
- github.com/py-pdf/pypdf/commit/b15a374e5ca648d4878e57c3b2c0551e7f8cc7f8 (nvd · Patch · WEB)
- github.com/py-pdf/pypdf/pull/3724 (nvd · Issue Tracking · Patch · WEB)
- github.com/py-pdf/pypdf/security/advisories/GHSA-3crg-w4f6-42mx (nvd · Mitigation · Patch · Vendor Advisory · WEB)
- github.com/advisories/GHSA-3crg-w4f6-42mx (ghsa · ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2026-40260 (ghsa · ADVISORY)
- github.com/py-pdf/pypdf/releases/tag/6.10.0 (nvd · Product · Release Notes · WEB)
News mentions
0
No linked articles in our index yet.