VYPR
Medium severity · 5.3 · NVD Advisory · Published Apr 17, 2026 · Updated Apr 22, 2026

CVE-2026-40260

CVE-2026-40260

Description

pypdf is a free and open-source pure-python PDF library. In versions prior to 6.10.0, manipulated XMP metadata entity declarations can exhaust RAM. An attacker who exploits this vulnerability can craft a PDF which leads to large memory usage. This requires parsing the XMP metadata. This issue has been fixed in version 6.10.0.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
pypdf (PyPI) | < 6.10.0 | 6.10.0

Affected products

1

Patches

1
b15a374e5ca6

SEC: Disallow custom XML entity declarations for XMP metadata (#3724)

https://github.com/py-pdf/pypdf · Stefan · Apr 10, 2026 · via ghsa
2 files changed · +60 -9
  • pypdf/xmp.py · +32 -3 · modified
    @@ -15,9 +15,10 @@
         TypeVar,
         Union,
     )
    -from xml.dom.minidom import Document, parseString
    +from xml.dom.expatbuilder import ExpatBuilderNS
    +from xml.dom.minidom import Document
     from xml.dom.minidom import Element as XmlElement
    -from xml.parsers.expat import ExpatError
    +from xml.parsers.expat import ExpatError, XMLParserType
     
     from ._protocols import XmpInformationProtocol
     from ._utils import StreamType, deprecate_with_replacement, deprecation_no_replacement
    @@ -161,6 +162,34 @@ def _generic_get(
         return None
     
     
    +class _XmpBuilder(ExpatBuilderNS):
    +    """
    +    Custom XML parser denying all entity declarations.
    +
    +    This is a stripped down and typed version inspired by what *defusedxml* does.
    +
    +    Why do we need this? The default limits of *libexpat* used by Python only block exponential entity expansion,
    +    but not cases like quadratic entity expansion which can still cause quite some memory usage.
    +    """
    +
    +    def custom_entity_declaration_handler(
    +            self,
    +            entity_name: str,
    +            is_parameter_entity: bool,
    +            value: Optional[str],
    +            base: Optional[str],
    +            system_id: str,
    +            public_id: Optional[str],
    +            notation_name: Optional[str],
    +    ) -> None:
    +        raise ExpatError(f"Forbidden entities: {entity_name!r}")
    +
    +    def install(self, parser: XMLParserType) -> None:
    +        super().install(parser)
    +
    +        parser.EntityDeclHandler = self.custom_entity_declaration_handler
    +
    +
     class XmpInformation(XmpInformationProtocol, PdfObject):
         """
         An object that represents Extensible Metadata Platform (XMP) metadata.
    @@ -175,7 +204,7 @@ def __init__(self, stream: ContentStream) -> None:
             self.stream = stream
             try:
                 data = self.stream.get_data()
    -            doc_root: Document = parseString(data)  # noqa: S318
    +            doc_root: Document = _XmpBuilder().parseString(data)
             except (AttributeError, ExpatError) as e:
                 raise PdfReadError(f"XML in XmpInformation was invalid: {e}")
             self.rdf_root: XmlElement = doc_root.getElementsByTagNameNS(
    
  • tests/test_xmp.py · +28 -6 · modified
    @@ -906,8 +906,11 @@ def test_xmp_information__external_entity_expansion(tmpdir):
       </rdf:RDF>
     </x:xmpmeta>""".encode())
     
    -    xmp = XmpInformation(stream)
    -    assert xmp.dc_creator == ["abc"]
    +    with pytest.raises(
    +            expected_exception=PdfReadError,
    +            match=r"^XML in XmpInformation was invalid: Forbidden entities: 'xxe'$"
    +    ):
    +        XmpInformation(stream)
     
     
     @pytest.mark.timeout(10)
    @@ -935,9 +938,28 @@ def test_xmp_information__exponential_entity_expansion():
     
         with pytest.raises(
                 expected_exception=PdfReadError,
    -            match=(
    -                r"^XML in XmpInformation was invalid: limit on input amplification factor "
    -                r"\(from DTD and entities\) breached: line 16, column 60$"
    -            )
    +            match=r"^XML in XmpInformation was invalid: Forbidden entities: 'lol'$"
    +    ):
    +        XmpInformation(stream)
    +
    +
    +@pytest.mark.timeout(10)
    +def test_xmp_information__quadratic_entity_expansion():
    +    stream = ContentStream(pdf=None, stream=None)
    +    stream.set_data(f"""<?xml version="1.0"?>
    +<!DOCTYPE lolz [
    +  <!ENTITY a "{'A' * 10_000}">
    +]>
    +<x:xmpmeta xmlns:x="adobe:ns:meta/">
    +  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    +    <rdf:Description rdf:about="">
    +      <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">{'&a;' * 99}</dc:title>
    +    </rdf:Description>
    +  </rdf:RDF>
    +</x:xmpmeta>""".encode())
    +
    +    with pytest.raises(
    +            expected_exception=PdfReadError,
    +            match=r"^XML in XmpInformation was invalid: Forbidden entities: 'a'$"
         ):
             XmpInformation(stream)
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.