High severity · NVD Advisory · Published Feb 19, 2024 · Updated Feb 13, 2025
CBOR2 decoder has potential buffer overflow
CVE-2024-26134
Description
cbor2 provides encoding and decoding for the Concise Binary Object Representation (CBOR) (RFC 8949) serialization format. Starting in version 5.5.1 and prior to version 5.6.2, an attacker can crash a service using cbor2 to parse a CBOR binary by sending a long enough object. Version 5.6.2 contains a patch for this issue.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| cbor2 (PyPI) | >= 5.5.1, < 5.6.2 | 5.6.2 |
Affected products
1
Patches
2
4de6991ba29b: Check PyObject_Hash() return value for errors
2 files changed · +7 −0
docs/versionhistory.rst+5 −0 modified@@ -5,6 +5,11 @@ Version history This library adheres to `Semantic Versioning <http://semver.org/>`_. +**UNRELEASED** + +- Fixed ``__hash__()`` of the C version of the ``CBORTag`` type crashing when there's a recursive + reference cycle + **5.6.1** (2024-02-01) - Fixed use-after-free in the decoder's C version when prematurely encountering the end of stream
source/tags.c+2 −0 modified@@ -182,6 +182,8 @@ CBORTag_hash(CBORTagObject *self) goto exit; ret = PyObject_Hash(tmp); + if (ret == -1) + goto exit; // Remove id(self) from thread_locals.running_hashes if (PySet_Discard(running_hashes, self_id) == -1) {
387755eacf0b: Fixed MemoryError when decoding large definite strings (#204)
6 files changed · +263 −47
cbor2/_decoder.py+27 −2 modified@@ -3,6 +3,7 @@ import re import struct import sys +from codecs import getincrementaldecoder from collections.abc import Callable, Mapping, Sequence from datetime import date, datetime, timedelta, timezone from io import BytesIO @@ -31,6 +32,7 @@ timestamp_re = re.compile( r"^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)" r"(?:\.(\d{1,6})\d*)?(?:Z|([+-])(\d\d):(\d\d))$" ) +incremental_utf8_decoder = getincrementaldecoder("utf-8") class CBORDecoder: @@ -305,8 +307,19 @@ def decode_bytestring(self, subtype: int) -> bytes: else: if length > sys.maxsize: raise CBORDecodeValueError("invalid length for bytestring 0x%x" % length) + elif length <= 65536: + result = self.read(length) + else: + # Read large bytestrings 65536 (2 ** 16) bytes at a time + left = length + buffer = bytearray() + while left: + chunk_size = min(left, 65536) + buffer.extend(self.read(chunk_size)) + left -= chunk_size + + result = bytes(buffer) - result = self.read(length) self._stringref_namespace_add(result, length) return self.set_shareable(result) @@ -350,7 +363,19 @@ def decode_string(self, subtype: int) -> str: if length > sys.maxsize: raise CBORDecodeValueError("invalid length for string 0x%x" % length) - result = self.read(length).decode("utf-8", self._str_errors) + if length <= 65536: + result = self.read(length).decode("utf-8", self._str_errors) + else: + # Read and decode large text strings 65536 (2 ** 16) bytes at a time + codec = incremental_utf8_decoder(self._str_errors) + left = length + result = "" + while left: + chunk_size = min(left, 65536) + final = left <= chunk_size + result += codec.decode(self.read(chunk_size), final) + left -= chunk_size + self._stringref_namespace_add(result, length) return self.set_shareable(result)
docs/versionhistory.rst+3 −1 modified@@ -8,14 +8,16 @@ This library adheres to `Semantic Versioning <http://semver.org/>`_. **UNRELEASED** - Added the ``cbor2`` command line tool (for ``pipx run cbor2``) +- Added support for native date encoding (bschoenmaeckers) - Fixed ``SystemError`` in the C extension when decoding a ``Fractional`` with a bad number of arguments - Fixed ``SystemError`` in the C extension when the decoder object hook raises an exception - Fixed a segmentation fault when decoding invalid unicode data - Fixed infinite recursion when trying to hash a CBOR tag whose value points to the tag itself -- Added support for native date encoding (bschoenmaeckers) +- Fixed ``MemoryError`` when maliciously constructed bytestrings or string (declared to be absurdly + large) are being decoded **5.5.1** (2023-11-02)
setup.py+1 −1 modified@@ -57,7 +57,7 @@ def check_libc(): "source/tags.c", "source/halffloat.c", ], - optional=True, + # optional=True, ) kwargs = {"ext_modules": [_cbor2]} else:
source/decoder.c+195 −37 modified@@ -348,31 +348,44 @@ _CBORDecoder_get_immutable(CBORDecoderObject *self, void *closure) // Utility functions ///////////////////////////////////////////////////////// -static int -fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size) +static PyObject * +fp_read_object(CBORDecoderObject *self, const Py_ssize_t size) { + PyObject *ret = NULL; PyObject *obj, *size_obj; - char *data; - int ret = -1; - size_obj = PyLong_FromSsize_t(size); if (size_obj) { obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL); + Py_DECREF(size_obj); if (obj) { assert(PyBytes_CheckExact(obj)); if (PyBytes_GET_SIZE(obj) == (Py_ssize_t) size) { - data = PyBytes_AS_STRING(obj); - memcpy(buf, data, size); - ret = 0; + ret = obj; } else { + Py_DECREF(obj); PyErr_Format( _CBOR2_CBORDecodeEOF, "premature end of stream (expected to read %zd bytes, " "got %zd instead)", size, PyBytes_GET_SIZE(obj)); } - Py_DECREF(obj); } - Py_DECREF(size_obj); + } + return ret; +} + + +static int +fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size) +{ + int ret = -1; + PyObject *obj = fp_read_object(self, size); + if (obj) { + char *data = PyBytes_AS_STRING(obj); + if (data) { + memcpy(buf, data, size); + ret = 0; + } + Py_DECREF(obj); } return ret; } @@ -538,17 +551,12 @@ decode_negint(CBORDecoderObject *self, uint8_t subtype) static PyObject * -decode_definite_bytestring(CBORDecoderObject *self, Py_ssize_t length) +decode_definite_short_bytestring(CBORDecoderObject *self, Py_ssize_t length) { - PyObject *ret = NULL; - - ret = PyBytes_FromStringAndSize(NULL, length); + PyObject *ret = fp_read_object(self, length); if (!ret) return NULL; - if (fp_read(self, PyBytes_AS_STRING(ret), length) == -1) { - Py_DECREF(ret); - return NULL; - } + if (string_namespace_add(self, ret, length) == -1) { Py_DECREF(ret); return NULL; @@ -557,6 +565,56 @@ decode_definite_bytestring(CBORDecoderObject *self, Py_ssize_t length) } +static PyObject * 
+decode_definite_long_bytestring(CBORDecoderObject *self, Py_ssize_t length) +{ + PyObject *buffer = NULL; + Py_ssize_t left = length; + while (left) { + Py_ssize_t chunk_length = length <= 65536 ? length : 65536; + PyObject *chunk = fp_read_object(self, chunk_length); + if (!chunk) { + break; + } + + if (!PyBytes_CheckExact(chunk)) { + Py_DECREF(chunk); + break; + } + + if (buffer) { + PyObject *new_buffer = PyByteArray_Concat(buffer, chunk); + Py_DECREF(chunk); + if (!new_buffer) + break; + + if (new_buffer != buffer) { + Py_DECREF(buffer); + buffer = new_buffer; + } + } else { + buffer = PyByteArray_FromObject(chunk); + Py_DECREF(chunk); + if (!buffer) + break; + } + left -= chunk_length; + } + + PyObject *ret = NULL; + if (buffer) { + ret = PyBytes_FromObject(buffer); + Py_DECREF(buffer); + + if (ret && string_namespace_add(self, ret, length) == -1) { + Py_DECREF(ret); + ret = NULL; + } + } + return ret; +} + + static PyObject * decode_indefinite_bytestrings(CBORDecoderObject *self) { @@ -615,9 +673,14 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype) } if (indefinite) ret = decode_indefinite_bytestrings(self); + else if (length <= 65536) + ret = decode_definite_short_bytestring(self, (Py_ssize_t)length); else - ret = decode_definite_bytestring(self, (Py_ssize_t)length); - set_shareable(self, ret); + ret = decode_definite_long_bytestring(self, (Py_ssize_t)length); + + if (ret) + set_shareable(self, ret); + return ret; } @@ -637,31 +700,121 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype) static PyObject * -decode_definite_string(CBORDecoderObject *self, Py_ssize_t length) +decode_definite_short_string(CBORDecoderObject *self, Py_ssize_t length) { - PyObject *ret = NULL; - char *buf; - - buf = PyMem_Malloc(length); - if (!buf) - return PyErr_NoMemory(); - - if (fp_read(self, buf, length) == 0) - ret = PyUnicode_DecodeUTF8( - buf, length, PyBytes_AS_STRING(self->str_errors)); - PyMem_Free(buf); - - if (!ret) + PyObject *bytes_obj = 
fp_read_object(self, length); + if (!bytes_obj) return NULL; - if (string_namespace_add(self, ret, length) == -1) { + const char *bytes = PyBytes_AS_STRING(bytes_obj); + PyObject *ret = PyUnicode_FromStringAndSize(bytes, length); + Py_DECREF(bytes_obj); + if (ret && string_namespace_add(self, ret, length) == -1) { Py_DECREF(ret); return NULL; } return ret; } +static PyObject * +decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length) +{ + PyObject *ret = NULL, *chunk = NULL, *string = NULL; + Py_ssize_t left = length; + Py_ssize_t consumed; + Py_ssize_t buffer_size = 0; // how many bytes are allocated for the buffer + Py_ssize_t buffer_length = 0; // how many bytes are actually stored in the buffer + char *buffer = NULL; + while (left) { + // Read up to 65536 bytes of data from the stream + Py_ssize_t chunk_length = 65536 - buffer_size; + if (left < chunk_length) + chunk_length = left; + + PyObject *chunk = fp_read_object(self, chunk_length); + left -= chunk_length; + if (!chunk) + goto error; + + // Get the internal buffer of the bytes object + char *bytes_buffer = PyBytes_AsString(chunk); + if (!bytes_buffer) + goto error; + + char *source_buffer; + if (buffer) { + // Grow the buffer to accommodate the previous data plus the new chunk + if (buffer_length + chunk_length > buffer_size) { + buffer_size = buffer_length + chunk_length; + char *new_buffer = PyMem_Realloc(buffer, buffer_size); + if (!new_buffer) + goto error; + + buffer = new_buffer; + } + + // Concatenate the chunk into the buffer + memcpy(buffer + buffer_length, bytes_buffer, chunk_length); + buffer_length += chunk_length; + + source_buffer = buffer; + chunk_length = buffer_length; + } else { + // Use the chunk's internal buffer directly to decode as many characters as possible + source_buffer = bytes_buffer; + } + + string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, NULL, &consumed); + if (!string) + goto error; + + if (ret) { + // Concatenate the result to the existing 
result + PyObject *joined = PyUnicode_Concat(ret, string); + if (!joined) + goto error; + + Py_DECREF(string); + string = NULL; + ret = joined; + } else { + // Set the result to the decoded string + ret = string; + } + + Py_ssize_t unconsumed = chunk_length - consumed; + if (consumed != chunk_length) { + if (buffer) { + // Move the unconsumed bytes to the start of the buffer + memmove(buffer, buffer + consumed, unconsumed); + } else { + // Create a new buffer + buffer = PyMem_Malloc(unconsumed); + if (!buffer) + goto error; + + memcpy(buffer, bytes_buffer + consumed, unconsumed); + } + buffer_length = unconsumed; + } + } + + if (ret && string_namespace_add(self, ret, length) == -1) + goto error; + + return ret; +error: + Py_XDECREF(ret); + Py_XDECREF(chunk); + Py_XDECREF(string); + if (buffer) + PyMem_Free(buffer); + + return NULL; +} + + static PyObject * decode_indefinite_strings(CBORDecoderObject *self) { @@ -719,9 +872,14 @@ decode_string(CBORDecoderObject *self, uint8_t subtype) } if (indefinite) ret = decode_indefinite_strings(self); + else if (length <= 65536) + ret = decode_definite_short_string(self, (Py_ssize_t)length); else - ret = decode_definite_string(self, (Py_ssize_t)length); - set_shareable(self, ret); + ret = decode_definite_long_string(self, (Py_ssize_t)length); + + if (ret) + set_shareable(self, ret); + return ret; }
source/tags.c+1 −1 modified@@ -191,7 +191,7 @@ CBORTag_hash(CBORTagObject *self) // Check how many more references there are in running_hashes Py_ssize_t length = PySequence_Length(running_hashes); - if (length == 1) { + if (length == -1) { ret = -1; goto exit; }
tests/test_decoder.py+36 −5 modified@@ -1,3 +1,5 @@ +from __future__ import annotations + import math import re import struct @@ -9,6 +11,8 @@ from fractions import Fraction from io import BytesIO from ipaddress import ip_address, ip_network +from pathlib import Path +from typing import Type, cast from uuid import UUID import pytest @@ -226,6 +230,7 @@ def test_binary(impl, payload, expected): ("62225c", '"\\'), ("62c3bc", "\u00fc"), ("63e6b0b4", "\u6c34"), + pytest.param("7a00010001" + "61" * 65535 + "c3b6", "a" * 65535 + "ö", id="split_unicode"), ], ) def test_string(impl, payload, expected): @@ -846,20 +851,46 @@ def test_decimal_payload_unpacking(impl, data, expected): "payload, exception, pattern", [ pytest.param( - b"\xd8\x1e\x84\xff\xff\xff\xff", + unhexlify("d81e84ffffffff"), TypeError, r"__new__\(\) takes from 1 to 3 positional arguments but 5 were given", id="fractional", ), pytest.param( - b"\xae\xae\xae\xae\xae\xae\xae\xae\xae\x01\x08\xc2\x98C\xd9\x01\x00\xd8$" - b"\x9f\x00\x00\xae\xae\xff\xc2l\xa7\x99", - Exception, + unhexlify("aeaeaeaeaeaeaeaeae0108c29843d90100d8249f0000aeaeffc26ca799"), + "CBORDecodeEOF", "premature end of stream", id="unicode", ), ], ) -def test_invalid_data(impl, payload, exception, pattern) -> None: +def test_invalid_data( + impl, payload: bytes, exception: type[Exception] | str, pattern: str +) -> None: + if isinstance(exception, str): + exception = getattr(impl, exception) + with pytest.raises(exception, match=pattern): impl.loads(payload) + + +@pytest.mark.parametrize( + "payload", + [ + pytest.param( + unhexlify("5b7fffffffffffff00"), + id="bytestring", + ), + pytest.param( + unhexlify("7b7fffffffffffff00"), + id="unicode", + ), + ], +) +def test_oversized_read(impl, payload: bytes, tmp_path: Path) -> None: + CBORDecodeEOF = cast(Type[Exception], getattr(impl, "CBORDecodeEOF")) + with pytest.raises(CBORDecodeEOF, match="premature end of stream"): + dummy_path = tmp_path / "testdata" + dummy_path.write_bytes(payload) + with 
dummy_path.open("rb") as f: + impl.load(f)
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
14
- github.com/advisories/GHSA-375g-39jq-vq7m (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2024-26134 (ghsa, ADVISORY)
- github.com/agronholm/cbor2/commit/387755eacf0be35591a478d3c67fe10618a6d542 (ghsa, x_refsource_MISC, WEB)
- github.com/agronholm/cbor2/commit/4de6991ba29bf2290d7b9d83525eda7d021873df (ghsa, x_refsource_MISC, WEB)
- github.com/agronholm/cbor2/pull/204 (ghsa, x_refsource_MISC, WEB)
- github.com/agronholm/cbor2/releases/tag/5.6.2 (ghsa, x_refsource_MISC, WEB)
- github.com/agronholm/cbor2/security/advisories/GHSA-375g-39jq-vq7m (ghsa, x_refsource_CONFIRM, WEB)
- github.com/pypa/advisory-database/tree/main/vulns/cbor2/PYSEC-2024-155.yaml (ghsa, WEB)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/BT42VXZMMMCSSHMA65KKPOZCXJEYHNR5 (ghsa, WEB)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/GX524ZG2XJWFV37UQKQ4LWIH4UICSGEQ (ghsa, WEB)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/PWC3VU6YV6EXKCSX5GTKWLBZIDIJNQJY (ghsa, WEB)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/BT42VXZMMMCSSHMA65KKPOZCXJEYHNR5/ (mitre)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/GX524ZG2XJWFV37UQKQ4LWIH4UICSGEQ/ (mitre)
- lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/PWC3VU6YV6EXKCSX5GTKWLBZIDIJNQJY/ (mitre)
News mentions
0
No linked articles in our index yet.