VYPR
High severity · NVD Advisory · Published Feb 19, 2024 · Updated Feb 13, 2025

CBOR2 decoder has potential buffer overflow

CVE-2024-26134

Description

cbor2 provides encoding and decoding for the Concise Binary Object Representation (CBOR) (RFC 8949) serialization format. Starting in version 5.5.1 and prior to version 5.6.2, an attacker can crash a service using cbor2 to parse a CBOR binary by sending a long enough object. Version 5.6.2 contains a patch for this issue.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
cbor2 (PyPI) | >= 5.5.1, < 5.6.2 | 5.6.2

Affected products

1

Patches

2
4de6991ba29b

Check PyObject_Hash() return value for errors

https://github.com/agronholm/cbor2 · Alex Grönholm · Feb 3, 2024 · via ghsa
2 files changed · +7 -0
  • docs/versionhistory.rst · +5 -0 · modified
    @@ -5,6 +5,11 @@ Version history
     
     This library adheres to `Semantic Versioning <http://semver.org/>`_.
     
    +**UNRELEASED**
    +
    +- Fixed ``__hash__()`` of the C version of the ``CBORTag`` type crashing when there's a recursive
    +  reference cycle
    +
     **5.6.1** (2024-02-01)
     
     - Fixed use-after-free in the decoder's C version when prematurely encountering the end of stream
    
  • source/tags.c · +2 -0 · modified
    @@ -182,6 +182,8 @@ CBORTag_hash(CBORTagObject *self)
             goto exit;
     
         ret = PyObject_Hash(tmp);
    +    if (ret == -1)
    +        goto exit;
     
         // Remove id(self) from thread_locals.running_hashes
         if (PySet_Discard(running_hashes, self_id) == -1) {
    
387755eacf0b

Fixed MemoryError when decoding large definite strings (#204)

https://github.com/agronholm/cbor2 · Alex Grönholm · Jan 14, 2024 · via ghsa
6 files changed · +263 -47
  • cbor2/_decoder.py · +27 -2 · modified
    @@ -3,6 +3,7 @@
     import re
     import struct
     import sys
    +from codecs import getincrementaldecoder
     from collections.abc import Callable, Mapping, Sequence
     from datetime import date, datetime, timedelta, timezone
     from io import BytesIO
    @@ -31,6 +32,7 @@
     timestamp_re = re.compile(
         r"^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)" r"(?:\.(\d{1,6})\d*)?(?:Z|([+-])(\d\d):(\d\d))$"
     )
    +incremental_utf8_decoder = getincrementaldecoder("utf-8")
     
     
     class CBORDecoder:
    @@ -305,8 +307,19 @@ def decode_bytestring(self, subtype: int) -> bytes:
             else:
                 if length > sys.maxsize:
                     raise CBORDecodeValueError("invalid length for bytestring 0x%x" % length)
    +            elif length <= 65536:
    +                result = self.read(length)
    +            else:
    +                # Read large bytestrings 65536 (2 ** 16) bytes at a time
    +                left = length
    +                buffer = bytearray()
    +                while left:
    +                    chunk_size = min(left, 65536)
    +                    buffer.extend(self.read(chunk_size))
    +                    left -= chunk_size
    +
    +                result = bytes(buffer)
     
    -            result = self.read(length)
                 self._stringref_namespace_add(result, length)
     
             return self.set_shareable(result)
    @@ -350,7 +363,19 @@ def decode_string(self, subtype: int) -> str:
                 if length > sys.maxsize:
                     raise CBORDecodeValueError("invalid length for string 0x%x" % length)
     
    -            result = self.read(length).decode("utf-8", self._str_errors)
    +            if length <= 65536:
    +                result = self.read(length).decode("utf-8", self._str_errors)
    +            else:
    +                # Read and decode large text strings 65536 (2 ** 16) bytes at a time
    +                codec = incremental_utf8_decoder(self._str_errors)
    +                left = length
    +                result = ""
    +                while left:
    +                    chunk_size = min(left, 65536)
    +                    final = left <= chunk_size
    +                    result += codec.decode(self.read(chunk_size), final)
    +                    left -= chunk_size
    +
                 self._stringref_namespace_add(result, length)
     
             return self.set_shareable(result)
    
  • docs/versionhistory.rst · +3 -1 · modified
    @@ -8,14 +8,16 @@ This library adheres to `Semantic Versioning <http://semver.org/>`_.
     **UNRELEASED**
     
     - Added the ``cbor2`` command line tool (for ``pipx run cbor2``)
    +- Added support for native date encoding (bschoenmaeckers)
     - Fixed ``SystemError`` in the C extension when decoding a ``Fractional`` with a bad
       number of arguments
     - Fixed ``SystemError`` in the C extension when the decoder object hook raises an
       exception
     - Fixed a segmentation fault when decoding invalid unicode data
     - Fixed infinite recursion when trying to hash a CBOR tag whose value points to the tag
       itself
    -- Added support for native date encoding (bschoenmaeckers)
    +- Fixed ``MemoryError`` when maliciously constructed bytestrings or string (declared to be absurdly
    +  large) are being decoded
     
     **5.5.1** (2023-11-02)
     
    
  • setup.py · +1 -1 · modified
    @@ -57,7 +57,7 @@ def check_libc():
                 "source/tags.c",
                 "source/halffloat.c",
             ],
    -        optional=True,
    +        # optional=True,
         )
         kwargs = {"ext_modules": [_cbor2]}
     else:
    
  • source/decoder.c · +195 -37 · modified
    @@ -348,31 +348,44 @@ _CBORDecoder_get_immutable(CBORDecoderObject *self, void *closure)
     
     // Utility functions /////////////////////////////////////////////////////////
     
    -static int
    -fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size)
    +static PyObject *
    +fp_read_object(CBORDecoderObject *self, const Py_ssize_t size)
     {
    +    PyObject *ret = NULL;
         PyObject *obj, *size_obj;
    -    char *data;
    -    int ret = -1;
    -
         size_obj = PyLong_FromSsize_t(size);
         if (size_obj) {
             obj = PyObject_CallFunctionObjArgs(self->read, size_obj, NULL);
    +        Py_DECREF(size_obj);
             if (obj) {
                 assert(PyBytes_CheckExact(obj));
                 if (PyBytes_GET_SIZE(obj) == (Py_ssize_t) size) {
    -                data = PyBytes_AS_STRING(obj);
    -                memcpy(buf, data, size);
    -                ret = 0;
    +                ret = obj;
                 } else {
    +                Py_DECREF(obj);
                     PyErr_Format(
                         _CBOR2_CBORDecodeEOF,
                         "premature end of stream (expected to read %zd bytes, "
                         "got %zd instead)", size, PyBytes_GET_SIZE(obj));
                 }
    -            Py_DECREF(obj);
             }
    -        Py_DECREF(size_obj);
    +    }
    +    return ret;
    +}
    +
    +
    +static int
    +fp_read(CBORDecoderObject *self, char *buf, const Py_ssize_t size)
    +{
    +    int ret = -1;
    +    PyObject *obj = fp_read_object(self, size);
    +    if (obj) {
    +        char *data = PyBytes_AS_STRING(obj);
    +        if (data) {
    +            memcpy(buf, data, size);
    +            ret = 0;
    +        }
    +        Py_DECREF(obj);
         }
         return ret;
     }
    @@ -538,17 +551,12 @@ decode_negint(CBORDecoderObject *self, uint8_t subtype)
     
     
     static PyObject *
    -decode_definite_bytestring(CBORDecoderObject *self, Py_ssize_t length)
    +decode_definite_short_bytestring(CBORDecoderObject *self, Py_ssize_t length)
     {
    -    PyObject *ret = NULL;
    -
    -    ret = PyBytes_FromStringAndSize(NULL, length);
    +    PyObject *ret = fp_read_object(self, length);
         if (!ret)
             return NULL;
    -    if (fp_read(self, PyBytes_AS_STRING(ret), length) == -1) {
    -        Py_DECREF(ret);
    -        return NULL;
    -    }
    +
         if (string_namespace_add(self, ret, length) == -1) {
             Py_DECREF(ret);
             return NULL;
    @@ -557,6 +565,56 @@ decode_definite_bytestring(CBORDecoderObject *self, Py_ssize_t length)
     }
     
     
    +static PyObject *
    +decode_definite_long_bytestring(CBORDecoderObject *self, Py_ssize_t length)
    +{
    +    PyObject *buffer = NULL;
    +    Py_ssize_t left = length;
    +    while (left) {
    +        Py_ssize_t chunk_length = length <= 65536 ? length : 65536;
    +        PyObject *chunk = fp_read_object(self, chunk_length);
    +        if (!chunk) {
    +            break;
    +        }
    +
    +        if (!PyBytes_CheckExact(chunk)) {
    +            Py_DECREF(chunk);
    +            break;
    +        }
    +
    +        if (buffer) {
    +            PyObject *new_buffer = PyByteArray_Concat(buffer, chunk);
    +            Py_DECREF(chunk);
    +            if (!new_buffer)
    +                break;
    +
    +            if (new_buffer != buffer) {
    +                Py_DECREF(buffer);
    +                buffer = new_buffer;
    +            }
    +        } else {
    +            buffer = PyByteArray_FromObject(chunk);
    +            Py_DECREF(chunk);
    +            if (!buffer)
    +                break;
    +        }
    +        left -= chunk_length;
    +    }
    +
    +    PyObject *ret = NULL;
    +    if (buffer) {
    +        ret = PyBytes_FromObject(buffer);
    +        Py_DECREF(buffer);
    +
    +        if (ret && string_namespace_add(self, ret, length) == -1) {
    +            Py_DECREF(ret);
    +            ret = NULL;
    +        }
    +    }
    +    return ret;
    +}
    +
    +
     static PyObject *
     decode_indefinite_bytestrings(CBORDecoderObject *self)
     {
    @@ -615,9 +673,14 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype)
         }
         if (indefinite)
             ret = decode_indefinite_bytestrings(self);
    +    else if (length <= 65536)
    +        ret = decode_definite_short_bytestring(self, (Py_ssize_t)length);
         else
    -        ret = decode_definite_bytestring(self, (Py_ssize_t)length);
    -    set_shareable(self, ret);
    +        ret = decode_definite_long_bytestring(self, (Py_ssize_t)length);
    +
    +    if (ret)
    +        set_shareable(self, ret);
    +
         return ret;
     }
     
    @@ -637,31 +700,121 @@ decode_bytestring(CBORDecoderObject *self, uint8_t subtype)
     
     
     static PyObject *
    -decode_definite_string(CBORDecoderObject *self, Py_ssize_t length)
    +decode_definite_short_string(CBORDecoderObject *self, Py_ssize_t length)
     {
    -    PyObject *ret = NULL;
    -    char *buf;
    -
    -    buf = PyMem_Malloc(length);
    -    if (!buf)
    -        return PyErr_NoMemory();
    -
    -    if (fp_read(self, buf, length) == 0)
    -        ret = PyUnicode_DecodeUTF8(
    -                buf, length, PyBytes_AS_STRING(self->str_errors));
    -    PyMem_Free(buf);
    -
    -    if (!ret)
    +    PyObject *bytes_obj = fp_read_object(self, length);
    +    if (!bytes_obj)
             return NULL;
     
    -    if (string_namespace_add(self, ret, length) == -1) {
    +    const char *bytes = PyBytes_AS_STRING(bytes_obj);
    +    PyObject *ret = PyUnicode_FromStringAndSize(bytes, length);
    +    Py_DECREF(bytes_obj);
    +    if (ret && string_namespace_add(self, ret, length) == -1) {
             Py_DECREF(ret);
             return NULL;
         }
         return ret;
     }
     
     
    +static PyObject *
    +decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length)
    +{
    +    PyObject *ret = NULL, *chunk = NULL, *string = NULL;
    +    Py_ssize_t left = length;
    +    Py_ssize_t consumed;
    +    Py_ssize_t buffer_size = 0;  // how many bytes are allocated for the buffer
    +    Py_ssize_t buffer_length = 0;  // how many bytes are actually stored in the buffer
    +    char *buffer = NULL;
    +    while (left) {
    +        // Read up to 65536 bytes of data from the stream
    +        Py_ssize_t chunk_length = 65536 - buffer_size;
    +        if (left < chunk_length)
    +            chunk_length = left;
    +
    +        PyObject *chunk = fp_read_object(self, chunk_length);
    +        left -= chunk_length;
    +        if (!chunk)
    +            goto error;
    +
    +        // Get the internal buffer of the bytes object
    +        char *bytes_buffer = PyBytes_AsString(chunk);
    +        if (!bytes_buffer)
    +            goto error;
    +
    +        char *source_buffer;
    +        if (buffer) {
    +            // Grow the buffer to accommodate the previous data plus the new chunk
    +            if (buffer_length + chunk_length > buffer_size) {
    +                buffer_size = buffer_length + chunk_length;
    +                char *new_buffer = PyMem_Realloc(buffer, buffer_size);
    +                if (!new_buffer)
    +                    goto error;
    +
    +                buffer = new_buffer;
    +            }
    +
    +            // Concatenate the chunk into the buffer
    +            memcpy(buffer + buffer_length, bytes_buffer, chunk_length);
    +            buffer_length += chunk_length;
    +
    +            source_buffer = buffer;
    +            chunk_length = buffer_length;
    +        } else {
    +            // Use the chunk's internal buffer directly to decode as many characters as possible
    +            source_buffer = bytes_buffer;
    +        }
    +
    +        string = PyUnicode_DecodeUTF8Stateful(source_buffer, chunk_length, NULL, &consumed);
    +        if (!string)
    +            goto error;
    +
    +        if (ret) {
    +            // Concatenate the result to the existing result
    +            PyObject *joined = PyUnicode_Concat(ret, string);
    +            if (!joined)
    +                goto error;
    +
    +            Py_DECREF(string);
    +            string = NULL;
    +            ret = joined;
    +        } else {
    +            // Set the result to the decoded string
    +            ret = string;
    +        }
    +
    +        Py_ssize_t unconsumed = chunk_length - consumed;
    +        if (consumed != chunk_length) {
    +            if (buffer) {
    +                // Move the unconsumed bytes to the start of the buffer
    +                memmove(buffer, buffer + consumed, unconsumed);
    +            } else {
    +                // Create a new buffer
    +                buffer = PyMem_Malloc(unconsumed);
    +                if (!buffer)
    +                    goto error;
    +
    +                memcpy(buffer, bytes_buffer + consumed, unconsumed);
    +            }
    +            buffer_length = unconsumed;
    +        }
    +    }
    +
    +    if (ret && string_namespace_add(self, ret, length) == -1)
    +        goto error;
    +
    +    return ret;
    +error:
    +    Py_XDECREF(ret);
    +    Py_XDECREF(chunk);
    +    Py_XDECREF(string);
    +    if (buffer)
    +        PyMem_Free(buffer);
    +
    +    return NULL;
    +}
    +
    +
     static PyObject *
     decode_indefinite_strings(CBORDecoderObject *self)
     {
    @@ -719,9 +872,14 @@ decode_string(CBORDecoderObject *self, uint8_t subtype)
         }
         if (indefinite)
             ret = decode_indefinite_strings(self);
    +    else if (length <= 65536)
    +        ret = decode_definite_short_string(self, (Py_ssize_t)length);
         else
    -        ret = decode_definite_string(self, (Py_ssize_t)length);
    -    set_shareable(self, ret);
    +        ret = decode_definite_long_string(self, (Py_ssize_t)length);
    +
    +    if (ret)
    +        set_shareable(self, ret);
    +
         return ret;
     }
     
    
  • source/tags.c · +1 -1 · modified
    @@ -191,7 +191,7 @@ CBORTag_hash(CBORTagObject *self)
     
         // Check how many more references there are in running_hashes
         Py_ssize_t length = PySequence_Length(running_hashes);
    -    if (length == 1) {
    +    if (length == -1) {
             ret = -1;
             goto exit;
         }
    
  • tests/test_decoder.py · +36 -5 · modified
    @@ -1,3 +1,5 @@
    +from __future__ import annotations
    +
     import math
     import re
     import struct
    @@ -9,6 +11,8 @@
     from fractions import Fraction
     from io import BytesIO
     from ipaddress import ip_address, ip_network
    +from pathlib import Path
    +from typing import Type, cast
     from uuid import UUID
     
     import pytest
    @@ -226,6 +230,7 @@ def test_binary(impl, payload, expected):
             ("62225c", '"\\'),
             ("62c3bc", "\u00fc"),
             ("63e6b0b4", "\u6c34"),
    +        pytest.param("7a00010001" + "61" * 65535 + "c3b6", "a" * 65535 + "ö", id="split_unicode"),
         ],
     )
     def test_string(impl, payload, expected):
    @@ -846,20 +851,46 @@ def test_decimal_payload_unpacking(impl, data, expected):
         "payload, exception, pattern",
         [
             pytest.param(
    -            b"\xd8\x1e\x84\xff\xff\xff\xff",
    +            unhexlify("d81e84ffffffff"),
                 TypeError,
                 r"__new__\(\) takes from 1 to 3 positional arguments but 5 were given",
                 id="fractional",
             ),
             pytest.param(
    -            b"\xae\xae\xae\xae\xae\xae\xae\xae\xae\x01\x08\xc2\x98C\xd9\x01\x00\xd8$"
    -            b"\x9f\x00\x00\xae\xae\xff\xc2l\xa7\x99",
    -            Exception,
    +            unhexlify("aeaeaeaeaeaeaeaeae0108c29843d90100d8249f0000aeaeffc26ca799"),
    +            "CBORDecodeEOF",
                 "premature end of stream",
                 id="unicode",
             ),
         ],
     )
    -def test_invalid_data(impl, payload, exception, pattern) -> None:
    +def test_invalid_data(
    +    impl, payload: bytes, exception: type[Exception] | str, pattern: str
    +) -> None:
    +    if isinstance(exception, str):
    +        exception = getattr(impl, exception)
    +
         with pytest.raises(exception, match=pattern):
             impl.loads(payload)
    +
    +
    +@pytest.mark.parametrize(
    +    "payload",
    +    [
    +        pytest.param(
    +            unhexlify("5b7fffffffffffff00"),
    +            id="bytestring",
    +        ),
    +        pytest.param(
    +            unhexlify("7b7fffffffffffff00"),
    +            id="unicode",
    +        ),
    +    ],
    +)
    +def test_oversized_read(impl, payload: bytes, tmp_path: Path) -> None:
    +    CBORDecodeEOF = cast(Type[Exception], getattr(impl, "CBORDecodeEOF"))
    +    with pytest.raises(CBORDecodeEOF, match="premature end of stream"):
    +        dummy_path = tmp_path / "testdata"
    +        dummy_path.write_bytes(payload)
    +        with dummy_path.open("rb") as f:
    +            impl.load(f)
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

14

News mentions

0

No linked articles in our index yet.