CVE-2025-58058
Description
xz is a pure golang package for reading and writing xz-compressed files. Prior to version 0.5.14, it is possible to put data in front of an LZMA-encoded byte stream without the situation being detected while reading the header. This can lead to increased memory consumption because the current implementation allocates the full decoding buffer directly after reading the header. According to the specification, the LZMA header includes neither a magic number nor a checksum that could detect such an issue. Note that the code recognizes the issue later while reading the stream, but by that time the memory has already been allocated. This issue has been patched in version 0.5.14.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| github.com/ulikunitz/xz (Go) | < 0.5.15 | 0.5.15 |
Affected products: 1
Patches: 2
7184815834c4 Preparation of release v0.5.14
5 files changed · +15 −12
doc/relnotes/release-v0.5.14.md+5 −0 added@@ -0,0 +1,5 @@ +# Release Notes v0.5.14 + +This release addresses security vulnerability CVE-2025-58058. It implements a +number of mitigation for a resource leak problem. It needs to only to be updated +if lzma.NewWriter is used.
lzma/reader.go+1 −1 modified@@ -19,7 +19,7 @@ import ( // ReaderConfig stores the parameters for the reader of the classic LZMA // format. type ReaderConfig struct { - // Since v0.5.14 this parameter sets an upper limit for a .lzma file's + // Since v0.5.14 this parameter sets an upper limit for a .lzma file's // dictionary size. This helps to mitigate problems with mangled // headers. DictCap int
lzma/reader_test.go+1 −1 modified@@ -377,7 +377,7 @@ func TestZeroPrefixIssue(t *testing.T) { } h, ok := l.Header() t.Logf("Header %+v ok %v", h, ok) - actualDictSize := len(l.d.Dict.buf.data)-1 + actualDictSize := len(l.d.Dict.buf.data) - 1 t.Logf("Actual dictionary size: %d", actualDictSize) if actualDictSize > MinDictCap && h.Size >= 0 && h.Size < int64(actualDictSize) {
lzma/writer.go+1 −1 modified@@ -13,7 +13,7 @@ import ( // MinDictCap and MaxDictCap provide the range of supported dictionary // capacities. const ( - MinDictCap = 1 << 12 + MinDictCap = 1 << 12 MaxDictCap = 1<<32 - 1 )
TODO.md+7 −9 modified@@ -1,14 +1,5 @@ # TODO list -## Release v0.5.14 - -* If the DictionarySize is larger than the UncompressedSize set it to - UncompressedSize -* make a Header() (h Header, ok bool) function so the user can implement its own - policy -* Add documentation to Reader to explain the situation -* Add a TODO for the rewrite version - ## Release v0.6 1. Review encoder and check for lzma improvements under xz. @@ -91,6 +82,13 @@ ## Log +## 2025-08-28 + +Release v0.5.14 addresses the security vulnerability CVE-2025-58058. If you put +bytes in from of a LZMA stream, the header might not be read correctly and +memory for the dictionary buffer allocated. I have implemented mitigations for +the problem. + ### 2025-08-20 Release v0.5.13 addressed issue #61 regarding handling of multiple WriteClosers
88ddf1d0d98d Address Security Issue GHSA-jc7w-c686-c4v9
7 files changed · +217 −71
lzma/header2_test.go+1 −0 modified@@ -78,6 +78,7 @@ func TestHeaderLen(t *testing.T) { } func chunkHeaderSamples(t *testing.T) []chunkHeader { + _ = t props := Properties{LC: 3, LP: 0, PB: 2} headers := make([]chunkHeader, 0, 12) for c := cEOS; c <= cLRND; c++ {
lzma/header.go+29 −26 modified@@ -60,36 +60,36 @@ const noHeaderSize uint64 = 1<<64 - 1 // HeaderLen provides the length of the LZMA file header. const HeaderLen = 13 -// header represents the header of an LZMA file. -type header struct { - properties Properties - dictCap int - // uncompressed size; negative value if no size is given - size int64 +// Header represents the Header of an LZMA file. +type Header struct { + Properties Properties + DictSize uint32 + // uncompressed Size; negative value if no Size is given + Size int64 } // marshalBinary marshals the header. -func (h *header) marshalBinary() (data []byte, err error) { - if err = h.properties.verify(); err != nil { +func (h *Header) marshalBinary() (data []byte, err error) { + if err = h.Properties.verify(); err != nil { return nil, err } - if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) { + if !(h.DictSize <= MaxDictCap) { return nil, fmt.Errorf("lzma: DictCap %d out of range", - h.dictCap) + h.DictSize) } data = make([]byte, 13) // property byte - data[0] = h.properties.Code() + data[0] = h.Properties.Code() // dictionary capacity - putUint32LE(data[1:5], uint32(h.dictCap)) + putUint32LE(data[1:5], uint32(h.DictSize)) // uncompressed size var s uint64 - if h.size > 0 { - s = uint64(h.size) + if h.Size > 0 { + s = uint64(h.Size) } else { s = noHeaderSize } @@ -99,20 +99,20 @@ func (h *header) marshalBinary() (data []byte, err error) { } // unmarshalBinary unmarshals the header. 
-func (h *header) unmarshalBinary(data []byte) error { +func (h *Header) unmarshalBinary(data []byte) error { if len(data) != HeaderLen { return errors.New("lzma.unmarshalBinary: data has wrong length") } // properties var err error - if h.properties, err = PropertiesForCode(data[0]); err != nil { + if h.Properties, err = PropertiesForCode(data[0]); err != nil { return err } // dictionary capacity - h.dictCap = int(uint32LE(data[1:])) - if h.dictCap < 0 { + h.DictSize = uint32LE(data[1:]) + if int(h.DictSize) < 0 { return errors.New( "LZMA header: dictionary capacity exceeds maximum " + "integer") @@ -121,10 +121,10 @@ func (h *header) unmarshalBinary(data []byte) error { // uncompressed size s := uint64LE(data[5:]) if s == noHeaderSize { - h.size = -1 + h.Size = -1 } else { - h.size = int64(s) - if h.size < 0 { + h.Size = int64(s) + if h.Size < 0 { return errors.New( "LZMA header: uncompressed size " + "out of int64 range") @@ -134,9 +134,9 @@ func (h *header) unmarshalBinary(data []byte) error { return nil } -// validDictCap checks whether the dictionary capacity is correct. This +// validDictSize checks whether the dictionary capacity is correct. This // is used to weed out wrong file headers. -func validDictCap(dictcap int) bool { +func validDictSize(dictcap int) bool { if int64(dictcap) == MaxDictCap { return true } @@ -155,13 +155,16 @@ func validDictCap(dictcap int) bool { // dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If // there is an explicit size it must not exceed 256 GiB. The length of // the data argument must be HeaderLen. +// +// This function should be disregarded because there is no guarantee that LZMA +// files follow the constraints. func ValidHeader(data []byte) bool { - var h header + var h Header if err := h.unmarshalBinary(data); err != nil { return false } - if !validDictCap(h.dictCap) { + if !validDictSize(int(h.DictSize)) { return false } - return h.size < 0 || h.size <= 1<<38 + return h.Size < 0 || h.Size <= 1<<38 }
lzma/header_test.go+11 −11 modified@@ -7,18 +7,18 @@ package lzma import "testing" func TestHeaderMarshalling(t *testing.T) { - tests := []header{ - {properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024, - size: -1}, - {properties: Properties{4, 3, 3}, dictCap: 4096, - size: 10}, + tests := []Header{ + {Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024, + Size: -1}, + {Properties: Properties{4, 3, 3}, DictSize: 4096, + Size: 10}, } for _, h := range tests { data, err := h.marshalBinary() if err != nil { t.Fatalf("marshalBinary error %s", err) } - var g header + var g Header if err = g.unmarshalBinary(data); err != nil { t.Fatalf("unmarshalBinary error %s", err) } @@ -29,11 +29,11 @@ func TestHeaderMarshalling(t *testing.T) { } func TestValidHeader(t *testing.T) { - tests := []header{ - {properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024, - size: -1}, - {properties: Properties{4, 3, 3}, dictCap: 4096, - size: 10}, + tests := []Header{ + {Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024, + Size: -1}, + {Properties: Properties{4, 3, 3}, DictSize: 4096, + Size: 10}, } for _, h := range tests { data, err := h.marshalBinary()
lzma/reader.go+108 −15 modified@@ -6,25 +6,32 @@ // Reader and Writer support the classic LZMA format. Reader2 and // Writer2 support the decoding and encoding of LZMA2 streams. // -// The package is written completely in Go and doesn't rely on any external +// The package is written completely in Go and does not rely on any external // library. package lzma import ( "errors" + "fmt" "io" ) // ReaderConfig stores the parameters for the reader of the classic LZMA // format. type ReaderConfig struct { + // Since v0.5.14 this parameter sets an upper limit for a .lzma file's + // dictionary size. This helps to mitigate problems with mangled + // headers. DictCap int } // fill converts the zero values of the configuration to the default values. func (c *ReaderConfig) fill() { if c.DictCap == 0 { - c.DictCap = 8 * 1024 * 1024 + // set an upper limit of 2 GB for dictionary capacity to address + // the zero prefix security issue. + c.DictCap = 1 << 31 + // original: c.DictCap = 8 * 1024 * 1024 } } @@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error { } // Reader provides a reader for LZMA files or streams. +// +// # Security concerns +// +// Note that LZMA format doesn't support a magic marker in the header. So +// [NewReader] cannot determine whether it reads the actual header. For instance +// the LZMA stream might have a zero byte in front of the reader, leading to +// larger dictionary sizes and file sizes. The code will detect later that there +// are problems with the stream, but the dictionary has already been allocated +// and this might consume a lot of memory. +// +// Version 0.5.14 introduces built-in mitigations: +// +// - The [ReaderConfig] DictCap field is now interpreted as a limit for the +// dictionary size. +// - The default is 2 Gigabytes (2^31 bytes). +// - Users can check with the [Reader.Header] method what the actual values are in +// their LZMA files and set a smaller limit using [ReaderConfig]. 
+// - The dictionary size doesn't exceed the larger of the file size and +// the minimum dictionary size. This is another measure to prevent huge +// memory allocations for the dictionary. +// - The code supports stream sizes only up to a pebibyte (1024^5). type Reader struct { - lzma io.Reader - h header - d *decoder + lzma io.Reader + header Header + // headerOrig stores the original header read from the stream. + headerOrig Header + d *decoder } // NewReader creates a new reader for an LZMA stream using the classic @@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) { return ReaderConfig{}.NewReader(lzma) } +// ErrDictSize reports about an error of the dictionary size. +type ErrDictSize struct { + ConfigDictCap int + HeaderDictSize uint32 + Message string +} + +// Error returns the error message. +func (e *ErrDictSize) Error() string { + return e.Message +} + +func newErrDictSize(messageformat string, + configDictCap int, headerDictSize uint32, + args ...interface{}) *ErrDictSize { + newArgs := make([]interface{}, len(args)+2) + newArgs[0] = configDictCap + newArgs[1] = headerDictSize + copy(newArgs[2:], args) + return &ErrDictSize{ + ConfigDictCap: configDictCap, + HeaderDictSize: headerDictSize, + Message: fmt.Sprintf(messageformat, newArgs...), + } +} + +// We support only files not larger than 1 << 50 bytes (a pebibyte, 1024^5). +const maxStreamSize = 1 << 50 + // NewReader creates a new reader for an LZMA stream in the classic -// format. The function reads and verifies the the header of the LZMA +// format. The function reads and verifies the header of the LZMA // stream. 
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) { if err = c.Verify(); err != nil { @@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) { return nil, err } r = &Reader{lzma: lzma} - if err = r.h.unmarshalBinary(data); err != nil { + if err = r.header.unmarshalBinary(data); err != nil { return nil, err } - if r.h.dictCap < MinDictCap { - r.h.dictCap = MinDictCap + r.headerOrig = r.header + dictSize := int64(r.header.DictSize) + if int64(c.DictCap) < dictSize { + return nil, newErrDictSize( + "lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d", + c.DictCap, uint32(dictSize), + ) + } + if dictSize < MinDictCap { + dictSize = MinDictCap + } + // original code: disabled this because there is no point in increasing + // the dictionary above what is stated in the file. + /* + if int64(c.DictCap) > int64(dictSize) { + dictSize = int64(c.DictCap) + } + */ + size := r.header.Size + if size >= 0 && size < dictSize { + dictSize = size } - dictCap := r.h.dictCap - if c.DictCap > dictCap { - dictCap = c.DictCap + // Protect against modified or malicious headers. + if size > maxStreamSize { + return nil, fmt.Errorf( + "lzma: stream size %d exceeds a pebibyte (1024^5)", + size) } + if dictSize < MinDictCap { + dictSize = MinDictCap + } + + r.header.DictSize = uint32(dictSize) - state := newState(r.h.properties) - dict, err := newDecoderDict(dictCap) + state := newState(r.header.Properties) + dict, err := newDecoderDict(int(dictSize)) if err != nil { return nil, err } - r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size) + r.d, err = newDecoder(ByteReader(lzma), state, dict, r.header.Size) if err != nil { return nil, err } return r, nil } +// Header returns the header as read from the LZMA stream. 
It is intended to +// allow the user to understand what parameters are typically provided in the +// headers of the LZMA files and set the DictCap field in [ReaderConfig] +// accordingly. +func (r *Reader) Header() (h Header, ok bool) { + return r.headerOrig, r.d != nil +} + // EOSMarker indicates that an EOS marker has been encountered. func (r *Reader) EOSMarker() bool { return r.d.eosMarker
lzma/reader_test.go+45 −1 modified@@ -34,7 +34,7 @@ const ( ) func readOrigFile(t *testing.T) []byte { - orig, err := ioutil.ReadFile(filepath.Join(dirname, origname)) + orig, err := os.ReadFile(filepath.Join(dirname, origname)) if err != nil { t.Fatalf("ReadFile: %s", err) } @@ -349,3 +349,47 @@ func TestMinDictSize(t *testing.T) { t.Fatalf("got %q; want %q", u, uncompressed) } } + +func TestZeroPrefixIssue(t *testing.T) { + files := []string{ + "examples/a.lzma", + "examples/a_lp1_lc2_pb1.lzma", + "examples/a_eos_and_size.lzma", + "fox.lzma", + } + + zeroPrefix := []byte{0} + rcfg := ReaderConfig{} + + for _, tc := range files { + t.Run(tc, func(t *testing.T) { + f, err := os.Open(tc) + if err != nil { + t.Fatalf("Open(%q) error %s", tc, err) + } + defer f.Close() + zp := bytes.NewReader(zeroPrefix) + z := io.MultiReader(zp, f) + l, err := rcfg.NewReader(z) + if err != nil { + t.Logf("NewReader(z) for %s error %s", tc, err) + return + } + h, ok := l.Header() + t.Logf("Header %+v ok %v", h, ok) + actualDictSize := len(l.d.Dict.buf.data)-1 + t.Logf("Actual dictionary size: %d", actualDictSize) + if actualDictSize > MinDictCap && h.Size >= 0 && + h.Size < int64(actualDictSize) { + t.Errorf("actualDictSize %d smaller than file size %d", + actualDictSize, h.Size) + } + _, err = io.ReadAll(l) + if err == nil { + t.Errorf("ReadAll for %q: no error", tc) + return + } + t.Logf("%q: error %s", tc, err) + }) + } +}
lzma/writer.go+15 −15 modified@@ -13,7 +13,7 @@ import ( // MinDictCap and MaxDictCap provide the range of supported dictionary // capacities. const ( - MinDictCap = 1 << 12 + MinDictCap = 1 << 12 MaxDictCap = 1<<32 - 1 ) @@ -96,21 +96,21 @@ func (c *WriterConfig) Verify() error { } // header returns the header structure for this configuration. -func (c *WriterConfig) header() header { - h := header{ - properties: *c.Properties, - dictCap: c.DictCap, - size: -1, +func (c *WriterConfig) header() Header { + h := Header{ + Properties: *c.Properties, + DictSize: uint32(c.DictCap), + Size: -1, } if c.SizeInHeader { - h.size = c.Size + h.Size = c.Size } return h } // Writer writes an LZMA stream in the classic format. type Writer struct { - h header + h Header bw io.ByteWriter buf *bufio.Writer e *encoder @@ -130,12 +130,12 @@ func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) { w.buf = bufio.NewWriter(lzma) w.bw = w.buf } - state := newState(w.h.properties) - m, err := c.Matcher.new(w.h.dictCap) + state := newState(w.h.Properties) + m, err := c.Matcher.new(int(w.h.DictSize)) if err != nil { return nil, err } - dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m) + dict, err := newEncoderDict(int(w.h.DictSize), c.BufSize, m) if err != nil { return nil, err } @@ -171,8 +171,8 @@ func (w *Writer) writeHeader() error { // Write puts data into the Writer. func (w *Writer) Write(p []byte) (n int, err error) { - if w.h.size >= 0 { - m := w.h.size + if w.h.Size >= 0 { + m := w.h.Size m -= w.e.Compressed() + int64(w.e.dict.Buffered()) if m < 0 { m = 0 @@ -192,9 +192,9 @@ func (w *Writer) Write(p []byte) (n int, err error) { // Close closes the writer stream. It ensures that all data from the // buffer will be compressed and the LZMA stream will be finished. func (w *Writer) Close() error { - if w.h.size >= 0 { + if w.h.Size >= 0 { n := w.e.Compressed() + int64(w.e.dict.Buffered()) - if n != w.h.size { + if n != w.h.Size { return errSize } }
TODO.md+8 −3 modified@@ -1,8 +1,13 @@ # TODO list -## Release v0.5.x - -1. Support check flag in gxz command. +## Release v0.5.14 + +* If the DictionarySize is larger than the UncompressedSize set it to + UncompressedSize +* make a Header() (h Header, ok bool) function so the user can implement its own + policy +* Add documentation to Reader to explain the situation +* Add a TODO for the rewrite version ## Release v0.6
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References: 4
News mentions: 0
No linked articles in our index yet.