High severity · OSV Advisory · Published Dec 26, 2025 · Updated Dec 26, 2025
lmdeploy vulnerable to Arbitrary Code Execution via Insecure Deserialization in torch.load()
CVE-2025-67729
Description
LMDeploy is a toolkit for compressing, deploying, and serving LLMs. Prior to version 0.11.1, an insecure deserialization vulnerability exists in lmdeploy where torch.load() is called without the weights_only=True parameter when loading model checkpoint files. This allows an attacker to execute arbitrary code on the victim's machine when they load a malicious .bin or .pt model file. This issue has been patched in version 0.11.1.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| lmdeploy (PyPI) | < 0.11.1 | 0.11.1 |
Affected products: 1
Patches: 1
eb04b4281c57 — fix insecure deserialization when calling torch.load() (#4202)
6 files changed · +5 −146
lmdeploy/lite/apis/auto_awq.py · +1 −1 · modified

```diff
@@ -98,7 +98,7 @@ def auto_awq(model: str,
     layer_type = LAYER_TYPE_MAP[type(model).__name__]
     fc2fcs = FC_FCS_MAP[layer_type]
     norm2fcs = NORM_FCS_MAP[layer_type]
-    input_stats = torch.load(osp.join(work_dir, 'inputs_stats.pth'))
+    input_stats = torch.load(osp.join(work_dir, 'inputs_stats.pth'), weights_only=True)
     layers = collect_target_modules(model, layer_type)
     fcs = {}
     for l_name, layer in layers.items():
```
lmdeploy/lite/apis/get_small_sharded_hf.py · +1 −1 · modified

```diff
@@ -38,7 +38,7 @@ def main():
     checkpoints = set(index['weight_map'].values())
     for ckpt in checkpoints:
-        state_dict = torch.load(os.path.join(args.src_dir, ckpt), map_location='cuda')
+        state_dict = torch.load(os.path.join(args.src_dir, ckpt), map_location='cuda', weights_only=True)
         keys = sorted(list(state_dict.keys()))
         for k in keys:
             new_state_dict_name = 'pytorch_model-{:05d}-of-{:05d}.bin'.format(cnt, n_shard)
```
lmdeploy/lite/apis/kv_qparams.py · +0 −141 · removed

```diff
@@ -1,141 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import os
-from pathlib import Path
-from typing import Union
-
-import numpy as np
-import torch
-
-
-def _export_weight(into: str, kv_qparams: np.array, out_path: str, tm_params: dict = None):
-    """Save kv_qparams to disk or copy to tm_params."""
-    if tm_params is None:
-        print(into)
-        kv_qparams.tofile(out_path)
-    else:
-        name = os.path.basename(out_path)
-        src = torch.from_numpy(kv_qparams)
-        for tm_tensor in tm_params[name]:
-            tm_tensor.copy_from(src)
-        tm_params.pop(name)
-
-
-def _export_sym(key_stats: dict,
-                value_stats: dict,
-                bits: int,
-                out_dir: Union[str, Path],
-                tp: int = 1,
-                tm_params: dict = None) -> None:
-    """Export symmetric quantization parameters to specified directory."""
-    keys_absmax = key_stats['absmax']
-    values_absmax = value_stats['absmax']
-    for layer_idx, name in enumerate(keys_absmax.keys()):
-        k_absmax = keys_absmax[name]
-        v_absmax = values_absmax[name]
-
-        heads, dims = k_absmax.shape
-        assert heads % tp == 0
-
-        mp_k_absmax = torch.chunk(k_absmax, tp)
-        mp_v_absmax = torch.chunk(v_absmax, tp)
-        for i in range(tp):
-            # quant: q = f / scale
-            # dequant: f = q * scale
-            k_s = mp_k_absmax[i].max() / (2**(bits - 1) - 1)
-            v_s = mp_v_absmax[i].max() / (2**(bits - 1) - 1)
-
-            kv_qparams = np.array([k_s, v_s], dtype=np.float32)
-            out_path = out_dir / f'layers.{layer_idx}.past_kv_scale.{i}.weight'  # noqa: E501
-            info = f'Layer {layer_idx} MP {i} qparam: {k_s} \t{v_s}'
-            _export_weight(info, kv_qparams, out_path, tm_params)
-
-
-def _export_asym(key_stats: dict,
-                 value_stats: dict,
-                 bits: int,
-                 out_dir: Union[str, Path],
-                 tp: int = 1,
-                 tm_params: dict = None) -> None:
-    """Export asymmetric quantization parameters to specified directory."""
-    keys_min = key_stats['min']
-    values_min = value_stats['min']
-
-    keys_max = key_stats['max']
-    values_max = value_stats['max']
-    for layer_idx, name in enumerate(keys_min.keys()):
-        k_max = keys_max[name]
-        v_max = values_max[name]
-
-        k_min = keys_min[name]
-        v_min = values_min[name]
-
-        heads, dims = k_min.shape
-        assert heads % tp == 0
-
-        tp_k_min = torch.chunk(k_min, tp)
-        tp_v_min = torch.chunk(v_min, tp)
-
-        tp_k_max = torch.chunk(k_max, tp)
-        tp_v_max = torch.chunk(v_max, tp)
-        for i in range(tp):
-            # zp = (min+max) / 2
-            # scale = (max-min) / 255
-            # quant: q = (f-zp) / scale
-            # dequant: f = q * scale + zp
-            k_min = tp_k_min[i].min()
-            v_min = tp_v_min[i].min()
-
-            k_max = tp_k_max[i].max()
-            v_max = tp_v_max[i].max()
-
-            k_scale = (k_max - k_min) / (2**bits - 1)
-            v_scale = (v_max - v_min) / (2**bits - 1)
-
-            k_zp = (k_max + k_min) / 2
-            v_zp = (v_max + v_min) / 2
-
-            kv_qparams = np.array([k_scale, k_zp, v_scale, v_zp], dtype=np.float32)
-            out_path = out_dir / f'layers.{layer_idx}.past_kv_scale.{i}.weight'
-            info = f'Layer {layer_idx} MP {i} qparam: ' \
-                   f'\t{k_scale} \t{k_zp} \t{v_scale} \t{v_zp}'
-            _export_weight(info, kv_qparams, out_path, tm_params)
-
-
-def main(work_dir: str,
-         turbomind_dir: str,
-         kv_bits: int = 8,
-         kv_sym: bool = False,
-         num_tp: int = 1,
-         tm_params: dict = None) -> None:
-    """Main function to export key and value stats.
-
-    Args:
-        work_dir (Union[str, Path]): Directory path where the stats are saved.
-        turbomind_dir (Union[str, Path]): Directory path where to
-            save the results.
-        kv_bits (int, optional): Number of bits for quantization.
-            Defaults to 8.
-        kv_sym (bool, optional): Whether to use symmetric quantizaiton.
-            Defaults to False.
-        num_tp (int, optional): Number of tensor parallelism. Defaults to 1.
-        tm_params (dict): turbomind model weights.
-    """
-
-    work_dir = Path(work_dir)
-
-    tm_dir = Path(turbomind_dir)
-    assert tm_dir.exists(), 'The specified TurboMind directory does not exist.'
-
-    key_stats = torch.load(work_dir / 'key_stats.pth')
-    value_stats = torch.load(work_dir / 'value_stats.pth')
-
-    if kv_sym:
-        _export_sym(key_stats, value_stats, kv_bits, tm_dir, num_tp, tm_params)
-    else:
-        _export_asym(key_stats, value_stats, kv_bits, tm_dir, num_tp, tm_params)
-
-
-if __name__ == '__main__':
-    import fire
-
-    fire.Fire(main)
```
lmdeploy/lite/apis/smooth_quant.py · +1 −1 · modified

```diff
@@ -58,7 +58,7 @@ def smooth_quant(model: str,
     # calibrate function exports the calibration statistics
     # (inputs, outputs, keys and values) to `work_dir`.
-    inp_stats = torch.load(work_dir / 'inputs_stats.pth')
+    inp_stats = torch.load(work_dir / 'inputs_stats.pth', weights_only=True)
     act_scales = inp_stats['absmax']
     model_type = type(model).__name__
```
lmdeploy/turbomind/deploy/loader.py · +1 −1 · modified

```diff
@@ -119,7 +119,7 @@ def items(self):
         params = defaultdict(dict)
         for shard in self.shards:
             misc = {}
-            tmp = torch.load(shard, map_location='cpu')
+            tmp = torch.load(shard, map_location='cpu', weights_only=True)
             for k, v in tmp.items():
                 match = re.findall(self.pattern, k)
                 if not match:
```
lmdeploy/vl/model/utils.py · +1 −1 · modified

```diff
@@ -19,7 +19,7 @@ def load_weight_ckpt(ckpt: str) -> Dict[str, torch.Tensor]:
     if ckpt.endswith('.safetensors'):
         return load_file(ckpt)
     else:
-        return torch.load(ckpt)
+        return torch.load(ckpt, weights_only=True)
 
 
 def get_used_weight_files(folder: str, state_dict: Dict[str, torch.Tensor]) -> List[str]:
```
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References (4)
- github.com/advisories/GHSA-9pf3-7rrr-x5jh (ghsa · ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2025-67729 (ghsa · ADVISORY)
- github.com/InternLM/lmdeploy/commit/eb04b4281c5784a5cff5ea639c8f96b33b3ae5ee (ghsa · x_refsource_MISC · WEB)
- github.com/InternLM/lmdeploy/security/advisories/GHSA-9pf3-7rrr-x5jh (ghsa · x_refsource_CONFIRM · WEB)
News mentions: 0
No linked articles in our index yet.