MS SWIFT WEB-UI RCE Vulnerability
Description
I. Detailed Description:
This includes scenarios, screenshots, and vulnerability reproduction methods. For account-related vulnerabilities, please provide test accounts. If the reproduction process is complex, you may record a video, upload it to a cloud drive (e.g. "Taopan"), and attach the link.
1. Install ms-swift `` pip install ms-swift -U ``
2. Start web-ui `` swift web-ui --lang en ``
- After startup, access through browser at http://localhost:7860/ to see the launched fine-tuning framework program
3. Fill in necessary parameters. In the LLM Training interface, fill in the required parameters, including Model id and Dataset code. The --output_dir can be filled in arbitrarily, as it will be modified later through packet capture
- Click Begin to start training. Capture packets and modify the parameter corresponding to --output_dir
You can see the concatenated command being executed in the terminal where web-ui was started
- Wait for the program to run (testing shows it requires at least 5 minutes), and you can observe the effect of command execution creating files
II. Vulnerability Proof (Write POC here): `` /tmp/xxx'; touch /tmp/inject_success_1; # ``
III. Fix Solution: 1. The swift.ui.llm_train.llm_train.LLMTrain#train() method should not directly concatenate parameters with commands after receiving commands from the frontend 2. The swift.ui.llm_train.llm_train.LLMTrain#train_local() method should not use os.system for execution, but should be changed to subprocess.run([cmd, arg1, arg2...]) format
Author
- Discovered by: TencentAISec
- Contact: *security@tencent.com*
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| ms-swift (PyPI) | < 3.7.0 | 3.7.0 |
Affected products
- Range: < 3.7.0
Patches
Commit `132f09e9b0a44`: Fix run cmd with os.system (#5182)
10 files changed · +195 −48
swift/ui/llm_eval/llm_eval.py+19 −5 modified@@ -18,6 +18,7 @@ from swift.ui.llm_eval.eval import Eval from swift.ui.llm_eval.model import Model from swift.ui.llm_eval.runtime import EvalRuntime +from swift.ui.llm_train.llm_train import run_command_in_background_with_popen from swift.utils import get_device_count @@ -142,16 +143,26 @@ def eval(cls, *args): for key, value in kwargs.items() }) params = '' + command = ['swift', 'eval'] sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' - params += more_params_cmd + ' ' + command.extend([f'--{e}', f'{kwargs[e]}']) + if more_params_cmd != '': + params += f'{more_params_cmd.strip()} ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + command.extend([f'--{param[0]}', ' '.join(param[1:])]) + all_envs = {} devices = other_kwargs['gpu_id'] devices = [d for d in devices if d] assert (len(devices) == 1 or 'cpu' not in devices) @@ -160,8 +171,10 @@ def eval(cls, *args): if gpus != 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' now = datetime.now() @@ -172,18 +185,19 @@ def eval(cls, *args): log_file = os.path.join(os.getcwd(), f'{file_path}/run_eval.log') eval_args.log_file = log_file params += f'--log_file "{log_file}" ' + command.extend(['--log_file', f'{log_file}']) 
params += '--ignore_args_error true ' + command.extend(['--ignore_args_error', 'true']) if sys.platform == 'win32': if cuda_param: cuda_param = f'set {cuda_param} && ' run_command = f'{cuda_param}start /b swift eval {params} > {log_file} 2>&1' else: run_command = f'{cuda_param} nohup swift eval {params} > {log_file} 2>&1 &' - return run_command, eval_args, log_file + return command, all_envs, run_command, eval_args, log_file @classmethod def eval_model(cls, *args): - run_command, eval_args, log_file = cls.eval(*args) - os.system(run_command) - time.sleep(2) + command, all_envs, run_command, eval_args, log_file = cls.eval(*args) + run_command_in_background_with_popen(command, all_envs, log_file) return gr.update(open=True), EvalRuntime.refresh_tasks(log_file)
swift/ui/llm_export/llm_export.py+21 −5 modified@@ -5,6 +5,7 @@ import time from datetime import datetime from functools import partial +from subprocess import DEVNULL, PIPE, STDOUT, Popen from typing import Type import gradio as gr @@ -18,6 +19,7 @@ from swift.ui.llm_export.export import Export from swift.ui.llm_export.model import Model from swift.ui.llm_export.runtime import ExportRuntime +from swift.ui.llm_train.llm_train import run_command_in_background_with_popen from swift.utils import get_device_count @@ -139,16 +141,26 @@ def export(cls, *args): for key, value in kwargs.items() }) params = '' + command = ['swift', 'export'] sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' - params += more_params_cmd + ' ' + command.extend([f'--{e}', f'{kwargs[e]}']) + if more_params_cmd != '': + params += f'{more_params_cmd.strip()} ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + command.extend([f'--{param[0]}', ' '.join(param[1:])]) + all_envs = {} devices = other_kwargs['gpu_id'] devices = [d for d in devices if d] assert (len(devices) == 1 or 'cpu' not in devices) @@ -157,8 +169,10 @@ def export(cls, *args): if gpus != 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' now = datetime.now() @@ -169,10 +183,13 
@@ def export(cls, *args): log_file = os.path.join(os.getcwd(), f'{file_path}/run_export.log') export_args.log_file = log_file params += f'--log_file "{log_file}" ' + command.extend(['--log_file', f'{log_file}']) params += '--ignore_args_error true ' + command.extend(['--ignore_args_error', 'true']) additional_param = '' if export_args.quant_method == 'gptq': additional_param = 'OMP_NUM_THREADS=14' + all_envs['OMP_NUM_THREADS'] = '14' if sys.platform == 'win32': if cuda_param: cuda_param = f'set {cuda_param} && ' @@ -181,11 +198,10 @@ def export(cls, *args): run_command = f'{cuda_param}{additional_param}start /b swift export {params} > {log_file} 2>&1' else: run_command = f'{cuda_param} {additional_param} nohup swift export {params} > {log_file} 2>&1 &' - return run_command, export_args, log_file + return command, all_envs, run_command, export_args, log_file @classmethod def export_model(cls, *args): - run_command, export_args, log_file = cls.export(*args) - os.system(run_command) - time.sleep(2) + command, all_envs, run_command, export_args, log_file = cls.export(*args) + run_command_in_background_with_popen(command, all_envs, log_file) return gr.update(open=True), ExportRuntime.refresh_tasks(log_file)
swift/ui/llm_grpo/external_rollout.py+22 −4 modified@@ -3,8 +3,10 @@ import re import sys import time +from copy import deepcopy from datetime import datetime from functools import partial +from subprocess import DEVNULL, PIPE, STDOUT, Popen from typing import Type import gradio as gr @@ -16,6 +18,7 @@ from swift.llm import DeployArguments, RLHFArguments, RolloutArguments from swift.ui.base import BaseUI from swift.ui.llm_grpo.external_runtime import RolloutRuntime +from swift.ui.llm_train.llm_train import run_command_in_background_with_popen from swift.utils import get_device_count, get_logger logger = get_logger() @@ -186,28 +189,41 @@ def rollout(cls, *args): if rollout_args.port in RolloutRuntime.get_all_ports(): raise gr.Error(cls.locale('port_alert', cls.lang)['value']) params = '' + command = ['swift', 'rollout'] sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' + command.extend([f'--{e}', f'{kwargs[e]}']) if 'port' not in kwargs: params += f'--port "{rollout_args.port}" ' - params += more_params_cmd + ' ' + command.extend(['--port', f'{rollout_args.port}']) + if more_params_cmd != '': + params += f'{more_params_cmd.strip()} ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + command.extend([f'--{param[0]}', ' '.join(param[1:])]) devices = other_kwargs['rollout_gpu_id'] devices = [d for d in devices if d] assert (len(devices) == 1 or 'cpu' not in devices) gpus = ','.join(devices) cuda_param = '' + all_envs = {} if gpus 
!= 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' output_dir = 'rollout_output' @@ -219,20 +235,22 @@ def rollout(cls, *args): log_file = os.path.join(os.getcwd(), f'{file_path}/run_rollout.log') rollout_args.log_file = log_file params += f'--log_file "{log_file}" ' + command.extend(['--log_file', f'{log_file}']) params += '--ignore_args_error true ' + command.extend(['--ignore_args_error', 'true']) if sys.platform == 'win32': if cuda_param: cuda_param = f'set {cuda_param} && ' run_command = f'{cuda_param}start /b swift rollout {params} > {log_file} 2>&1' else: run_command = f'{cuda_param} nohup swift rollout {params} > {log_file} 2>&1 &' - return run_command, rollout_args, log_file + return command, all_envs, run_command, rollout_args, log_file @classmethod def rollout_model(cls, *args): - run_command, rollout_args, log_file = cls.rollout(*args) + command, all_envs, run_command, rollout_args, log_file = cls.rollout(*args) logger.info(f'Running rollout command: {run_command}') - os.system(run_command) + run_command_in_background_with_popen(command, all_envs, log_file) gr.Info(cls.locale('load_alert', cls.lang)['value']) time.sleep(2) running_task = RolloutRuntime.refresh_tasks(log_file)
swift/ui/llm_grpo/external_runtime.py+12 −5 modified@@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os +import subprocess import sys import time from typing import Dict, List, Tuple, Type @@ -121,14 +122,20 @@ def kill_task(cls, task): log_file = all_args['log_file'] parent_process = psutil.Process(int(pid)) children = parent_process.children(recursive=True) + commands = [] if sys.platform == 'win32': - os.system(f'taskkill /f /t /pid "{pid}"') + commands.append(['taskkill', '/f', '/t', '/pid', pid]) for child in children: - os.system(f'taskkill /f /t /pid "{child.pid}"') + commands.append(['taskkill', '/f', '/t', '/pid', f'{str(child.pid)}']) else: - os.system(f'pkill -9 -f {log_file}') + commands.append(['pkill', '-9', '-f', log_file]) for child in children: - os.system(f'kill -9 {child.pid}') - time.sleep(1) + commands.append(['kill', '-9', f'{str(child.pid)}']) + for cmd in commands: + try: + result = subprocess.run(cmd, capture_output=True, text=True) + assert result.returncode == 0 + except Exception as e: + raise e cls.break_log_event(task) return [cls.refresh_tasks()] + [gr.update(value=None)]
swift/ui/llm_infer/llm_infer.py+20 −5 modified@@ -19,6 +19,7 @@ from swift.ui.base import BaseUI from swift.ui.llm_infer.model import Model from swift.ui.llm_infer.runtime import Runtime +from swift.ui.llm_train.llm_train import run_command_in_background_with_popen from swift.utils import get_device_count, get_logger logger = get_logger() @@ -232,18 +233,29 @@ def deploy(cls, *args): if deploy_args.port in Runtime.get_all_ports(): raise gr.Error(cls.locale('port_alert', cls.lang)['value']) params = '' + command = ['swift', 'deploy'] sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' + command.extend([f'--{e}', f'{kwargs[e]}']) if 'port' not in kwargs: params += f'--port "{deploy_args.port}" ' - params += more_params_cmd + ' ' + command.extend(['--port', f'{deploy_args.port}']) + if more_params_cmd != '': + params += f'{more_params_cmd.strip()} ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + command.extend([f'--{param[0]}', ' '.join(param[1:])]) + all_envs = {} devices = other_kwargs['gpu_id'] devices = [d for d in devices if d] assert (len(devices) == 1 or 'cpu' not in devices) @@ -252,8 +264,10 @@ def deploy(cls, *args): if gpus != 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' now = datetime.now() @@ 
-264,22 +278,23 @@ def deploy(cls, *args): log_file = os.path.join(os.getcwd(), f'{file_path}/run_deploy.log') deploy_args.log_file = log_file params += f'--log_file "{log_file}" ' + command.extend(['--log_file', f'{log_file}']) params += '--ignore_args_error true ' + command.extend(['--ignore_args_error', 'true']) if sys.platform == 'win32': if cuda_param: cuda_param = f'set {cuda_param} && ' run_command = f'{cuda_param}start /b swift deploy {params} > {log_file} 2>&1' else: run_command = f'{cuda_param} nohup swift deploy {params} > {log_file} 2>&1 &' - return run_command, deploy_args, log_file + return command, all_envs, run_command, deploy_args, log_file @classmethod def deploy_model(cls, *args): - run_command, deploy_args, log_file = cls.deploy(*args) + command, all_envs, run_command, deploy_args, log_file = cls.deploy(*args) logger.info(f'Running deployment command: {run_command}') - os.system(run_command) + run_command_in_background_with_popen(command, all_envs, log_file) gr.Info(cls.locale('load_alert', cls.lang)['value']) - time.sleep(2) running_task = Runtime.refresh_tasks(log_file) return gr.update(open=True), running_task
swift/ui/llm_infer/runtime.py+8 −3 modified@@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import collections import os.path +import subprocess import sys import time from datetime import datetime @@ -250,10 +251,14 @@ def kill_task(cls, task): pid, all_args = cls.parse_info_from_cmdline(task) log_file = all_args['log_file'] if sys.platform == 'win32': - os.system(f'taskkill /f /t /pid "{pid}"') + command = ['taskkill', '/f', '/t', '/pid', pid] else: - os.system(f'pkill -9 -f {log_file}') - time.sleep(1) + command = ['pkill', '-9', '-f', log_file] + try: + result = subprocess.run(command, capture_output=True, text=True) + assert result.returncode == 0 + except Exception as e: + raise e cls.break_log_event(task) return [cls.refresh_tasks()] + [gr.update(value=None)]
swift/ui/llm_sample/llm_sample.py+21 −5 modified@@ -3,8 +3,10 @@ import re import sys import time +from copy import deepcopy from datetime import datetime from functools import partial +from subprocess import DEVNULL, PIPE, STDOUT, Popen from typing import Type import gradio as gr @@ -18,6 +20,7 @@ from swift.ui.llm_sample.model import Model from swift.ui.llm_sample.runtime import SampleRuntime from swift.ui.llm_sample.sample import Sample +from swift.ui.llm_train.utils import run_command_in_background_with_popen from swift.utils import get_device_count, get_logger logger = get_logger() @@ -208,17 +211,26 @@ def sample(cls, *args): }) params = '' + command = ['swift', 'sample'] sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' - - params += more_params_cmd + ' ' + command.extend([f'--{e}', f'{kwargs[e]}']) + if more_params_cmd != '': + params += more_params_cmd + ' ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + command.extend([f'--{param[0]}', ' '.join(param[1:])]) + all_envs = {} devices = other_kwargs['gpu_id'] devices = [d for d in devices if d] assert (len(devices) == 1 or 'cpu' not in devices) @@ -227,8 +239,10 @@ def sample(cls, *args): if gpus != 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' 
now = datetime.now() @@ -239,20 +253,22 @@ def sample(cls, *args): log_file = os.path.join(os.getcwd(), f'{file_path}/run_sample.log') sample_args.log_file = log_file params += f'--log_file "{log_file}" ' + command.extend(['--log_file', f'{log_file}']) params += '--ignore_args_error true ' + command.extend(['--ignore_args_error', 'true']) if sys.platform == 'win32': if cuda_param: cuda_param = f'set {cuda_param} && ' run_command = f'{cuda_param}start /b swift sample {params} > {log_file} 2>&1' else: run_command = f'{cuda_param} nohup swift sample {params} > {log_file} 2>&1 &' - return run_command, sample_args, log_file + return command, all_envs, run_command, sample_args, log_file @classmethod def sample_model(cls, *args): - run_command, sample_args, log_file = cls.sample(*args) + command, all_envs, run_command, sample_args, log_file = cls.sample(*args) logger.info(f'Running sample command: {run_command}') - os.system(run_command) + run_command_in_background_with_popen(command, all_envs, log_file) gr.Info(cls.locale('load_alert', cls.lang)['value']) time.sleep(2) running_task = SampleRuntime.refresh_tasks(log_file)
swift/ui/llm_train/llm_train.py+38 −9 modified@@ -4,8 +4,9 @@ import re import sys import time +from copy import deepcopy from functools import partial -from subprocess import PIPE, STDOUT, Popen +from subprocess import DEVNULL, PIPE, STDOUT, Popen from typing import Dict, Type import gradio as gr @@ -28,6 +29,7 @@ from swift.ui.llm_train.self_cog import SelfCog from swift.ui.llm_train.task import Task from swift.ui.llm_train.tuner import Tuner +from swift.ui.llm_train.utils import run_command_in_background_with_popen from swift.utils import get_device_count, get_logger logger = get_logger() @@ -406,27 +408,43 @@ def train(cls, *args): except Exception as e: raise e params = '' - + command = ['swift', cmd] if cls.group == 'llm_grpo' and sys.platform != 'win32': params += f'--rlhf_type {cls.quote}grpo{cls.quote} ' + command.extend(['--rlhf_type', 'grpo']) sep = f'{cls.quote} {cls.quote}' for e in kwargs: if isinstance(kwargs[e], list): params += f'--{e} {cls.quote}{sep.join(kwargs[e])}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(kwargs[e])}']) elif e in kwargs_is_list and kwargs_is_list[e]: all_args = [arg for arg in kwargs[e].split(' ') if arg.strip()] params += f'--{e} {cls.quote}{sep.join(all_args)}{cls.quote} ' + command.extend([f'--{e}', f'{" ".join(all_args)}']) else: params += f'--{e} {cls.quote}{kwargs[e]}{cls.quote} ' + command.extend([f'--{e}', f'{kwargs[e]}']) if use_liger_kernel: params += f'--use_liger_kernel {cls.quote}{use_liger_kernel}{cls.quote} ' + command.extend(['--use_liger_kernel', f'{use_liger_kernel}']) if use_muon: params += f'--optimizer {cls.quote}muon{cls.quote} ' + command.extend(['--optimizer', 'muon']) + more_params_cmd = more_params_cmd.strip() if more_params_cmd != '': - params += f'{more_params_cmd.strip()} ' + params += f'{more_params_cmd} ' + more_params_cmd = more_params_cmd.split('--') + more_params_cmd = [param.split(' ') for param in more_params_cmd if param] + for param in more_params_cmd: + 
command.extend([f'--{param[0]}', ' '.join(param[1:])]) params += f'--add_version False --output_dir {sft_args.output_dir} ' \ f'--logging_dir {sft_args.logging_dir} --ignore_args_error True' + command.extend([ + '--add_version', 'False', '--output_dir', f'{sft_args.output_dir}', '--logging_dir', + f'{sft_args.logging_dir}', '--ignore_args_error', 'True' + ]) + all_envs = {} ddp_param = '' devices = other_kwargs['gpu_id'] envs = other_kwargs['envs'] or '' @@ -435,17 +453,24 @@ def train(cls, *args): if other_kwargs['use_ddp']: assert int(other_kwargs['ddp_num']) > 0 ddp_param = f'NPROC_PER_NODE={int(other_kwargs["ddp_num"])}' + all_envs['NPROC_PER_NODE'] = str(other_kwargs['ddp_num']) assert (len(devices) == 1 or 'cpu' not in devices) gpus = ','.join(devices) cuda_param = '' if gpus != 'cpu': if is_torch_npu_available(): cuda_param = f'ASCEND_RT_VISIBLE_DEVICES={gpus}' + all_envs['ASCEND_RT_VISIBLE_DEVICES'] = gpus elif is_torch_cuda_available(): cuda_param = f'CUDA_VISIBLE_DEVICES={gpus}' + all_envs['CUDA_VISIBLE_DEVICES'] = gpus else: cuda_param = '' - + if envs: + envs = envs.split(' ') + for env in envs: + k, v = env.split('=') + all_envs[k] = v log_file = os.path.join(sft_args.logging_dir, 'run.log') if sys.platform == 'win32': if cuda_param: @@ -468,14 +493,18 @@ def train(cls, *args): if key in default_args or key in ('more_params', 'train_stage', 'use_ddp', 'ddp_num', 'gpu_id', 'envs'): record[key] = value or None cls.save_cache(model, record) - return run_command, sft_args, other_kwargs + return command, all_envs, log_file, run_command, sft_args, other_kwargs @classmethod def train_studio(cls, *args): - run_command, sft_args, other_kwargs = cls.train(*args) + command, all_envs, log_file, run_command, sft_args, other_kwargs = cls.train(*args) if not other_kwargs['dry_run']: lines = collections.deque(maxlen=int(os.environ.get('MAX_LOG_LINES', 50))) - process = Popen(run_command, shell=True, stdout=PIPE, stderr=STDOUT) + env = deepcopy(os.environ) + if 
len(all_envs) > 0: + for k, v in all_envs.items(): + env[k] = v + process = Popen(command, env=env, stdout=PIPE, stderr=STDOUT) with process.stdout: for line in iter(process.stdout.readline, b''): line = line.decode('utf-8') @@ -489,7 +518,7 @@ def train_studio(cls, *args): @classmethod def train_local(cls, *args): - run_command, sft_args, other_kwargs = cls.train(*args) + command, all_envs, log_file, run_command, sft_args, other_kwargs = cls.train(*args) if cls.group == 'llm_grpo' and sft_args.vllm_mode == 'server': host = sft_args.vllm_server_host if sft_args.vllm_server_host else '127.0.0.1' port = sft_args.vllm_server_port if sft_args.vllm_server_port else '8000' @@ -505,7 +534,7 @@ def train_local(cls, *args): return [None] * 2 + [gr.update(open=False)] + [None] * 2 if not other_kwargs['dry_run']: os.makedirs(sft_args.logging_dir, exist_ok=True) - os.system(run_command) + run_command_in_background_with_popen(command, all_envs, log_file) time.sleep(1) # to make sure the log file has been created. gr.Info(cls.locale('submit_alert', cls.lang)['value']) return run_command, sft_args.logging_dir, gr.update(open=True), Runtime.refresh_tasks(
swift/ui/llm_train/runtime.py+13 −7 modified@@ -2,6 +2,7 @@ import collections import os import re +import subprocess import sys import time import webbrowser @@ -561,18 +562,19 @@ def parse_info_from_cmdline(task): for i in range(len(args)): space = args[i].find(' ') splits = args[i][:space], args[i][space + 1:] - all_args[splits[0]] = splits[1] + all_args[splits[0]] = str(splits[1]) if isinstance(splits[1], int) else splits[1] output_dir = all_args['output_dir'] if os.path.exists(os.path.join(output_dir, 'args.json')): with open(os.path.join(output_dir, 'args.json'), 'r', encoding='utf-8') as f: _json = json.load(f) for key in all_args.keys(): - all_args[key] = _json.get(key) + all_args[key] = str(_json.get(key)) if isinstance(_json.get(key), int) else _json.get(key) if isinstance(all_args[key], list): - if any([' ' in value for value in all_args[key]]): + if any([' ' in value for value in all_args[key] if isinstance(value, str)]): all_args[key] = [f'"{value}"' for value in all_args[key]] - all_args[key] = ' '.join(all_args[key]) + if len(all_args[key]) > 0 and isinstance(all_args[key][0], str): + all_args[key] = ' '.join(all_args[key]) return pid, all_args @staticmethod @@ -581,10 +583,14 @@ def kill_task(task): pid, all_args = Runtime.parse_info_from_cmdline(task) output_dir = all_args['output_dir'] if sys.platform == 'win32': - os.system(f'taskkill /f /t /pid "{pid}"') + command = ['taskkill', '/f', '/t', '/pid', pid] else: - os.system(f'pkill -9 -f {output_dir}') - time.sleep(1) + command = ['pkill', '-9', '-f', output_dir] + try: + result = subprocess.run(command, capture_output=True, text=True) + assert result.returncode == 0 + except Exception as e: + raise e Runtime.break_log_event(task) return [Runtime.refresh_tasks()] + [gr.update(value=None)] * (len(Runtime.get_plot(task)) + 1)
swift/ui/llm_train/utils.py+21 −0 modified@@ -1,7 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import asyncio +import os +import subprocess import sys from asyncio.subprocess import PIPE, STDOUT +from copy import deepcopy async def run_and_get_log(*args, timeout=None): @@ -35,3 +38,21 @@ def close_loop(handler): loop, process = handler process.kill() loop.close() + + +def run_command_in_background_with_popen(command, all_envs, log_file): + env = deepcopy(os.environ) + if len(all_envs) > 0: + for k, v in all_envs.items(): + env[k] = v + daemon_kwargs = {} + if sys.platform == 'win32': + from subprocess import DETACHED_PROCESS, CREATE_NO_WINDOW + daemon_kwargs['creationflags'] = DETACHED_PROCESS | CREATE_NO_WINDOW + daemon_kwargs['close_fds'] = True + else: + daemon_kwargs['preexec_fn'] = os.setsid + + with open(log_file, 'w', encoding='utf-8') as f: + subprocess.Popen( + command, stdout=f, stderr=subprocess.STDOUT, stdin=subprocess.DEVNULL, text=True, bufsize=1, env=env)
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
3 news mentions
No linked articles in our index yet.