CVE-2025-15036
Description
A path traversal vulnerability exists in the extract_archive_to_dir function within the mlflow/pyfunc/dbconnect_artifact_cache.py file of the mlflow/mlflow repository. This vulnerability, present in versions before v3.7.0, arises due to the lack of validation of tar member paths during extraction. An attacker with control over the tar.gz file can exploit this issue to overwrite arbitrary files or gain elevated privileges, potentially escaping the sandbox directory in multi-tenant or shared cluster environments.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| mlflow (PyPI) | < 3.9.0rc0 | 3.9.0rc0 |
Affected products: 1
Patches: 1
- 3bf6d81ac4d3 — Check security before extracting tar file (#19557)
3 files changed · +106 −1
mlflow/pyfunc/dbconnect_artifact_cache.py+2 −1 modified@@ -4,7 +4,7 @@ import tarfile from mlflow.utils.databricks_utils import is_in_databricks_runtime -from mlflow.utils.file_utils import get_or_create_tmp_dir +from mlflow.utils.file_utils import check_tarfile_security, get_or_create_tmp_dir _CACHE_MAP_FILE_NAME = "db_connect_artifact_cache.json" @@ -138,6 +138,7 @@ def archive_directory(input_dir, archive_file_path): def extract_archive_to_dir(archive_path, dest_dir): + check_tarfile_security(archive_path) os.makedirs(dest_dir, exist_ok=True) with tarfile.open(archive_path, "r") as tar: tar.extractall(path=dest_dir)
mlflow/utils/file_utils.py+38 −0 modified@@ -1013,3 +1013,41 @@ def __exit__( # Release lock fcntl.flock(self.fd, fcntl.LOCK_UN) self.fd.close() + + +def check_tarfile_security(archive_path: str) -> None: + """ + Check the tar file content. + If its members contain any of the following paths: + * An absolute path. + * A relative path that escapes the extraction directory. + * A relative path that goes through a symlink. + then raise an error. + """ + with tarfile.open(archive_path, "r") as tar: + symlink_set = set() + for m in tar.getmembers(): + path = posixpath.normpath(m.name) + if m.issym(): + symlink_set.add(path) + else: + if path.startswith("/"): + raise MlflowException( + "Absolute path destination in the archive file is not allowed, " + f"but got path {path}." + ) + path_parts = path.split("/") + if path_parts[0] == "..": + raise MlflowException( + "Escaped path destination in the archive file is not allowed, " + f"but got path {path}." + ) + for m in tar.getmembers(): + if not m.issym(): + for prefix_len in range(1, len(path_parts) + 1): + prefix_path = "/".join(path_parts[:prefix_len]) + if prefix_path in symlink_set: + raise MlflowException( + "Destination path in the archive file can not go through a symlink, " + f"but got path {path}." + )
tests/utils/test_file_utils.py+66 −0 modified@@ -1,5 +1,6 @@ import filecmp import hashlib +import io import os import shutil import stat @@ -10,11 +11,13 @@ from pyspark.sql import SparkSession import mlflow +from mlflow.exceptions import MlflowException from mlflow.utils import file_utils from mlflow.utils.file_utils import ( TempDir, _copy_file_or_tree, _handle_readonly_on_windows, + check_tarfile_security, get_parent_dir, get_total_file_size, local_file_uri_to_path, @@ -220,3 +223,66 @@ def generate_file(path, size_in_bytes): path_file = tmp_path.joinpath("file1.txt") assert get_total_file_size(path_file) is None + + +def test_check_tarfile_security(tmp_path): + def create_tar_with_escaped_path(tar_path: str, escaped_path: str, content: bytes) -> None: + """Create tar with path traversal entry.""" + with tarfile.open(tar_path, "w:gz") as tar: + # Add traversal file + data = io.BytesIO(content) + info = tarfile.TarInfo(name=escaped_path) + info.size = len(content) + tar.addfile(info, data) + + tar1_path = str(tmp_path.joinpath("file1.tar")) + create_tar_with_escaped_path(tar1_path, "../pwned2.txt", b"ABX") + with pytest.raises( + MlflowException, match="Escaped path destination in the archive file is not allowed" + ): + check_tarfile_security(tar1_path) + + def create_tar_with_symlink( + tar_path: str, link_name: str, link_target: str, file_via_link: str, content: bytes + ) -> None: + """Create tar with symlink that points outside, then file through symlink.""" + with tarfile.open(tar_path, "w:gz") as tar: + # First: create a symlink pointing to parent directory + link_info = tarfile.TarInfo(name=link_name) + link_info.type = tarfile.SYMTYPE + link_info.linkname = link_target + tar.addfile(link_info) + # Second: create a file that goes through the symlink + data = io.BytesIO(content) + file_info = tarfile.TarInfo(name=file_via_link) + file_info.size = len(content) + tar.addfile(file_info, data) + + tar2_path = str(tmp_path.joinpath("file2.tar")) + 
create_tar_with_symlink( + tar2_path, + link_name="escape", + link_target="..", + file_via_link="escape/pwned.txt", + content=b"XYZ", + ) + with pytest.raises( + MlflowException, match="Destination path in the archive file can not go through a symlink" + ): + check_tarfile_security(tar2_path) + + def create_tar_with_abs_path(tar_path: str, abs_path: str, content: bytes) -> None: + """Create tar with path traversal entry.""" + with tarfile.open(tar_path, "w:gz") as tar: + # Add traversal file + data = io.BytesIO(content) + info = tarfile.TarInfo(name=abs_path) + info.size = len(content) + tar.addfile(info, data) + + tar3_path = str(tmp_path.joinpath("file3.tar")) + create_tar_with_abs_path(tar3_path, "/tmp/pwned2.txt", b"ABX") + with pytest.raises( + MlflowException, match="Absolute path destination in the archive file is not allowed" + ): + check_tarfile_security(tar3_path)
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References: 4
- github.com/mlflow/mlflow/commit/3bf6d81ac4d38654c8ff012dbd0c3e9f17e7e346 [nvd] [Patch] [WEB]
- github.com/advisories/GHSA-vhcx-3pq2-4fvc [ghsa] [ADVISORY]
- huntr.com/bounties/36c314cf-fd6e-4fb0-b9b0-1b47bcdf0eb0 [nvd] [Third Party Advisory] [Exploit] [WEB]
- nvd.nist.gov/vuln/detail/CVE-2025-15036 [ghsa] [ADVISORY]
News mentions: 0
No linked articles in our index yet.