CVE-2026-35492
Description
Kedro-Datasets is a Kedro plugin providing data connectors. Prior to 9.3.0, PartitionedDataset in kedro-datasets was vulnerable to path traversal. Partition IDs were concatenated directly with the dataset base path without validation. An attacker or malicious input containing .. components in a partition ID could cause files to be written outside the configured dataset directory, potentially overwriting arbitrary files on the filesystem. Users of PartitionedDataset with any storage backend (local filesystem, S3, GCS, etc.) are affected. This vulnerability is fixed in 9.3.0.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| kedro-datasets (PyPI) | < 9.3.0 | 9.3.0 |
Affected products
1
Patches
1
65115f76b872 fix(datasets): Ensure partition paths remain within dataset directory (#1346)
3 files changed · +177 −10
kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py+51 −4 modified@@ -4,6 +4,7 @@ from __future__ import annotations +import posixpath from collections.abc import Callable from copy import deepcopy from pathlib import PurePosixPath @@ -273,17 +274,63 @@ def _join_protocol(self, path: str) -> str: return f"{protocol_prefix}{path}" return path + def _validate_partition_path(self, path: str, dir_path: str) -> None: + """Validate that the partition path is within the base directory. + + Args: + path: The partition path to validate + dir_path: The base directory path + + Raises: + DatasetError: If the path resolves outside the base directory + """ + # Normalize only for validation - handle Windows backslashes + # fsspec uses forward slashes internally, so we normalize to forward slashes + path_to_check = path.replace("\\", "/").lstrip("/") + full_path_to_check = self._sep.join([dir_path, path_to_check]) + + # Normalize the path to resolve any '..' or '.' components for the security check. + # posixpath is used intentionally here as fsspec normalizes all paths to + # forward-slash separated strings regardless of OS (including Windows), so + # this is safe for both local and remote (S3, GCS, etc.) filesystems as long + # as paths have gone through fsspec's normalization before reaching this point. + normalized_full_path = posixpath.normpath(full_path_to_check) + normalized_base_path = posixpath.normpath(dir_path) + + # Ensure the normalized path is within the base directory + # Check that normalized path starts with base path followed by separator or is exactly base path + if not ( + normalized_full_path == normalized_base_path + or normalized_full_path.startswith(normalized_base_path + self._sep) + ): + raise DatasetError( + f"Partition ID '{path}' resolves to '{normalized_full_path}' " + f"which is outside the dataset directory '{dir_path}'." 
+ ) + def _partition_to_path(self, path: str): - dir_path = self._path.rstrip(self._sep) + dir_path = self._filesystem._strip_protocol(self._normalized_path).rstrip( + self._sep + ) path = path.lstrip(self._sep) - full_path = self._sep.join([dir_path, path]) + self._filename_suffix - return full_path + + # Validate the path is within the base directory + self._validate_partition_path(path, dir_path) + + full_path = self._sep.join([dir_path, path]) + return full_path + self._filename_suffix def _path_to_partition(self, path: str) -> str: - dir_path = self._filesystem._strip_protocol(self._normalized_path) + dir_path = self._filesystem._strip_protocol(self._normalized_path).rstrip( + self._sep + ) path = path.split(dir_path, 1).pop().lstrip(self._sep) if self._filename_suffix and path.endswith(self._filename_suffix): path = path[: -len(self._filename_suffix)] + + # Validate the partition ID to ensure it doesn't escape the base directory + self._validate_partition_path(path, dir_path) + return path def load(self) -> dict[str, Callable[[], Any]]:
kedro-datasets/tests/partitions/test_partitioned_dataset.py+120 −0 modified@@ -443,6 +443,126 @@ def test_no_partitions(self, tmpdir): with pytest.raises(DatasetError, match=pattern): pds.load() + @pytest.mark.parametrize( + "safe_partition_id", + [ + "data1", + "partition_a", + "year=2024/month=01/data", + "v1.0.0", + "file.backup", + "...data", + "foo/bar/baz", + ".hidden", + ], + ) + def test_load_partition_safe_paths(self, tmpdir, safe_partition_id): + """Test legitimate partition IDs can be loaded without error.""" + pds = PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + pds.save({safe_partition_id: original_data}) + + loaded = pds.load() + assert safe_partition_id in loaded + assert_frame_equal(loaded[safe_partition_id](), original_data) + + @pytest.mark.parametrize( + "unsafe_partition_id", + [ + "..", + "../secrets", + "../../../secrets", + "foo/../../secrets", + # Windows paths with backslashes + "..\\secrets", + "..\\..\\secrets", + "foo\\..\\..\\secrets", + ], + ) + def test_load_partition_unsafe_paths(self, tmpdir, mocker, unsafe_partition_id): + """Test path traversal partition IDs are rejected during load.""" + pds = PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + dir_path = pds._filesystem._strip_protocol(pds._normalized_path).rstrip( + pds._sep + ) + malicious_full_path = f"{dir_path}/{unsafe_partition_id}/data.csv" + mocker.patch.object(pds, "_list_partitions", return_value=[malicious_full_path]) + + with pytest.raises(DatasetError, match="outside the dataset directory"): + pds.load() + + @pytest.mark.parametrize( + "safe_partition_id", + [ + "data1", + "partition_a", + "year=2024/month=01/data", + "v1.0.0", + "file.backup", + "...data", + "foo/bar/baz", + ".hidden", + ], + ) + def test_save_partition_safe_paths(self, tmpdir, safe_partition_id): + """Test legitimate partition IDs can be saved without error.""" + pds = 
PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + pds.save({safe_partition_id: original_data}) + + @pytest.mark.parametrize( + "unsafe_partition_id", + [ + "..", + "../secrets", + "../../../secrets", + "foo/../../secrets", + # Windows paths with backslashes + "..\\secrets", + "..\\..\\secrets", + "foo\\..\\..\\secrets", + ], + ) + def test_save_partition_unsafe_paths(self, tmpdir, unsafe_partition_id): + """Test path traversal partition IDs are rejected during save.""" + pds = PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + + with pytest.raises(DatasetError, match="outside the dataset directory"): + pds.save({unsafe_partition_id: original_data}) + + def test_unsafe_partition_error_message(self, tmpdir): + """Test DatasetError message includes the resolved path and base directory.""" + pds = PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + + with pytest.raises( + DatasetError, + match=r"Partition ID '.*' resolves to '.*' which is outside the dataset directory '.*'\.", + ): + pds.save({"../secrets": original_data}) + + @pytest.mark.parametrize( + "partition_id,expected_key", + [ + # Intra-directory traversal — stays within base so validation passes, + # but the OS resolves '..' / '.' on write so the loaded key is normalised. 
+ ("a/../b", "b"), # written to base/b.csv + ("sub/./data", "sub/data"), # written to base/sub/data.csv + ("a/b/../c/d", "a/c/d"), # written to base/a/c/d.csv + ], + ) + def test_safe_partition_expected_key(self, tmpdir, partition_id, expected_key): + """Test intra-directory traversal paths pass validation but load under + the OS-normalised key, not the original partition_id.""" + pds = PartitionedDataset(path=str(tmpdir), dataset="pandas.CSVDataset") + original_data = pd.DataFrame({"foo": 42, "bar": ["a", "b", None]}) + pds.save({partition_id: original_data}) + + loaded = pds.load() + assert expected_key in loaded + assert_frame_equal(loaded[expected_key](), original_data) + @pytest.mark.parametrize( "pds_config,filepath_arg", [
.secrets.baseline+6 −6 modified@@ -310,35 +310,35 @@ "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "76f747de912e8682e29a23cb506dd5bf0de080d2", "is_verified": false, - "line_number": 479 + "line_number": 599 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "9027cc5a2c1321de60a2d71ccde6229d1152d6d3", "is_verified": false, - "line_number": 480 + "line_number": 600 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "5dcbdf371f181b9b7a41a4be7be70f8cbee67da7", "is_verified": false, - "line_number": 516 + "line_number": 636 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "727d8ff68b6b550f2cf6e737b3cad5149c65fe5b", "is_verified": false, - "line_number": 567 + "line_number": 687 }, { "type": "Secret Keyword", "filename": "kedro-datasets/tests/partitions/test_partitioned_dataset.py", "hashed_secret": "adb5fabe51f5b45e83fdd91b71c92156fec4a63e", "is_verified": false, - "line_number": 587 + "line_number": 707 } ], "kedro-datasets/tests/plotly/test_html_dataset.py": [ @@ -460,5 +460,5 @@ } ] }, - "generated_at": "2026-02-28T10:57:30Z" + "generated_at": "2026-03-18T05:51:45Z" }
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
7
- github.com/advisories/GHSA-cjg8-h5qc-hrjv (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2026-35492 (ghsa, ADVISORY)
- github.com/kedro-org/kedro-plugins/commit/65115f76b872217317734b6bde8927170c98fc4b (ghsa, WEB)
- github.com/kedro-org/kedro-plugins/pull/1346 (nvd, WEB)
- github.com/kedro-org/kedro-plugins/releases/tag/kedro-datasets-9.3.0 (ghsa, WEB)
- github.com/kedro-org/kedro-plugins/security/advisories/GHSA-cjg8-h5qc-hrjv (nvd, WEB)
- github.com/kedro-org/kedro/issues/5452 (nvd, WEB)
News mentions
0
No linked articles in our index yet.