Moderate severity · NVD Advisory · Published Dec 11, 2023 · Updated Aug 2, 2024
MindsDB Arbitrary File Write vulnerability
CVE-2023-49796
Description
MindsDB connects artificial intelligence models to real time data. Versions prior to 23.11.4.1 contain a limited file write vulnerability in `file.py`. Users should upgrade to v23.11.4.1 or use MindsDB's staging branch, both of which contain a fix for the issue.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| mindsdb (PyPI) | < 23.11.4.1 | 23.11.4.1 |
Affected products: 1
Patches: 1
4 files changed · +65 −6
mindsdb/api/http/namespaces/file.py+9 −3 modified@@ -14,6 +14,7 @@ from mindsdb.utilities.config import Config from mindsdb.utilities.context import context as ctx from mindsdb.utilities import log +from mindsdb.utilities.security import is_private_url, clear_filename logger = log.getLogger(__name__) @@ -51,7 +52,7 @@ def on_field(field): def on_file(file): nonlocal file_object - data["file"] = file.file_name.decode() + data["file"] = clear_filename(file.file_name.decode()) file_object = file.file_object temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_") @@ -91,10 +92,15 @@ def on_file(file): if data.get("source_type") == "url": url = data["source"] - data["file"] = data["name"] + data["file"] = clear_filename(data["name"]) config = Config() is_cloud = config.get("cloud", False) + if is_cloud and is_private_url(url): + return http_error( + 400, f'URL is private: {url}' + ) + if is_cloud is True and ctx.user_class != 1: info = requests.head(url) file_size = info.headers.get("Content-Length") @@ -123,7 +129,7 @@ def on_file(file): for chunk in r.iter_content(chunk_size=8192): f.write(chunk) - original_file_name = data.get("original_file_name") + original_file_name = clear_filename(data.get("original_file_name")) file_path = os.path.join(temp_dir_path, data["file"]) lp = file_path.lower()
mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py+8 −1 modified@@ -13,6 +13,7 @@ from mindsdb.integrations.libs.base import BaseMLEngine from mindsdb.utilities.config import Config +from mindsdb.utilities.security import is_private_url def _validate_prompt_template(prompt_template: str): @@ -69,7 +70,13 @@ def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[ if 'source_url_link' not in args['using']: raise Exception("SimpleWebPageReader requires a `source_url_link` parameter. Refer to LlamaIndex documentation for more details.") # noqa - reader = SimpleWebPageReader(html_to_text=True).load_data([args['using']['source_url_link']]) + url = args['using']['source_url_link'] + config = Config() + is_cloud = config.get("cloud", False) + if is_cloud and is_private_url(url): + raise Exception(f'URL is private: {url}') + + reader = SimpleWebPageReader(html_to_text=True).load_data([url]) else: raise Exception(f"Invalid operation mode. Please use one of {self.supported_reader}.")
mindsdb/integrations/handlers/web_handler/web_handler.py+15 −2 modified@@ -10,6 +10,8 @@ HandlerResponse as Response, RESPONSE_TYPE ) +from mindsdb.utilities.security import is_private_url +from mindsdb.utilities.config import Config from .urlcrawl_helpers import get_df_from_query_str, get_all_websites @@ -37,7 +39,9 @@ def select(self, query: ast.Select) -> pd.DataFrame: urls = url else: raise NotImplementedError( - f'url can be url = "someurl", you can also crawl multiple sites, as follows: url IN ("url1", "url2", ..)') + f'url can be url = "someurl", you can also crawl multiple sites, as follows:' + f' url IN ("url1", "url2", ..)' + ) else: pass @@ -52,7 +56,16 @@ def select(self, query: ast.Select) -> pd.DataFrame: if limit < 0: limit = 0 - + + config = Config() + is_cloud = config.get("cloud", False) + if is_cloud: + urls = [ + url + for url in urls + if not is_private_url(url) + ] + result = get_all_websites(urls, limit, html=False) if len(result) > limit: result = result[:limit]
mindsdb/utilities/security.py · +33 −0 · added
```diff
@@ -0,0 +1,33 @@
+from urllib.parse import urlparse
+import socket
+import ipaddress
+
+
+def is_private_url(url: str):
+    """
+    Raises exception if url is private
+
+    :param url: url to check
+    """
+
+    hostname = urlparse(url).hostname
+    if not hostname:
+        # Unable find hostname in url
+        return True
+    ip = socket.gethostbyname(hostname)
+    return ipaddress.ip_address(ip).is_private
+
+
+def clear_filename(filename: str):
+    """
+    Removes path symbols from filename which could be used for path injection
+    :param s:
+    :return:
+    """
+
+    if not filename:
+        return filename
+    badchars = '\\/:*?\"<>|'
+    for c in badchars:
+        filename = filename.replace(c, '')
+    return filename
```
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References (6)
- github.com/advisories/GHSA-crhp-7c74-cg4c (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2023-49796 (ghsa, ADVISORY)
- github.com/mindsdb/mindsdb/blob/1821da719f34c022890c9ff25810218e71c5abbc/mindsdb/api/http/namespaces/file.py (ghsa, WEB)
- github.com/mindsdb/mindsdb/commit/8d13c9c28ebcf3b36509eb679378004d4648d8fe (ghsa, x_refsource_MISC, WEB)
- github.com/mindsdb/mindsdb/security/advisories/GHSA-crhp-7c74-cg4c (ghsa, x_refsource_CONFIRM, WEB)
- github.com/pypa/advisory-database/tree/main/vulns/mindsdb/PYSEC-2023-278.yaml (ghsa, WEB)
News mentions (0)
No linked articles in our index yet.