VYPR
Moderate severity · NVD Advisory · Published Dec 11, 2023 · Updated Aug 2, 2024

MindsDB Arbitrary File Write vulnerability

CVE-2023-49796

Description

MindsDB connects artificial intelligence models to real-time data. Versions prior to 23.11.4.1 contain a limited file write vulnerability in `file.py`. Users should use MindsDB's staging branch or v23.11.4.1, both of which contain a fix for the issue.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
mindsdb (PyPI) | < 23.11.4.1 | 23.11.4.1

Affected products

1

Patches

1
8d13c9c28ebc

fixes:

https://github.com/mindsdb/mindsdb · andrew · Nov 29, 2023 · via ghsa
4 files changed · +65 -6
  • mindsdb/api/http/namespaces/file.py +9 -3 modified
    @@ -14,6 +14,7 @@
     from mindsdb.utilities.config import Config
     from mindsdb.utilities.context import context as ctx
     from mindsdb.utilities import log
    +from mindsdb.utilities.security import is_private_url, clear_filename
     
     logger = log.getLogger(__name__)
     
    @@ -51,7 +52,7 @@ def on_field(field):
     
             def on_file(file):
                 nonlocal file_object
    -            data["file"] = file.file_name.decode()
    +            data["file"] = clear_filename(file.file_name.decode())
                 file_object = file.file_object
     
             temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_")
    @@ -91,10 +92,15 @@ def on_file(file):
     
             if data.get("source_type") == "url":
                 url = data["source"]
    -            data["file"] = data["name"]
    +            data["file"] = clear_filename(data["name"])
     
                 config = Config()
                 is_cloud = config.get("cloud", False)
    +            if is_cloud and is_private_url(url):
    +                return http_error(
    +                    400, f'URL is private: {url}'
    +                )
    +
                 if is_cloud is True and ctx.user_class != 1:
                     info = requests.head(url)
                     file_size = info.headers.get("Content-Length")
    @@ -123,7 +129,7 @@ def on_file(file):
                         for chunk in r.iter_content(chunk_size=8192):
                             f.write(chunk)
     
    -        original_file_name = data.get("original_file_name")
    +        original_file_name = clear_filename(data.get("original_file_name"))
     
             file_path = os.path.join(temp_dir_path, data["file"])
             lp = file_path.lower()
    
  • mindsdb/integrations/handlers/llama_index_handler/llama_index_handler.py +8 -1 modified
    @@ -13,6 +13,7 @@
     
     from mindsdb.integrations.libs.base import BaseMLEngine
     from mindsdb.utilities.config import Config
    +from mindsdb.utilities.security import is_private_url
     
     
     def _validate_prompt_template(prompt_template: str):
    @@ -69,7 +70,13 @@ def create(self, target: str, df: Optional[pd.DataFrame] = None, args: Optional[
                 if 'source_url_link' not in args['using']:
                     raise Exception("SimpleWebPageReader requires a `source_url_link` parameter. Refer to LlamaIndex documentation for more details.")  # noqa
     
    -            reader = SimpleWebPageReader(html_to_text=True).load_data([args['using']['source_url_link']])
    +            url = args['using']['source_url_link']
    +            config = Config()
    +            is_cloud = config.get("cloud", False)
    +            if is_cloud and is_private_url(url):
    +                raise Exception(f'URL is private: {url}')
    +
    +            reader = SimpleWebPageReader(html_to_text=True).load_data([url])
     
             else:
                 raise Exception(f"Invalid operation mode. Please use one of {self.supported_reader}.")
    
  • mindsdb/integrations/handlers/web_handler/web_handler.py +15 -2 modified
    @@ -10,6 +10,8 @@
         HandlerResponse as Response,
         RESPONSE_TYPE
     )
    +from mindsdb.utilities.security import is_private_url
    +from mindsdb.utilities.config import Config
     
     from .urlcrawl_helpers import get_df_from_query_str, get_all_websites
     
    @@ -37,7 +39,9 @@ def select(self, query: ast.Select) -> pd.DataFrame:
                             urls = url
                     else:
                         raise NotImplementedError(
    -                        f'url can be url = "someurl", you can also crawl multiple sites, as follows: url IN ("url1", "url2", ..)')
    +                        f'url can be url = "someurl", you can also crawl multiple sites, as follows:'
    +                        f' url IN ("url1", "url2", ..)'
    +                    )
     
                 else:
                     pass
    @@ -52,7 +56,16 @@ def select(self, query: ast.Select) -> pd.DataFrame:
     
             if limit < 0:
                 limit = 0
    -            
    +
    +        config = Config()
    +        is_cloud = config.get("cloud", False)
    +        if is_cloud:
    +            urls = [
    +                url
    +                for url in urls
    +                if not is_private_url(url)
    +            ]
    +
             result = get_all_websites(urls, limit, html=False)
             if len(result) > limit:
                 result = result[:limit]
    
  • mindsdb/utilities/security.py +33 -0 added
    @@ -0,0 +1,33 @@
    +from urllib.parse import urlparse
    +import socket
    +import ipaddress
    +
    +
    +def is_private_url(url: str):
    +    """
    +    Raises exception if url is private
    +
    +    :param url: url to check
    +    """
    +
    +    hostname = urlparse(url).hostname
    +    if not hostname:
    +        # Unable find hostname in url
    +        return True
    +    ip = socket.gethostbyname(hostname)
    +    return ipaddress.ip_address(ip).is_private
    +
    +
    +def clear_filename(filename: str):
    +    """
    +    Removes path symbols from filename which could be used for path injection
    +    :param s:
    +    :return:
    +    """
    +
    +    if not filename:
    +        return filename
    +    badchars = '\\/:*?\"<>|'
    +    for c in badchars:
    +        filename = filename.replace(c, '')
    +    return filename
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.