VYPR
High severity · NVD Advisory · Published Mar 6, 2026 · Updated Mar 9, 2026

changedetection.io: XPath - Arbitrary File Read via unparsed-text()

CVE-2026-29039

Description

changedetection.io is a free, open-source web page change detection tool. Prior to version 0.54.4, the changedetection.io application allows users to specify XPath expressions as content filters via the include_filters field. These XPath expressions are processed using the elementpath library, which implements the XPath 3.0/3.1 specification. XPath 3.0 includes the unparsed-text() function, which can read arbitrary files from the filesystem. The application does not validate or sanitize XPath expressions to block dangerous functions, allowing an attacker to read any file accessible to the application process. This issue has been patched in version 0.54.4.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
changedetection.io (PyPI) | < 0.54.4 | 0.54.4

Affected products

1

Patches

1
417d57e57494

CVE-2026-29039 - XPath - Arbitrary File Read via unparsed-text()

3 files changed · +124 -6
  • changedetectionio/forms.py · +2 -3 · modified
    @@ -608,13 +608,12 @@ def __call__(self, form, field):
                         raise ValidationError("XPath not permitted in this field!")
                     from lxml import etree, html
                     import elementpath
    -                # xpath 2.0-3.1
    -                from elementpath.xpath3 import XPath3Parser
    +                from changedetectionio.html_tools import SafeXPath3Parser
                     tree = html.fromstring("<html></html>")
                     line = line.replace('xpath:', '')
     
                     try:
    -                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
    +                    elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
                     except elementpath.ElementPathError as e:
                         message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                         raise ValidationError(message % (line, str(e)))
    
  • changedetectionio/html_tools.py · +51 -3 · modified
    @@ -23,6 +23,53 @@ class JSONNotFound(ValueError):
         def __init__(self, msg):
             ValueError.__init__(self, msg)
     
    +
    +_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
    +    'unparsed-text',
    +    'unparsed-text-lines',
    +    'unparsed-text-available',
    +    'doc',
    +    'doc-available',
    +    'environment-variable',
    +    'available-environment-variables',
    +]
    +
    +
    +def _build_safe_xpath3_parser():
    +    """Return an XPath3Parser subclass with filesystem/environment access functions removed.
    +
    +    XPath 3.0 includes functions that can read arbitrary files or environment variables:
    +      - unparsed-text / unparsed-text-lines / unparsed-text-available  (file read)
    +      - doc / doc-available                                             (XML fetch from URI)
    +      - environment-variable / available-environment-variables         (env var leakage)
    +
    +    Subclassing gives us an independent symbol_table copy (not shared with the parent class),
    +    so removing entries here does not affect XPath3Parser itself.
    +
    +    Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
    +    (comma-separated, e.g. "unparsed-text,doc,environment-variable").
    +    """
    +    import os
    +    from elementpath.xpath3 import XPath3Parser
    +
    +    class SafeXPath3Parser(XPath3Parser):
    +        pass
    +
    +    env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
    +    if env_override is not None:
    +        blocked = [f.strip() for f in env_override.split(',') if f.strip()]
    +    else:
    +        blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
    +
    +    for _fn in blocked:
    +        SafeXPath3Parser.symbol_table.pop(_fn, None)
    +
    +    return SafeXPath3Parser
    +
    +
    +# Module-level singleton — built once, reused everywhere.
    +SafeXPath3Parser = _build_safe_xpath3_parser()
    +
     # Doesn't look like python supports forward slash auto enclosure in re.findall
     # So convert it to inline flag "(?i)foobar" type configuration
     @lru_cache(maxsize=100)
    @@ -183,8 +230,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
         """
         from lxml import etree, html
         import elementpath
    -    # xpath 2.0-3.1
    -    from elementpath.xpath3 import XPath3Parser
     
         parser = etree.HTMLParser()
         tree = None
    @@ -210,7 +255,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
                 # This allows //title to match elements in the default namespace
                 namespaces[''] = tree.nsmap[None]
     
    -        r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
    +        r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
             #@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
             #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
     
    @@ -235,6 +280,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
                 else:
                     html_block += elementpath_tostring(element)
     
    +        # Drop element references before the finally block so tree.clear() can release
    +        # the libxml2 document immediately (elements pin the C-level doc via refcount).
    +        del r
             return html_block
         finally:
             # Explicitly clear the tree to free memory
    
  • changedetectionio/tests/test_xpath_selector.py · +71 -0 · modified
    @@ -592,3 +592,74 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
             set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
             for content_type in RSS_XML_CONTENT_TYPES:
                 _subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
    +
    +
    +# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
    +# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
    +def test_xpath_blocked_functions_unit():
    +    """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed)."""
    +    import elementpath
    +    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
    +    from lxml import html
    +
    +    html_content = '<html><body><p>safe content</p></body></html>'
    +
    +    dangerous_expressions = [
    +        "unparsed-text('file:///etc/passwd')",
    +        "unparsed-text-lines('file:///etc/passwd')",
    +        "unparsed-text-available('file:///etc/passwd')",
    +        "doc('file:///etc/passwd')",
    +        "doc-available('file:///etc/passwd')",
    +        "environment-variable('PATH')",
    +        "available-environment-variables()",
    +    ]
    +
    +    for expr in dangerous_expressions:
    +        # xpath_filter() must raise, not silently return file contents
    +        try:
    +            result = xpath_filter(expr, html_content)
    +            assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}"
    +        except elementpath.ElementPathError:
    +            pass  # expected
    +
    +        # SafeXPath3Parser must reject the expression at parse time
    +        tree = html.fromstring(html_content)
    +        try:
    +            elementpath.select(tree, expr, parser=SafeXPath3Parser)
    +            assert False, f"SafeXPath3Parser should have raised for: {expr!r}"
    +        except elementpath.ElementPathError:
    +            pass  # expected
    +
    +    # Sanity check: normal XPath still works
    +    result = xpath_filter('//p/text()', html_content)
    +    assert result == 'safe content'
    +
    +
    +# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
    +def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
    +    """Edit-form validation must reject dangerous XPath 3.0 functions before they are stored."""
    +    from flask import url_for
    +
    +    set_original_response(datastore_path=datastore_path)
    +    test_url = url_for('test_endpoint', _external=True)
    +    client.application.config.get('DATASTORE').add_watch(url=test_url)
    +    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    +    wait_for_all_checks(client)
    +
    +    dangerous_expressions = [
    +        "xpath:unparsed-text('file:///etc/passwd')",
    +        "xpath:environment-variable('PATH')",
    +        "xpath:doc('file:///etc/passwd')",
    +    ]
    +
    +    for expr in dangerous_expressions:
    +        res = client.post(
    +            url_for("ui.ui_edit.edit_page", uuid="first"),
    +            data={"include_filters": expr, "url": test_url, "tags": "", "headers": "",
    +                  'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
    +            follow_redirects=True
    +        )
    +        assert b"is not a valid XPath expression" in res.data, \
    +            f"Form should reject dangerous expression: {expr!r}"
    +
    +    delete_all_watches(client)
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

5

News mentions

0

No linked articles in our index yet.