changedetection.io: XPath - Arbitrary File Read via unparsed-text()
Description
changedetection.io is a free, open-source web page change detection tool. Prior to version 0.54.4, the changedetection.io application allows users to specify XPath expressions as content filters via the include_filters field. These XPath expressions are processed using the elementpath library, which implements the XPath 3.0/3.1 specification. XPath 3.0 includes the unparsed-text() function, which can read arbitrary files from the filesystem. The application does not validate or sanitize XPath expressions to block dangerous functions, allowing an attacker to read any file accessible to the application process. This issue has been patched in version 0.54.4.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| changedetection.io (PyPI) | < 0.54.4 | 0.54.4 |
Affected products
1- Range: < 0.54.4
Patches
1 patch: 417d57e57494 — CVE-2026-29039 - XPath - Arbitrary File Read via unparsed-text()
3 files changed · +124 −6
changedetectionio/forms.py+2 −3 modified@@ -608,13 +608,12 @@ def __call__(self, form, field): raise ValidationError("XPath not permitted in this field!") from lxml import etree, html import elementpath - # xpath 2.0-3.1 - from elementpath.xpath3 import XPath3Parser + from changedetectionio.html_tools import SafeXPath3Parser tree = html.fromstring("<html></html>") line = line.replace('xpath:', '') try: - elementpath.select(tree, line.strip(), parser=XPath3Parser) + elementpath.select(tree, line.strip(), parser=SafeXPath3Parser) except elementpath.ElementPathError as e: message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') raise ValidationError(message % (line, str(e)))
changedetectionio/html_tools.py+51 −3 modified@@ -23,6 +23,53 @@ class JSONNotFound(ValueError): def __init__(self, msg): ValueError.__init__(self, msg) + +_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [ + 'unparsed-text', + 'unparsed-text-lines', + 'unparsed-text-available', + 'doc', + 'doc-available', + 'environment-variable', + 'available-environment-variables', +] + + +def _build_safe_xpath3_parser(): + """Return an XPath3Parser subclass with filesystem/environment access functions removed. + + XPath 3.0 includes functions that can read arbitrary files or environment variables: + - unparsed-text / unparsed-text-lines / unparsed-text-available (file read) + - doc / doc-available (XML fetch from URI) + - environment-variable / available-environment-variables (env var leakage) + + Subclassing gives us an independent symbol_table copy (not shared with the parent class), + so removing entries here does not affect XPath3Parser itself. + + Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable + (comma-separated, e.g. "unparsed-text,doc,environment-variable"). + """ + import os + from elementpath.xpath3 import XPath3Parser + + class SafeXPath3Parser(XPath3Parser): + pass + + env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS') + if env_override is not None: + blocked = [f.strip() for f in env_override.split(',') if f.strip()] + else: + blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS + + for _fn in blocked: + SafeXPath3Parser.symbol_table.pop(_fn, None) + + return SafeXPath3Parser + + +# Module-level singleton — built once, reused everywhere. 
+SafeXPath3Parser = _build_safe_xpath3_parser() + # Doesn't look like python supports forward slash auto enclosure in re.findall # So convert it to inline flag "(?i)foobar" type configuration @lru_cache(maxsize=100) @@ -183,8 +230,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False """ from lxml import etree, html import elementpath - # xpath 2.0-3.1 - from elementpath.xpath3 import XPath3Parser parser = etree.HTMLParser() tree = None @@ -210,7 +255,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False # This allows //title to match elements in the default namespace namespaces[''] = tree.nsmap[None] - r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) + r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser) #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) @@ -235,6 +280,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False else: html_block += elementpath_tostring(element) + # Drop element references before the finally block so tree.clear() can release + # the libxml2 document immediately (elements pin the C-level doc via refcount). + del r return html_block finally: # Explicitly clear the tree to free memory
changedetectionio/tests/test_xpath_selector.py+71 −0 modified@@ -592,3 +592,74 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path): set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path) for content_type in RSS_XML_CONTENT_TYPES: _subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path) + + +# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends +# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions. +def test_xpath_blocked_functions_unit(): + """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed).""" + import elementpath + from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser + from lxml import html + + html_content = '<html><body><p>safe content</p></body></html>' + + dangerous_expressions = [ + "unparsed-text('file:///etc/passwd')", + "unparsed-text-lines('file:///etc/passwd')", + "unparsed-text-available('file:///etc/passwd')", + "doc('file:///etc/passwd')", + "doc-available('file:///etc/passwd')", + "environment-variable('PATH')", + "available-environment-variables()", + ] + + for expr in dangerous_expressions: + # xpath_filter() must raise, not silently return file contents + try: + result = xpath_filter(expr, html_content) + assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}" + except elementpath.ElementPathError: + pass # expected + + # SafeXPath3Parser must reject the expression at parse time + tree = html.fromstring(html_content) + try: + elementpath.select(tree, expr, parser=SafeXPath3Parser) + assert False, f"SafeXPath3Parser should have raised for: {expr!r}" + except elementpath.ElementPathError: + pass # expected + + # Sanity check: normal XPath still works + result = xpath_filter('//p/text()', html_content) + assert result == 'safe content' + + +# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions. 
+def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path): + """Edit-form validation must reject dangerous XPath 3.0 functions before they are stored.""" + from flask import url_for + + set_original_response(datastore_path=datastore_path) + test_url = url_for('test_endpoint', _external=True) + client.application.config.get('DATASTORE').add_watch(url=test_url) + client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) + wait_for_all_checks(client) + + dangerous_expressions = [ + "xpath:unparsed-text('file:///etc/passwd')", + "xpath:environment-variable('PATH')", + "xpath:doc('file:///etc/passwd')", + ] + + for expr in dangerous_expressions: + res = client.post( + url_for("ui.ui_edit.edit_page", uuid="first"), + data={"include_filters": expr, "url": test_url, "tags": "", "headers": "", + 'fetch_backend': "html_requests", "time_between_check_use_default": "y"}, + follow_redirects=True + ) + assert b"is not a valid XPath expression" in res.data, \ + f"Form should reject dangerous expression: {expr!r}" + + delete_all_watches(client)
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
5 references:
- github.com/advisories/GHSA-6fmw-82m7-jq6p (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2026-29039 (ghsa, ADVISORY)
- github.com/dgtlmoon/changedetection.io/commit/417d57e5749441e4be9acc4010369bded805d66f (ghsa, x_refsource_MISC, WEB)
- github.com/dgtlmoon/changedetection.io/releases/tag/0.54.4 (ghsa, x_refsource_MISC, WEB)
- github.com/dgtlmoon/changedetection.io/security/advisories/GHSA-6fmw-82m7-jq6p (ghsa, x_refsource_CONFIRM, WEB)
News mentions
0No linked articles in our index yet.