CVE-2024-41950
Description
Haystack is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Haystack clients that let their users create and run Pipelines from scratch are vulnerable to remote code execution. Certain Components in Haystack use Jinja2 templates; if anyone can create and render such a template on the client machine, they can run arbitrary code. The vulnerability has been fixed in Haystack 2.3.1.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| haystack-ai (PyPI) | < 2.3.1 | 2.3.1 |
Patches
3
6c25a5c73e83 Fix issue that could lead to RCE if using unsecure Jinja templates in dynamic prompt builders (#8096)
3 files changed · +12 −8
haystack/components/builders/dynamic_chat_prompt_builder.py+5 −4 modified@@ -5,7 +5,8 @@ import warnings from typing import Any, Dict, List, Optional, Set -from jinja2 import Template, meta +from jinja2 import meta +from jinja2.sandbox import SandboxedEnvironment from haystack import component, logging from haystack.dataclasses.chat_message import ChatMessage, ChatRole @@ -177,8 +178,8 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): :raises ValueError: If all the required template variables are not provided. """ - template = Template(template_text) - ast = template.environment.parse(template_text) + env = SandboxedEnvironment() + ast = env.parse(template_text) required_template_variables = meta.find_undeclared_variables(ast) filled_template_vars = required_template_variables.intersection(provided_variables) if len(filled_template_vars) != len(required_template_variables): @@ -187,4 +188,4 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): f"Required variables: {required_template_variables}. Only the following variables were " f"provided: {provided_variables}. Please provide all the required template variables." ) - return template + return env.from_string(template_text)
haystack/components/builders/dynamic_prompt_builder.py+5 −4 modified@@ -5,7 +5,8 @@ import warnings from typing import Any, Dict, List, Optional, Set -from jinja2 import Template, meta +from jinja2 import meta +from jinja2.sandbox import SandboxedEnvironment from haystack import component, logging @@ -156,8 +157,8 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): :raises ValueError: If all the required template variables are not provided. """ - template = Template(template_text) - ast = template.environment.parse(template_text) + env = SandboxedEnvironment() + ast = env.parse(template_text) required_template_variables = meta.find_undeclared_variables(ast) filled_template_vars = required_template_variables.intersection(provided_variables) if len(filled_template_vars) != len(required_template_variables): @@ -166,4 +167,4 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): f"Required variables: {required_template_variables}. Only the following variables were " f"provided: {provided_variables}. Please provide all the required template variables." ) - return template + return env.from_string(template_text)
releasenotes/notes/fix-jinja-env-81c98225b22dc827.yaml+2 −0 modified@@ -8,6 +8,8 @@ security: - `PromptBuilder` - `ChatPromptBuilder` + - `DynamicPromptBuilder` + - `DynamicChatPromptBuilder` - `OutputAdapter` - `ConditionalRouter`
3fed1366c448 fix: Fix issue that could lead to RCE if using unsecure Jinja templates (#8095)
7 files changed · +83 −80
haystack/components/builders/chat_prompt_builder.py+6 −4 modified@@ -4,7 +4,8 @@ from typing import Any, Dict, List, Optional, Set -from jinja2 import Template, meta +from jinja2 import meta +from jinja2.sandbox import SandboxedEnvironment from haystack import component, default_from_dict, default_to_dict, logging from haystack.dataclasses.chat_message import ChatMessage, ChatRole @@ -123,12 +124,12 @@ def __init__( self.required_variables = required_variables or [] self.template = template variables = variables or [] + self._env = SandboxedEnvironment() if template and not variables: for message in template: if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM): # infere variables from template - msg_template = Template(message.content) - ast = msg_template.environment.parse(message.content) + ast = self._env.parse(message.content) template_variables = meta.find_undeclared_variables(ast) variables += list(template_variables) @@ -194,7 +195,8 @@ def run( for message in template: if message.is_from(ChatRole.USER) or message.is_from(ChatRole.SYSTEM): self._validate_variables(set(template_variables_combined.keys())) - compiled_template = Template(message.content) + + compiled_template = self._env.from_string(message.content) rendered_content = compiled_template.render(template_variables_combined) rendered_message = ( ChatMessage.from_user(rendered_content)
haystack/components/builders/prompt_builder.py+8 −5 modified@@ -4,7 +4,8 @@ from typing import Any, Dict, List, Optional, Set -from jinja2 import Template, meta +from jinja2 import meta +from jinja2.sandbox import SandboxedEnvironment from haystack import component, default_to_dict @@ -158,10 +159,12 @@ def __init__( self._variables = variables self._required_variables = required_variables self.required_variables = required_variables or [] - self.template = Template(template) + + self._env = SandboxedEnvironment() + self.template = self._env.from_string(template) if not variables: # infere variables from template - ast = self.template.environment.parse(template) + ast = self._env.parse(template) template_variables = meta.find_undeclared_variables(ast) variables = list(template_variables) @@ -216,8 +219,8 @@ def run(self, template: Optional[str] = None, template_variables: Optional[Dict[ self._validate_variables(set(template_variables_combined.keys())) compiled_template = self.template - if isinstance(template, str): - compiled_template = Template(template) + if template is not None: + compiled_template = self._env.from_string(template) result = compiled_template.render(template_variables_combined) return {"prompt": result}
haystack/components/converters/output_adapter.py+18 −12 modified@@ -2,11 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import ast +import contextlib from typing import Any, Callable, Dict, Optional, Set import jinja2.runtime from jinja2 import TemplateSyntaxError, meta -from jinja2.nativetypes import NativeEnvironment +from jinja2.sandbox import SandboxedEnvironment from typing_extensions import TypeAlias from haystack import component, default_from_dict, default_to_dict @@ -58,18 +60,18 @@ def __init__(self, template: str, output_type: TypeAlias, custom_filters: Option # Create a Jinja native environment, we need it to: # a) add custom filters to the environment for filter compilation stage - env = NativeEnvironment() + self._env = SandboxedEnvironment(undefined=jinja2.runtime.StrictUndefined) try: - env.parse(template) # Validate template syntax + self._env.parse(template) # Validate template syntax self.template = template except TemplateSyntaxError as e: raise ValueError(f"Invalid Jinja template '{template}': {e}") from e for name, filter_func in self.custom_filters.items(): - env.filters[name] = filter_func + self._env.filters[name] = filter_func # b) extract variables in the template - route_input_names = self._extract_variables(env) + route_input_names = self._extract_variables(self._env) input_types.update(route_input_names) # the env is not needed, discarded automatically @@ -92,16 +94,22 @@ def run(self, **kwargs): # check if kwargs are empty if not kwargs: raise ValueError("No input data provided for output adaptation") - env = NativeEnvironment() for name, filter_func in self.custom_filters.items(): - env.filters[name] = filter_func + self._env.filters[name] = filter_func adapted_outputs = {} try: - adapted_output_template = env.from_string(self.template) + adapted_output_template = self._env.from_string(self.template) output_result = adapted_output_template.render(**kwargs) if isinstance(output_result, jinja2.runtime.Undefined): raise 
OutputAdaptationException(f"Undefined variable in the template {self.template}; kwargs: {kwargs}") + # We suppress the exception in case the output is already a string, otherwise + # we try to evaluate it and would fail. + # This must be done cause the output could be different literal structures. + # This doesn't support any user types. + with contextlib.suppress(Exception): + output_result = ast.literal_eval(output_result) + adapted_outputs["output"] = output_result except Exception as e: raise OutputAdaptationException(f"Error adapting {self.template} with {kwargs}: {e}") from e @@ -135,14 +143,12 @@ def from_dict(cls, data: Dict[str, Any]) -> "OutputAdapter": init_params["custom_filters"][name] = deserialize_callable(filter_func) if filter_func else None return default_from_dict(cls, data) - def _extract_variables(self, env: NativeEnvironment) -> Set[str]: + def _extract_variables(self, env: SandboxedEnvironment) -> Set[str]: """ Extracts all variables from a list of Jinja template strings. :param env: A Jinja native environment. :return: A set of variable names extracted from the template strings. """ - variables = set() ast = env.parse(self.template) - variables.update(meta.find_undeclared_variables(ast)) - return variables + return meta.find_undeclared_variables(ast)
haystack/components/routers/conditional_router.py+17 −10 modified@@ -2,10 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import ast +import contextlib from typing import Any, Callable, Dict, List, Optional, Set from jinja2 import Environment, TemplateSyntaxError, meta from jinja2.nativetypes import NativeEnvironment +from jinja2.sandbox import SandboxedEnvironment from haystack import component, default_from_dict, default_to_dict, logging from haystack.utils import deserialize_callable, deserialize_type, serialize_callable, serialize_type @@ -125,16 +128,16 @@ def __init__(self, routes: List[Dict], custom_filters: Optional[Dict[str, Callab self.custom_filters = custom_filters or {} # Create a Jinja native environment to inspect variables in the condition templates - env = NativeEnvironment() - env.filters.update(self.custom_filters) + self._env = SandboxedEnvironment() + self._env.filters.update(self.custom_filters) # Inspect the routes to determine input and output types. input_types: Set[str] = set() # let's just store the name, type will always be Any output_types: Dict[str, str] = {} for route in routes: # extract inputs - route_input_names = self._extract_variables(env, [route["output"], route["condition"]]) + route_input_names = self._extract_variables(self._env, [route["output"], route["condition"]]) input_types.update(route_input_names) # extract outputs @@ -194,16 +197,20 @@ def run(self, **kwargs): routes. 
""" # Create a Jinja native environment to evaluate the condition templates as Python expressions - env = NativeEnvironment() - env.filters.update(self.custom_filters) - for route in self.routes: try: - t = env.from_string(route["condition"]) - if t.render(**kwargs): + t = self._env.from_string(route["condition"]) + rendered = t.render(**kwargs) + if ast.literal_eval(rendered): # We now evaluate the `output` expression to determine the route output - t_output = env.from_string(route["output"]) + t_output = self._env.from_string(route["output"]) output = t_output.render(**kwargs) + # We suppress the exception in case the output is already a string, otherwise + # we try to evaluate it and would fail. + # This must be done cause the output could be different literal structures. + # This doesn't support any user types. + with contextlib.suppress(Exception): + output = ast.literal_eval(output) # and return the output as a dictionary under the output_name key return {route["output_name"]: output} except Exception as e: @@ -234,7 +241,7 @@ def _validate_routes(self, routes: List[Dict]): if not self._validate_template(env, route[field]): raise ValueError(f"Invalid template for field '{field}': {route[field]}") - def _extract_variables(self, env: NativeEnvironment, templates: List[str]) -> Set[str]: + def _extract_variables(self, env: SandboxedEnvironment, templates: List[str]) -> Set[str]: """ Extracts all variables from a list of Jinja template strings.
haystack/core/pipeline/template.py+3 −2 modified@@ -6,7 +6,8 @@ from pathlib import Path from typing import Any, Dict, Optional, Union -from jinja2 import Environment, PackageLoader, TemplateSyntaxError, meta +from jinja2 import PackageLoader, TemplateSyntaxError, meta +from jinja2.sandbox import SandboxedEnvironment TEMPLATE_FILE_EXTENSION = ".yaml.jinja2" TEMPLATE_HOME_DIR = Path(__file__).resolve().parent / "predefined" @@ -74,7 +75,7 @@ def __init__(self, template_content: str): :param template_content: The raw template source to use in the template. """ - env = Environment( + env = SandboxedEnvironment( loader=PackageLoader("haystack.core.pipeline", "predefined"), trim_blocks=True, lstrip_blocks=True ) try:
releasenotes/notes/fix-jinja-env-81c98225b22dc827.yaml+14 −0 added@@ -0,0 +1,14 @@ +--- +upgrade: + - | + `OutputAdapter` and `ConditionalRouter` can't return users inputs anymore. +security: + - | + Fix issue that could lead to remote code execution when using insecure Jinja template in the following Components: + + - `PromptBuilder` + - `ChatPromptBuilder` + - `OutputAdapter` + - `ConditionalRouter` + + The same issue has been fixed in the `PipelineTemplate` class too.
test/components/routers/test_conditional_router.py+17 −47 modified@@ -97,7 +97,23 @@ def test_router_initialized(self, routes): assert set(router.__haystack_input__._sockets_dict.keys()) == {"query", "streams"} assert set(router.__haystack_output__._sockets_dict.keys()) == {"query", "streams"} - def test_router_evaluate_condition_expressions(self, router): + def test_router_evaluate_condition_expressions(self): + router = ConditionalRouter( + [ + { + "condition": "{{streams|length < 2}}", + "output": "{{query}}", + "output_type": str, + "output_name": "query", + }, + { + "condition": "{{streams|length >= 2}}", + "output": "{{streams}}", + "output_type": List[int], + "output_name": "streams", + }, + ] + ) # first route should be selected kwargs = {"streams": [1, 2, 3], "query": "test"} result = router.run(**kwargs) @@ -227,52 +243,6 @@ def test_router_de_serialization(self): # check that the result is the same and correct assert result1 == result2 and result1 == {"streams": [1, 2, 3]} - def test_router_de_serialization_user_type(self): - routes = [ - { - "condition": "{{streams|length < 2}}", - "output": "{{message}}", - "output_type": ChatMessage, - "output_name": "message", - }, - { - "condition": "{{streams|length >= 2}}", - "output": "{{streams}}", - "output_type": List[int], - "output_name": "streams", - }, - ] - router = ConditionalRouter(routes) - router_dict = router.to_dict() - - # assert that the router dict is correct, with all keys and values being strings - for route in router_dict["init_parameters"]["routes"]: - for key in route.keys(): - assert isinstance(key, str) - assert isinstance(route[key], str) - - # check that the output_type is a string and a proper class name - assert ( - router_dict["init_parameters"]["routes"][0]["output_type"] - == "haystack.dataclasses.chat_message.ChatMessage" - ) - - # deserialize the router - new_router = ConditionalRouter.from_dict(router_dict) - - # check that the output_type is the right class - assert 
new_router.routes[0]["output_type"] == ChatMessage - assert router.routes == new_router.routes - - # now use both routers to run the same message - message = ChatMessage.from_user("ciao") - kwargs = {"streams": [1], "message": message} - result1 = router.run(**kwargs) - result2 = new_router.run(**kwargs) - - # check that the result is the same and correct - assert result1 == result2 and result1["message"].content == message.content - def test_router_serialization_idempotence(self): routes = [ {
efc907cc8228
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
8
- github.com/advisories/GHSA-hx9v-6r9f-w677 (ghsa, ADVISORY)
- nvd.nist.gov/vuln/detail/CVE-2024-41950 (ghsa, ADVISORY)
- github.com/deepset-ai/haystack/commit/3fed1366c448b02189851bf08166c1f6477a02b0 (nvd, WEB)
- github.com/deepset-ai/haystack/commit/6c25a5c73e83aa32c3241ba84a5cbb3ac0e8a89e (nvd, WEB)
- github.com/deepset-ai/haystack/pull/8095 (nvd, WEB)
- github.com/deepset-ai/haystack/pull/8096 (nvd, WEB)
- github.com/deepset-ai/haystack/releases/tag/v2.3.1 (nvd, WEB)
- github.com/deepset-ai/haystack/security/advisories/GHSA-hx9v-6r9f-w677 (nvd, WEB)
News mentions
0
No linked articles in our index yet.