High severity · NVD Advisory · Published May 20, 2025 · Updated May 20, 2025

Langroid has a Code Injection vulnerability in LanceDocChatAgent through vector_store

CVE-2025-46725

Description

Langroid is a Python framework to build large language model (LLM)-powered applications. Prior to version 0.53.15, LanceDocChatAgent uses pandas eval() through compute_from_docs(). As a result, an attacker may be able to make the agent run malicious commands through QueryPlan.dataframe_calc, compromising the host system. Langroid 0.53.15 sanitizes input to the affected function by default to tackle the most common attack vectors, and adds several warnings about the risky behavior to the project documentation.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package	Affected versions	Patched versions
langroid (PyPI)	< 0.53.15	0.53.15

Affected products

Langchain/Langroidv5
Range: < 0.53.15

Patches

0d9e4a7bb3ae

fixes to code-injection mitigation PR #841 (#850)

https://github.com/langroid/langroid · Prasad Chalasani · May 17, 2025 · via ghsa

commit

6 files changed · +371 −12

langroid/agent/special/table_chat_agent.py+19 −5 modified

@@ -7,6 +7,13 @@
 The expression is passed via the `pandas_eval` tool/function-call,
 which is handled by the Agent's `pandas_eval` method. This method evaluates
 the expression and returns the result as a string.
+
+WARNING: This Agent should be used only with trusted input, as it can execute system
+commands. 
+
+The `full_eval` flag is false by default, which means that the input is sanitized
+against most common code injection attack vectors. `full_eval` may be set to True to 
+disable sanitization at all. Both cases should be used with caution.
 """
 
 import io
@@ -26,6 +33,7 @@
 from langroid.parsing.table_loader import read_tabular_data
 from langroid.prompts.prompts_config import PromptsConfig
 from langroid.utils.constants import DONE, PASS
+from langroid.utils.pandas_utils import sanitize_command
 from langroid.vector_store.base import VectorStoreConfig
 
 logger = logging.getLogger(__name__)
@@ -113,6 +121,9 @@ class TableChatAgentConfig(ChatAgentConfig):
     cache: bool = True  # cache results
     debug: bool = False
     stream: bool = True  # allow streaming where needed
+    full_eval: bool = (
+        False  # runs eval without sanitization. Use only on trusted input!
+    )
     data: str | pd.DataFrame  # data file, URL, or DataFrame
     separator: None | str = None  # separator for data file
     vecdb: None | VectorStoreConfig = None
@@ -204,18 +215,21 @@ def pandas_eval(self, msg: PandasEvalTool) -> str:
         """
         self.sent_expression = True
         exprn = msg.expression
-        local_vars = {"df": self.df}
+        vars = {"df": self.df}
         # Create a string-based I/O stream
         code_out = io.StringIO()
 
         # Temporarily redirect standard output to our string-based I/O stream
         sys.stdout = code_out
 
         # Evaluate the last line and get the result;
-        # SECURITY: eval only with empty globals and {"df": df} in locals to
-        # prevent arbitrary Python code execution.
+        # SECURITY MITIGATION: Eval input is sanitized by default to prevent most
+        # common code injection attack vectors.
         try:
-            eval_result = eval(exprn, {}, local_vars)
+            if not self.config.full_eval:
+                exprn = sanitize_command(exprn)
+            code = compile(exprn, "<calc>", "eval")
+            eval_result = eval(code, vars, {})
         except Exception as e:
             eval_result = f"ERROR: {type(e)}: {e}"
 
@@ -226,7 +240,7 @@ def pandas_eval(self, msg: PandasEvalTool) -> str:
         sys.stdout = sys.__stdout__
 
         # If df has been modified in-place, save the changes back to self.df
-        self.df = local_vars["df"]
+        self.df = vars["df"]
 
         # Get the resulting string from the I/O stream
         print_result = code_out.getvalue() or ""

langroid/utils/pandas_utils.py+280 −0 modified

@@ -1,7 +1,287 @@
+import ast
 from typing import Any
 
 import pandas as pd
 
+COMMON_USE_DF_METHODS = {
+    "T",
+    "abs",
+    "add",
+    "add_prefix",
+    "add_suffix",
+    "agg",
+    "aggregate",
+    "align",
+    "all",
+    "any",
+    "apply",
+    "applymap",
+    "at",
+    "at_time",
+    "between_time",
+    "bfill",
+    "clip",
+    "combine",
+    "combine_first",
+    "convert_dtypes",
+    "corr",
+    "corrwith",
+    "count",
+    "cov",
+    "cummax",
+    "cummin",
+    "cumprod",
+    "cumsum",
+    "describe",
+    "diff",
+    "dot",
+    "drop_duplicates",
+    "duplicated",
+    "eq",
+    "eval",
+    "ewm",
+    "expanding",
+    "explode",
+    "filter",
+    "first",
+    "groupby",
+    "head",
+    "idxmax",
+    "idxmin",
+    "infer_objects",
+    "interpolate",
+    "isin",
+    "kurt",
+    "kurtosis",
+    "last",
+    "le",
+    "loc",
+    "lt",
+    "gt",
+    "ge",
+    "iloc",
+    "mask",
+    "max",
+    "mean",
+    "median",
+    "melt",
+    "min",
+    "mode",
+    "mul",
+    "nlargest",
+    "nsmallest",
+    "notna",
+    "notnull",
+    "nunique",
+    "pct_change",
+    "pipe",
+    "pivot",
+    "pivot_table",
+    "prod",
+    "product",
+    "quantile",
+    "query",
+    "rank",
+    "replace",
+    "resample",
+    "rolling",
+    "round",
+    "sample",
+    "select_dtypes",
+    "sem",
+    "shift",
+    "skew",
+    "sort_index",
+    "sort_values",
+    "squeeze",
+    "stack",
+    "std",
+    "sum",
+    "tail",
+    "transform",
+    "transpose",
+    "unstack",
+    "value_counts",
+    "var",
+    "where",
+    "xs",
+}
+
+POTENTIALLY_DANGEROUS_DF_METHODS = {
+    "eval",
+    "query",
+    "apply",
+    "applymap",
+    "pipe",
+    "agg",
+    "aggregate",
+    "transform",
+    "rolling",
+    "expanding",
+    "resample",
+}
+
+WHITELISTED_DF_METHODS = COMMON_USE_DF_METHODS - POTENTIALLY_DANGEROUS_DF_METHODS
+
+
+BLOCKED_KW = {
+    "engine",
+    "parser",
+    "inplace",
+    "regex",
+    "dtype",
+    "converters",
+    "eval",
+}
+MAX_CHAIN = 6
+MAX_DEPTH = 25
+NUMERIC_LIMIT = 1_000_000_000
+
+
+class UnsafeCommandError(ValueError):
+    """Raised when a command string violates security policy."""
+
+    pass
+
+
+def _literal_ok(node: ast.AST) -> bool:
+    """Return True if *node* is a safe literal (and within numeric limit)."""
+    if isinstance(node, ast.Constant):
+        if (
+            isinstance(node.value, (int, float, complex))
+            and abs(node.value) > NUMERIC_LIMIT
+        ):
+            raise UnsafeCommandError("numeric constant exceeds limit")
+        return True
+    if isinstance(node, (ast.Tuple, ast.List)):
+        return all(_literal_ok(elt) for elt in node.elts)
+    if isinstance(node, ast.Slice):
+        return all(
+            sub is None or _literal_ok(sub)
+            for sub in (node.lower, node.upper, node.step)
+        )
+    return False
+
+
+class CommandValidator(ast.NodeVisitor):
+    """AST walker that enforces the security policy."""
+
+    # Comparison operators we allow
+    ALLOWED_CMPOP = (ast.Gt, ast.GtE, ast.Lt, ast.LtE, ast.Eq, ast.NotEq)
+
+    # Arithmetic operators we allow (power ** intentionally omitted)
+    ALLOWED_BINOP = (ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod)
+    ALLOWED_UNARY = (ast.UAdd, ast.USub)
+
+    # Node whitelist
+    ALLOWED_NODES = (
+        ast.Expression,
+        ast.Attribute,
+        ast.Name,
+        ast.Load,
+        ast.Call,
+        ast.Subscript,
+        ast.Constant,
+        ast.Tuple,
+        ast.List,
+        ast.Slice,
+        ast.keyword,
+        ast.BinOp,
+        ast.UnaryOp,
+        ast.Compare,
+        *ALLOWED_BINOP,
+        *ALLOWED_UNARY,
+        *ALLOWED_CMPOP,
+    )
+
+    def __init__(self, df_name: str = "df"):
+        self.df_name = df_name
+        self.depth = 0
+        self.chain = 0
+
+    # Depth guard
+    def generic_visit(self, node: ast.AST) -> None:
+        self.depth += 1
+        if self.depth > MAX_DEPTH:
+            raise UnsafeCommandError("AST nesting too deep")
+        super().generic_visit(node)
+        self.depth -= 1
+
+    # Literal validation
+    def visit_Constant(self, node: ast.Constant) -> None:
+        _literal_ok(node)
+
+    # Arithmetic
+    def visit_BinOp(self, node: ast.BinOp) -> None:
+        if not isinstance(node.op, self.ALLOWED_BINOP):
+            raise UnsafeCommandError("operator not allowed")
+        self.generic_visit(node)
+
+    def visit_UnaryOp(self, node: ast.UnaryOp) -> None:
+        if not isinstance(node.op, self.ALLOWED_UNARY):
+            raise UnsafeCommandError("unary operator not allowed")
+        self.generic_visit(node)
+
+    # Comparisons
+    def visit_Compare(self, node: ast.Compare) -> None:
+        if not all(isinstance(op, self.ALLOWED_CMPOP) for op in node.ops):
+            raise UnsafeCommandError("comparison operator not allowed")
+        for comp in node.comparators:
+            _literal_ok(comp)
+        self.generic_visit(node)
+
+    # Subscripts
+    def visit_Subscript(self, node: ast.Subscript) -> None:
+        if not _literal_ok(node.slice):
+            raise UnsafeCommandError("subscript must be literal")
+        self.generic_visit(node)
+
+    # Method calls
+    def visit_Call(self, node: ast.Call) -> None:
+        if not isinstance(node.func, ast.Attribute):
+            raise UnsafeCommandError("only DataFrame method calls allowed")
+
+        method = node.func.attr
+        self.chain += 1
+        if self.chain > MAX_CHAIN:
+            raise UnsafeCommandError("method-chain too long")
+        if method not in WHITELISTED_DF_METHODS:
+            raise UnsafeCommandError(f"method '{method}' not permitted")
+
+        # kwarg / arg checks
+        for kw in node.keywords:
+            if kw.arg in BLOCKED_KW:
+                raise UnsafeCommandError(f"kwarg '{kw.arg}' is blocked")
+            _literal_ok(kw.value)
+        for arg in node.args:
+            _literal_ok(arg)
+
+        try:
+            self.generic_visit(node)
+        finally:
+            self.chain -= 1
+
+    # Names
+    def visit_Name(self, node: ast.Name) -> None:
+        if node.id != self.df_name:
+            raise UnsafeCommandError(f"unexpected variable '{node.id}'")
+
+    # Top-level gate
+    def visit(self, node: ast.AST) -> None:
+        if not isinstance(node, self.ALLOWED_NODES):
+            raise UnsafeCommandError(f"disallowed node {type(node).__name__}")
+        super().visit(node)
+
+
+def sanitize_command(expr: str, df_name: str = "df") -> str:
+    """
+    Validate *expr*; return it unchanged if it passes all rules,
+    else raise UnsafeCommandError with the first violation encountered.
+    """
+    tree = ast.parse(expr, mode="eval")
+    CommandValidator(df_name).visit(tree)
+    return expr
+
 
 def stringify(x: Any) -> str:
     # Convert x to DataFrame if it is not one already

langroid/vector_store/base.py+7 −6 modified

@@ -14,7 +14,7 @@
 from langroid.utils.configuration import settings
 from langroid.utils.object_registry import ObjectRegistry
 from langroid.utils.output.printing import print_long_text
-from langroid.utils.pandas_utils import stringify
+from langroid.utils.pandas_utils import sanitize_command, stringify
 from langroid.utils.pydantic_utils import flatten_dict
 
 logger = logging.getLogger(__name__)
@@ -159,11 +159,12 @@ def compute_from_docs(self, docs: List[Document], calc: str) -> str:
         df = pd.DataFrame(dicts)
 
         try:
-            # SECURITY: Use Python's eval() with NO globals and only {"df": df}
-            # in locals. This allows pandas operations on `df` while preventing
-            # access to builtins or other potentially harmful global functions,
-            # mitigating risks associated with executing untrusted `calc` strings.
-            result = eval(calc, {}, {"df": df})  # type: ignore
+            # SECURITY MITIGATION: Eval input is sanitized to prevent most common
+            # code injection attack vectors.
+            vars = {"df": df}
+            calc = sanitize_command(calc)
+            code = compile(calc, "<calc>", "eval")
+            result = eval(code, vars, {})
         except Exception as e:
             # return error message so LLM can fix the calc string if needed
             err = f"""

SECURITY.md+7 −1 modified

@@ -1,10 +1,16 @@
 # Security Policy
 
+## ⚠️ Warning
+**Always sanitize user input.**
+
+Langroid executes Python code generated by Large Language Models (LLMs) (e.g., through `TableChatAgent` and `LanceDocChatAgent`). While this provides powerful data analysis capabilities, it can lead to unintended consequences if exposed unsafely. Malicious users may exploit LLM responses to execute harmful code, potentially resulting in sensitive data exposure, denial-of-service, or complete system compromise.
+
+If your LLM application accepts untrusted input, implement input sanitization and sandboxing to mitigate these risks.
+
 ## Supported Versions
 
 Security updates are supported on Langroid version >= 0.18.x
 
-
 ## Reporting a Vulnerability
 
 If you discover a security vulnerability in this repository, **please report it privately**. Security issues should **not** be reported using GitHub Issues or any other public forum.

tests/main/test_pandas_utils.py+56 −0 added

@@ -0,0 +1,56 @@
+import pytest
+
+from langroid.utils.pandas_utils import UnsafeCommandError, sanitize_command
+
+SAFE = [
+    "df.groupby('state')['income'].mean()",
+    "df['a'] + df['b'] * 2",
+    "df.pivot_table(index='year', columns='state', values='sales', aggfunc='sum')",
+    "df.sort_values('income').head(10)",
+    "(df['x'] - df['y']).abs().mean()",
+    "df.sample(n=5)",
+    "df.nsmallest(3, 'income')['income']",
+    "df.where(df['income'] > 50000)['state'].value_counts()",
+    "df.describe()",
+    "df.loc[0:100, 'income'].sum()",
+    "df.head(5)['income'].mean()",
+    "df.select_dtypes(include=['number']).sum().sum()",
+    "df.rank(method='average')['score']",
+    "df.groupby('state', sort=True)['income'].median()",
+    "df.sample(frac=0.1, random_state=42)",
+]
+
+DEEP_EXPR = "df" + "[0]" * 26  # depth bomb (26 > MAX_DEPTH)
+
+BLOCK_WITH_MSG = [
+    ("df.eval('2+2')", r"method 'eval' not permitted"),
+    ("df.sample(n=5, regex=True)", r"kwarg 'regex' is blocked"),
+    ("df['b'] * 12345678901", r"numeric constant exceeds limit"),
+    ("df['a'] ** 8", r"operator not allowed"),
+    (
+        "df.head().tail().sort_values('a').groupby('state').sum().mean().std()",
+        r"method-chain too long",
+    ),
+    ("df.sample(n=10, inplace=True)", r"kwarg 'inplace' is blocked"),
+    ("sales.sum()", r"unexpected variable 'sales'"),
+    ("df2.head()", r"unexpected variable 'df2'"),
+    ("df[other_var]", r"subscript must be literal"),
+    (
+        "df.where(df['income'] > other_var)['income']",
+        r"unexpected variable 'other_var'",
+    ),
+    (DEEP_EXPR, r"AST nesting too deep"),
+]
+
+
+@pytest.mark.parametrize("expr", SAFE)
+def test_safe(expr):
+    """All SAFE expressions must pass without exception."""
+    assert sanitize_command(expr) == expr
+
+
+@pytest.mark.parametrize("expr,msg", BLOCK_WITH_MSG)
+def test_block(expr, msg):
+    """All BLOCK expressions must raise UnsafeCommandError with the right message."""
+    with pytest.raises(UnsafeCommandError, match=msg):
+        sanitize_command(expr)

tests/main/test_table_chat_agent.py+2 −0 modified

@@ -80,6 +80,7 @@ def _test_table_chat_agent(
             data=tabular_data,
             use_tools=not fn_api,
             use_functions_api=fn_api,
+            full_eval=True,  # Allow full evaluation in tests
         )
     )
 
@@ -168,6 +169,7 @@ def test_table_chat_agent_url(test_settings: Settings, fn_api: bool) -> None:
             data=URL,
             use_tools=not fn_api,
             use_functions_api=fn_api,
+            full_eval=True,  # Allow full evaluation in tests
         )
     )

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

github.com/advisories/GHSA-22c2-9gwg-mj59 (ghsa, ADVISORY)
nvd.nist.gov/vuln/detail/CVE-2025-46725 (ghsa, ADVISORY)
github.com/langroid/langroid/commit/0d9e4a7bb3ae2eef8d38f2e970ff916599a2b2a6 (ghsa, x_refsource_MISC, WEB)
github.com/langroid/langroid/security/advisories/GHSA-22c2-9gwg-mj59 (ghsa, x_refsource_CONFIRM, WEB)

News mentions

No linked articles in our index yet.

cvss	0.455
epss	0.000
exploit	0.000
kev	0.000
patch	-0.070
ransomware	0.000